inline.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. // Functions to parse inline elements.
  11. //
  12. package blackfriday
  13. import (
  14. "bytes"
  15. "regexp"
  16. "strconv"
  17. )
var (
	// urlRe is the regexp source (not compiled on its own) for a URL: either
	// an http, https or ftp scheme followed by "://", or a leading '/',
	// followed by one or more characters from the listed URL character set.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// anchorRe matches, anchored at the start of its input, a complete anchor
	// element of the form <a href="URL" title="...">URL</a>, where the title
	// attribute is optional and both URLs match urlRe.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// TODO: improve this regexp to catch all possible entities:
	// htmlEntityRe matches '&', two to five lowercase ASCII letters, then ';'.
	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
)
  24. // Functions to parse text within a block
  25. // Each function returns the number of chars taken care of
  26. // data is the complete block being rendered
  27. // offset is the number of valid chars before the current cursor
  28. func (p *Markdown) inline(currBlock *Node, data []byte) {
  29. // handlers might call us recursively: enforce a maximum depth
  30. if p.nesting >= p.maxNesting || len(data) == 0 {
  31. return
  32. }
  33. p.nesting++
  34. beg, end := 0, 0
  35. for end < len(data) {
  36. handler := p.inlineCallback[data[end]]
  37. if handler != nil {
  38. if consumed, node := handler(p, data, end); consumed == 0 {
  39. // No action from the callback.
  40. end++
  41. } else {
  42. // Copy inactive chars into the output.
  43. currBlock.AppendChild(text(data[beg:end]))
  44. if node != nil {
  45. currBlock.AppendChild(node)
  46. }
  47. // Skip past whatever the callback used.
  48. beg = end + consumed
  49. end = beg
  50. }
  51. } else {
  52. end++
  53. }
  54. }
  55. if beg < len(data) {
  56. if data[end-1] == '\n' {
  57. end--
  58. }
  59. currBlock.AppendChild(text(data[beg:end]))
  60. }
  61. p.nesting--
  62. }
  63. // single and double emphasis parsing
  64. func emphasis(p *Markdown, data []byte, offset int) (int, *Node) {
  65. data = data[offset:]
  66. c := data[0]
  67. if len(data) > 2 && data[1] != c {
  68. // whitespace cannot follow an opening emphasis;
  69. // strikethrough only takes two characters '~~'
  70. if c == '~' || isspace(data[1]) {
  71. return 0, nil
  72. }
  73. ret, node := helperEmphasis(p, data[1:], c)
  74. if ret == 0 {
  75. return 0, nil
  76. }
  77. return ret + 1, node
  78. }
  79. if len(data) > 3 && data[1] == c && data[2] != c {
  80. if isspace(data[2]) {
  81. return 0, nil
  82. }
  83. ret, node := helperDoubleEmphasis(p, data[2:], c)
  84. if ret == 0 {
  85. return 0, nil
  86. }
  87. return ret + 2, node
  88. }
  89. if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
  90. if c == '~' || isspace(data[3]) {
  91. return 0, nil
  92. }
  93. ret, node := helperTripleEmphasis(p, data, 3, c)
  94. if ret == 0 {
  95. return 0, nil
  96. }
  97. return ret + 3, node
  98. }
  99. return 0, nil
  100. }
  101. func codeSpan(p *Markdown, data []byte, offset int) (int, *Node) {
  102. data = data[offset:]
  103. nb := 0
  104. // count the number of backticks in the delimiter
  105. for nb < len(data) && data[nb] == '`' {
  106. nb++
  107. }
  108. // find the next delimiter
  109. i, end := 0, 0
  110. for end = nb; end < len(data) && i < nb; end++ {
  111. if data[end] == '`' {
  112. i++
  113. } else {
  114. i = 0
  115. }
  116. }
  117. // no matching delimiter?
  118. if i < nb && end >= len(data) {
  119. return 0, nil
  120. }
  121. // trim outside whitespace
  122. fBegin := nb
  123. for fBegin < end && data[fBegin] == ' ' {
  124. fBegin++
  125. }
  126. fEnd := end - nb
  127. for fEnd > fBegin && data[fEnd-1] == ' ' {
  128. fEnd--
  129. }
  130. // render the code span
  131. if fBegin != fEnd {
  132. code := NewNode(Code)
  133. code.Literal = data[fBegin:fEnd]
  134. return end, code
  135. }
  136. return end, nil
  137. }
  138. // newline preceded by two spaces becomes <br>
  139. func maybeLineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
  140. origOffset := offset
  141. for offset < len(data) && data[offset] == ' ' {
  142. offset++
  143. }
  144. if offset < len(data) && data[offset] == '\n' {
  145. if offset-origOffset >= 2 {
  146. return offset - origOffset + 1, NewNode(Hardbreak)
  147. }
  148. return offset - origOffset, nil
  149. }
  150. return 0, nil
  151. }
  152. // newline without two spaces works when HardLineBreak is enabled
  153. func lineBreak(p *Markdown, data []byte, offset int) (int, *Node) {
  154. if p.extensions&HardLineBreak != 0 {
  155. return 1, NewNode(Hardbreak)
  156. }
  157. return 0, nil
  158. }
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

const (
	// linkNormal is a regular link: [text].
	linkNormal linkType = iota
	// linkImg is an image: ![alt].
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId].
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text].
	linkInlineFootnote
)
  166. func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
  167. if t == linkDeferredFootnote {
  168. return false
  169. }
  170. return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
  171. }
  172. func maybeImage(p *Markdown, data []byte, offset int) (int, *Node) {
  173. if offset < len(data)-1 && data[offset+1] == '[' {
  174. return link(p, data, offset)
  175. }
  176. return 0, nil
  177. }
  178. func maybeInlineFootnote(p *Markdown, data []byte, offset int) (int, *Node) {
  179. if offset < len(data)-1 && data[offset+1] == '[' {
  180. return link(p, data, offset)
  181. }
  182. return 0, nil
  183. }
// '[': parse a link or an image or a footnote
//
// maybeImage and maybeInlineFootnote forward here when data[offset+1] is
// '['; in those cases data[offset] is the leading marker ('!' for images,
// '^' for inline footnotes) and is skipped before parsing.
//
// Three syntaxes are recognised after the bracketed text: an inline
// destination "(url "title")", a reference "[id]", and the shortcut form with
// nothing following (also used for footnotes).
//
// It returns the number of bytes consumed starting at the original offset
// together with the resulting node, or (0, nil) when nothing could be built.
func link(p *Markdown, data []byte, offset int) (int, *Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// From here on every index is relative to the (possibly advanced) offset.
	data = data[offset:]

	var (
		i                       = 1
		noteID                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// step over the '^' of "[^"
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			// byte preceded by a backslash: not a bracket
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the ']' that closes the bracketed text.
	txtE := i
	i++
	var footnoteNode *Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				// skip the backslash and the byte after it
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: there is no title, and everything up
				// to ')' belongs to the destination
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}

		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step over the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the bracketed text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// rebuild the id with each newline replaced by a single
				// space, unless the byte before it is already a space
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step over the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline-to-space rewrite as in the reference case
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = NewNode(Item)
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			var fragment []byte
			if len(id) > 0 {
				// the fragment is the slugified id, capped at 16 bytes
				// by the size of the destination slice
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			if t == linkDeferredFootnote {
				// record the note in order of first use
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	var linkNode *Node
	switch t {
	case linkNormal:
		linkNode = NewNode(Link)
		linkNode.Destination = normalizeURI(uLink)
		linkNode.Title = title
		if len(altContent) > 0 {
			linkNode.AppendChild(text(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(linkNode, data[1:txtE])
			p.insideLink = insideLink
		}

	case linkImg:
		linkNode = NewNode(Image)
		linkNode.Destination = uLink
		linkNode.Title = title
		linkNode.AppendChild(text(data[1:txtE]))
		// account for the leading '!' that was skipped via offset++
		i++

	case linkInlineFootnote, linkDeferredFootnote:
		linkNode = NewNode(Link)
		linkNode.Destination = link
		linkNode.Title = title
		linkNode.NoteID = noteID
		linkNode.Footnote = footnoteNode
		if t == linkInlineFootnote {
			// account for the leading '^' that was skipped via offset++
			i++
		}

	default:
		return 0, nil
	}

	return i, linkNode
}
  486. func (p *Markdown) inlineHTMLComment(data []byte) int {
  487. if len(data) < 5 {
  488. return 0
  489. }
  490. if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
  491. return 0
  492. }
  493. i := 5
  494. // scan for an end-of-comment marker, across lines if necessary
  495. for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
  496. i++
  497. }
  498. // no end-of-comment marker
  499. if i >= len(data) {
  500. return 0
  501. }
  502. return i + 1
  503. }
  504. func stripMailto(link []byte) []byte {
  505. if bytes.HasPrefix(link, []byte("mailto://")) {
  506. return link[9:]
  507. } else if bytes.HasPrefix(link, []byte("mailto:")) {
  508. return link[7:]
  509. } else {
  510. return link
  511. }
  512. }
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink: the '<...>' construct is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink: a "<scheme:...>" autolink.
	normalAutolink
	// emailAutolink: a "<user@host>" autolink.
	emailAutolink
)
  521. // '<' when tags or autolinks are allowed
  522. func leftAngle(p *Markdown, data []byte, offset int) (int, *Node) {
  523. data = data[offset:]
  524. altype, end := tagLength(data)
  525. if size := p.inlineHTMLComment(data); size > 0 {
  526. end = size
  527. }
  528. if end > 2 {
  529. if altype != notAutolink {
  530. var uLink bytes.Buffer
  531. unescapeText(&uLink, data[1:end+1-2])
  532. if uLink.Len() > 0 {
  533. link := uLink.Bytes()
  534. node := NewNode(Link)
  535. node.Destination = link
  536. if altype == emailAutolink {
  537. node.Destination = append([]byte("mailto:"), link...)
  538. }
  539. node.AppendChild(text(stripMailto(link)))
  540. return end, node
  541. }
  542. } else {
  543. htmlTag := NewNode(HTMLSpan)
  544. htmlTag.Literal = data[:end]
  545. return end, htmlTag
  546. }
  547. }
  548. return end, nil
  549. }
  550. // '\\' backslash escape
  551. var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
  552. func escape(p *Markdown, data []byte, offset int) (int, *Node) {
  553. data = data[offset:]
  554. if len(data) > 1 {
  555. if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
  556. return 2, NewNode(Hardbreak)
  557. }
  558. if bytes.IndexByte(escapeChars, data[1]) < 0 {
  559. return 0, nil
  560. }
  561. return 2, text(data[1:2])
  562. }
  563. return 2, nil
  564. }
  565. func unescapeText(ob *bytes.Buffer, src []byte) {
  566. i := 0
  567. for i < len(src) {
  568. org := i
  569. for i < len(src) && src[i] != '\\' {
  570. i++
  571. }
  572. if i > org {
  573. ob.Write(src[org:i])
  574. }
  575. if i+1 >= len(src) {
  576. break
  577. }
  578. ob.WriteByte(src[i+1])
  579. i += 2
  580. }
  581. }
  582. // '&' escaped when it doesn't belong to an entity
  583. // valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
  584. func entity(p *Markdown, data []byte, offset int) (int, *Node) {
  585. data = data[offset:]
  586. end := 1
  587. if end < len(data) && data[end] == '#' {
  588. end++
  589. }
  590. for end < len(data) && isalnum(data[end]) {
  591. end++
  592. }
  593. if end < len(data) && data[end] == ';' {
  594. end++ // real entity
  595. } else {
  596. return 0, nil // lone '&'
  597. }
  598. ent := data[:end]
  599. // undo &amp; escaping or it will be converted to &amp;amp; by another
  600. // escaper in the renderer
  601. if bytes.Equal(ent, []byte("&amp;")) {
  602. ent = []byte{'&'}
  603. }
  604. return end, text(ent)
  605. }
  606. func linkEndsWithEntity(data []byte, linkEnd int) bool {
  607. entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
  608. return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
  609. }
  610. // hasPrefixCaseInsensitive is a custom implementation of
  611. // strings.HasPrefix(strings.ToLower(s), prefix)
  612. // we rolled our own because ToLower pulls in a huge machinery of lowercasing
  613. // anything from Unicode and that's very slow. Since this func will only be
  614. // used on ASCII protocol prefixes, we can take shortcuts.
  615. func hasPrefixCaseInsensitive(s, prefix []byte) bool {
  616. if len(s) < len(prefix) {
  617. return false
  618. }
  619. delta := byte('a' - 'A')
  620. for i, b := range prefix {
  621. if b != s[i] && b != s[i]+delta {
  622. return false
  623. }
  624. }
  625. return true
  626. }
  627. var protocolPrefixes = [][]byte{
  628. []byte("http://"),
  629. []byte("https://"),
  630. []byte("ftp://"),
  631. []byte("file://"),
  632. []byte("mailto:"),
  633. }
  634. const shortestPrefix = 6 // len("ftp://"), the shortest of the above
  635. func maybeAutoLink(p *Markdown, data []byte, offset int) (int, *Node) {
  636. // quick check to rule out most false hits
  637. if p.insideLink || len(data) < offset+shortestPrefix {
  638. return 0, nil
  639. }
  640. for _, prefix := range protocolPrefixes {
  641. endOfHead := offset + 8 // 8 is the len() of the longest prefix
  642. if endOfHead > len(data) {
  643. endOfHead = len(data)
  644. }
  645. if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
  646. return autoLink(p, data, offset)
  647. }
  648. }
  649. return 0, nil
  650. }
// autoLink turns a bare URL around data[offset] into a Link node.
//
// If the text from the nearest preceding '<' matches anchorRe (a complete
// <a href=...>...</a> element), the rest of that element is returned as an
// HTMLSpan instead. Otherwise the URL is extended backwards over up to six
// letters, checked with isSafeLink, extended forwards up to the first byte
// for which isEndOfLink is true, and trimmed of trailing punctuation that
// does not belong to it.
//
// It returns the number of bytes consumed and the node, or (0, nil) when no
// link is recognised.
func autoLink(p *Markdown, data []byte, offset int) (int, *Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the part of the anchor element from offset onwards verbatim
		anchorClose := NewNode(HTMLSpan)
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// Keep the full input for the delimiter scan below; data now starts at
	// the rewound position.
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// bufEnd indexes origData: the byte just before the closing
		// delimiter at the end of the link
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards to the start of the line, counting closing
		// delimiters up and opening delimiters down
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the trailing delimiter is dropped from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := NewNode(Link)
		node.Destination = uLink.Bytes()
		node.AppendChild(text(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
  751. func isEndOfLink(char byte) bool {
  752. return isspace(char) || char == '<'
  753. }
  754. var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
  755. var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
  756. func isSafeLink(link []byte) bool {
  757. for _, path := range validPaths {
  758. if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
  759. if len(link) == len(path) {
  760. return true
  761. } else if isalnum(link[len(path)]) {
  762. return true
  763. }
  764. }
  765. }
  766. for _, prefix := range validUris {
  767. // TODO: handle unicode here
  768. // case-insensitive prefix test
  769. if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
  770. return true
  771. }
  772. }
  773. return false
  774. }
  775. // return the length of the given tag, or 0 is it's not valid
  776. func tagLength(data []byte) (autolink autolinkType, end int) {
  777. var i, j int
  778. // a valid tag can't be shorter than 3 chars
  779. if len(data) < 3 {
  780. return notAutolink, 0
  781. }
  782. // begins with a '<' optionally followed by '/', followed by letter or number
  783. if data[0] != '<' {
  784. return notAutolink, 0
  785. }
  786. if data[1] == '/' {
  787. i = 2
  788. } else {
  789. i = 1
  790. }
  791. if !isalnum(data[i]) {
  792. return notAutolink, 0
  793. }
  794. // scheme test
  795. autolink = notAutolink
  796. // try to find the beginning of an URI
  797. for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
  798. i++
  799. }
  800. if i > 1 && i < len(data) && data[i] == '@' {
  801. if j = isMailtoAutoLink(data[i:]); j != 0 {
  802. return emailAutolink, i + j
  803. }
  804. }
  805. if i > 2 && i < len(data) && data[i] == ':' {
  806. autolink = normalAutolink
  807. i++
  808. }
  809. // complete autolink test: no whitespace or ' or "
  810. switch {
  811. case i >= len(data):
  812. autolink = notAutolink
  813. case autolink != notAutolink:
  814. j = i
  815. for i < len(data) {
  816. if data[i] == '\\' {
  817. i += 2
  818. } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
  819. break
  820. } else {
  821. i++
  822. }
  823. }
  824. if i >= len(data) {
  825. return autolink, 0
  826. }
  827. if i > j && data[i] == '>' {
  828. return autolink, i + 1
  829. }
  830. // one of the forbidden chars has been found
  831. autolink = notAutolink
  832. }
  833. i += bytes.IndexByte(data[i:], '>')
  834. if i < 0 {
  835. return autolink, 0
  836. }
  837. return autolink, i + 1
  838. }
  839. // look for the address part of a mail autolink and '>'
  840. // this is less strict than the original markdown e-mail address matching
  841. func isMailtoAutoLink(data []byte) int {
  842. nb := 0
  843. // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
  844. for i := 0; i < len(data); i++ {
  845. if isalnum(data[i]) {
  846. continue
  847. }
  848. switch data[i] {
  849. case '@':
  850. nb++
  851. case '-', '.', '_':
  852. break
  853. case '>':
  854. if nb == 1 {
  855. return i + 1
  856. }
  857. return 0
  858. default:
  859. return 0
  860. }
  861. }
  862. return 0
  863. }
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index in data of the next occurrence of c,
// passing over code spans and link-like constructs. An occurrence preceded
// by a backslash is skipped. When a skipped construct turns out to be
// unterminated, the first c seen inside it (tmpI) is returned instead.
//
// A result of 0 means that nothing usable was found; it is also what is
// returned when data[0] itself is c, so callers cannot tell the two apart.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		// advance to the next byte of interest: c, '`' or '['
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				// remember the first c in case the span never closes
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			// spaces and newlines may separate "]" from what follows
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// cc is the opening byte ('[' or '('); the loop below scans
			// until that same byte is seen again
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
  935. func helperEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
  936. i := 0
  937. // skip one symbol if coming from emph3
  938. if len(data) > 1 && data[0] == c && data[1] == c {
  939. i = 1
  940. }
  941. for i < len(data) {
  942. length := helperFindEmphChar(data[i:], c)
  943. if length == 0 {
  944. return 0, nil
  945. }
  946. i += length
  947. if i >= len(data) {
  948. return 0, nil
  949. }
  950. if i+1 < len(data) && data[i+1] == c {
  951. i++
  952. continue
  953. }
  954. if data[i] == c && !isspace(data[i-1]) {
  955. if p.extensions&NoIntraEmphasis != 0 {
  956. if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
  957. continue
  958. }
  959. }
  960. emph := NewNode(Emph)
  961. p.inline(emph, data[:i])
  962. return i + 1, emph
  963. }
  964. }
  965. return 0, nil
  966. }
  967. func helperDoubleEmphasis(p *Markdown, data []byte, c byte) (int, *Node) {
  968. i := 0
  969. for i < len(data) {
  970. length := helperFindEmphChar(data[i:], c)
  971. if length == 0 {
  972. return 0, nil
  973. }
  974. i += length
  975. if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
  976. nodeType := Strong
  977. if c == '~' {
  978. nodeType = Del
  979. }
  980. node := NewNode(nodeType)
  981. p.inline(node, data[:i])
  982. return i + 2, node
  983. }
  984. i++
  985. }
  986. return 0, nil
  987. }
  988. func helperTripleEmphasis(p *Markdown, data []byte, offset int, c byte) (int, *Node) {
  989. i := 0
  990. origData := data
  991. data = data[offset:]
  992. for i < len(data) {
  993. length := helperFindEmphChar(data[i:], c)
  994. if length == 0 {
  995. return 0, nil
  996. }
  997. i += length
  998. // skip whitespace preceded symbols
  999. if data[i] != c || isspace(data[i-1]) {
  1000. continue
  1001. }
  1002. switch {
  1003. case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
  1004. // triple symbol found
  1005. strong := NewNode(Strong)
  1006. em := NewNode(Emph)
  1007. strong.AppendChild(em)
  1008. p.inline(em, data[:i])
  1009. return i + 3, strong
  1010. case (i+1 < len(data) && data[i+1] == c):
  1011. // double symbol found, hand over to emph1
  1012. length, node := helperEmphasis(p, origData[offset-2:], c)
  1013. if length == 0 {
  1014. return 0, nil
  1015. }
  1016. return length - 2, node
  1017. default:
  1018. // single symbol found, hand over to emph2
  1019. length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
  1020. if length == 0 {
  1021. return 0, nil
  1022. }
  1023. return length - 1, node
  1024. }
  1025. }
  1026. return 0, nil
  1027. }
  1028. func text(s []byte) *Node {
  1029. node := NewNode(Text)
  1030. node.Literal = s
  1031. return node
  1032. }
  1033. func normalizeURI(s []byte) []byte {
  1034. return s // TODO: implement
  1035. }