block.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549
  1. //
  2. // Blackfriday Markdown Processor
  3. // Available at http://github.com/russross/blackfriday
  4. //
  5. // Copyright © 2011 Russ Ross <russ@russross.com>.
  6. // Distributed under the Simplified BSD License.
  7. // See README.md for details.
  8. //
  9. //
  10. // Functions to parse block-level elements.
  11. //
  12. package blackfriday
  13. import (
  14. "bytes"
  15. "html"
  16. "regexp"
  17. "github.com/shurcooL/sanitized_anchor_name"
  18. )
  // Patterns used to undo backslash escapes and HTML character references.
const (
	// charEntity matches a named, decimal or hexadecimal character
	// reference such as "&amp;", "&#35;" or "&#x23;".
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a character class holding every ASCII punctuation
	// character that may follow a backslash in an escape sequence.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is a cheap pre-check: it matches any input that
	// contains a backslash or an ampersand, i.e. anything that might need
	// unescaping. A raw string is used so that the backslash reaches the
	// regexp engine as an escaped literal backslash; the interpreted string
	// "[\\&]" compiles to the class [\&], which only matches '&'.
	reBackslashOrAmp = regexp.MustCompile(`[\\&]`)
	// reEntityOrEscapedChar matches (case-insensitively) either a backslash
	// followed by an escapable character, or a character reference.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
)
  27. // Parse block-level data.
  28. // Note: this function and many that it calls assume that
  29. // the input buffer ends with a newline.
  30. func (p *Markdown) block(data []byte) {
  31. // this is called recursively: enforce a maximum depth
  32. if p.nesting >= p.maxNesting {
  33. return
  34. }
  35. p.nesting++
  36. // parse out one block-level construct at a time
  37. for len(data) > 0 {
  38. // prefixed heading:
  39. //
  40. // # Heading 1
  41. // ## Heading 2
  42. // ...
  43. // ###### Heading 6
  44. if p.isPrefixHeading(data) {
  45. data = data[p.prefixHeading(data):]
  46. continue
  47. }
  48. // block of preformatted HTML:
  49. //
  50. // <div>
  51. // ...
  52. // </div>
  53. if data[0] == '<' {
  54. if i := p.html(data, true); i > 0 {
  55. data = data[i:]
  56. continue
  57. }
  58. }
  59. // title block
  60. //
  61. // % stuff
  62. // % more stuff
  63. // % even more stuff
  64. if p.extensions&Titleblock != 0 {
  65. if data[0] == '%' {
  66. if i := p.titleBlock(data, true); i > 0 {
  67. data = data[i:]
  68. continue
  69. }
  70. }
  71. }
  72. // blank lines. note: returns the # of bytes to skip
  73. if i := p.isEmpty(data); i > 0 {
  74. data = data[i:]
  75. continue
  76. }
  77. // indented code block:
  78. //
  79. // func max(a, b int) int {
  80. // if a > b {
  81. // return a
  82. // }
  83. // return b
  84. // }
  85. if p.codePrefix(data) > 0 {
  86. data = data[p.code(data):]
  87. continue
  88. }
  89. // fenced code block:
  90. //
  91. // ``` go
  92. // func fact(n int) int {
  93. // if n <= 1 {
  94. // return n
  95. // }
  96. // return n * fact(n-1)
  97. // }
  98. // ```
  99. if p.extensions&FencedCode != 0 {
  100. if i := p.fencedCodeBlock(data, true); i > 0 {
  101. data = data[i:]
  102. continue
  103. }
  104. }
  105. // horizontal rule:
  106. //
  107. // ------
  108. // or
  109. // ******
  110. // or
  111. // ______
  112. if p.isHRule(data) {
  113. p.addBlock(HorizontalRule, nil)
  114. var i int
  115. for i = 0; i < len(data) && data[i] != '\n'; i++ {
  116. }
  117. data = data[i:]
  118. continue
  119. }
  120. // block quote:
  121. //
  122. // > A big quote I found somewhere
  123. // > on the web
  124. if p.quotePrefix(data) > 0 {
  125. data = data[p.quote(data):]
  126. continue
  127. }
  128. // table:
  129. //
  130. // Name | Age | Phone
  131. // ------|-----|---------
  132. // Bob | 31 | 555-1234
  133. // Alice | 27 | 555-4321
  134. if p.extensions&Tables != 0 {
  135. if i := p.table(data); i > 0 {
  136. data = data[i:]
  137. continue
  138. }
  139. }
  140. // an itemized/unordered list:
  141. //
  142. // * Item 1
  143. // * Item 2
  144. //
  145. // also works with + or -
  146. if p.uliPrefix(data) > 0 {
  147. data = data[p.list(data, 0):]
  148. continue
  149. }
  150. // a numbered/ordered list:
  151. //
  152. // 1. Item 1
  153. // 2. Item 2
  154. if p.oliPrefix(data) > 0 {
  155. data = data[p.list(data, ListTypeOrdered):]
  156. continue
  157. }
  158. // definition lists:
  159. //
  160. // Term 1
  161. // : Definition a
  162. // : Definition b
  163. //
  164. // Term 2
  165. // : Definition c
  166. if p.extensions&DefinitionLists != 0 {
  167. if p.dliPrefix(data) > 0 {
  168. data = data[p.list(data, ListTypeDefinition):]
  169. continue
  170. }
  171. }
  172. // anything else must look like a normal paragraph
  173. // note: this finds underlined headings, too
  174. data = data[p.paragraph(data):]
  175. }
  176. p.nesting--
  177. }
  178. func (p *Markdown) addBlock(typ NodeType, content []byte) *Node {
  179. p.closeUnmatchedBlocks()
  180. container := p.addChild(typ, 0)
  181. container.content = content
  182. return container
  183. }
  184. func (p *Markdown) isPrefixHeading(data []byte) bool {
  185. if data[0] != '#' {
  186. return false
  187. }
  188. if p.extensions&SpaceHeadings != 0 {
  189. level := 0
  190. for level < 6 && level < len(data) && data[level] == '#' {
  191. level++
  192. }
  193. if level == len(data) || data[level] != ' ' {
  194. return false
  195. }
  196. }
  197. return true
  198. }
  199. func (p *Markdown) prefixHeading(data []byte) int {
  200. level := 0
  201. for level < 6 && level < len(data) && data[level] == '#' {
  202. level++
  203. }
  204. i := skipChar(data, level, ' ')
  205. end := skipUntilChar(data, i, '\n')
  206. skip := end
  207. id := ""
  208. if p.extensions&HeadingIDs != 0 {
  209. j, k := 0, 0
  210. // find start/end of heading id
  211. for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
  212. }
  213. for k = j + 1; k < end && data[k] != '}'; k++ {
  214. }
  215. // extract heading id iff found
  216. if j < end && k < end {
  217. id = string(data[j+2 : k])
  218. end = j
  219. skip = k + 1
  220. for end > 0 && data[end-1] == ' ' {
  221. end--
  222. }
  223. }
  224. }
  225. for end > 0 && data[end-1] == '#' {
  226. if isBackslashEscaped(data, end-1) {
  227. break
  228. }
  229. end--
  230. }
  231. for end > 0 && data[end-1] == ' ' {
  232. end--
  233. }
  234. if end > i {
  235. if id == "" && p.extensions&AutoHeadingIDs != 0 {
  236. id = sanitized_anchor_name.Create(string(data[i:end]))
  237. }
  238. block := p.addBlock(Heading, data[i:end])
  239. block.HeadingID = id
  240. block.Level = level
  241. }
  242. return skip
  243. }
  244. func (p *Markdown) isUnderlinedHeading(data []byte) int {
  245. // test of level 1 heading
  246. if data[0] == '=' {
  247. i := skipChar(data, 1, '=')
  248. i = skipChar(data, i, ' ')
  249. if i < len(data) && data[i] == '\n' {
  250. return 1
  251. }
  252. return 0
  253. }
  254. // test of level 2 heading
  255. if data[0] == '-' {
  256. i := skipChar(data, 1, '-')
  257. i = skipChar(data, i, ' ')
  258. if i < len(data) && data[i] == '\n' {
  259. return 2
  260. }
  261. return 0
  262. }
  263. return 0
  264. }
  265. func (p *Markdown) titleBlock(data []byte, doRender bool) int {
  266. if data[0] != '%' {
  267. return 0
  268. }
  269. splitData := bytes.Split(data, []byte("\n"))
  270. var i int
  271. for idx, b := range splitData {
  272. if !bytes.HasPrefix(b, []byte("%")) {
  273. i = idx // - 1
  274. break
  275. }
  276. }
  277. data = bytes.Join(splitData[0:i], []byte("\n"))
  278. consumed := len(data)
  279. data = bytes.TrimPrefix(data, []byte("% "))
  280. data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
  281. block := p.addBlock(Heading, data)
  282. block.Level = 1
  283. block.IsTitleblock = true
  284. return consumed
  285. }
  286. func (p *Markdown) html(data []byte, doRender bool) int {
  287. var i, j int
  288. // identify the opening tag
  289. if data[0] != '<' {
  290. return 0
  291. }
  292. curtag, tagfound := p.htmlFindTag(data[1:])
  293. // handle special cases
  294. if !tagfound {
  295. // check for an HTML comment
  296. if size := p.htmlComment(data, doRender); size > 0 {
  297. return size
  298. }
  299. // check for an <hr> tag
  300. if size := p.htmlHr(data, doRender); size > 0 {
  301. return size
  302. }
  303. // no special case recognized
  304. return 0
  305. }
  306. // look for an unindented matching closing tag
  307. // followed by a blank line
  308. found := false
  309. /*
  310. closetag := []byte("\n</" + curtag + ">")
  311. j = len(curtag) + 1
  312. for !found {
  313. // scan for a closing tag at the beginning of a line
  314. if skip := bytes.Index(data[j:], closetag); skip >= 0 {
  315. j += skip + len(closetag)
  316. } else {
  317. break
  318. }
  319. // see if it is the only thing on the line
  320. if skip := p.isEmpty(data[j:]); skip > 0 {
  321. // see if it is followed by a blank line/eof
  322. j += skip
  323. if j >= len(data) {
  324. found = true
  325. i = j
  326. } else {
  327. if skip := p.isEmpty(data[j:]); skip > 0 {
  328. j += skip
  329. found = true
  330. i = j
  331. }
  332. }
  333. }
  334. }
  335. */
  336. // if not found, try a second pass looking for indented match
  337. // but not if tag is "ins" or "del" (following original Markdown.pl)
  338. if !found && curtag != "ins" && curtag != "del" {
  339. i = 1
  340. for i < len(data) {
  341. i++
  342. for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
  343. i++
  344. }
  345. if i+2+len(curtag) >= len(data) {
  346. break
  347. }
  348. j = p.htmlFindEnd(curtag, data[i-1:])
  349. if j > 0 {
  350. i += j - 1
  351. found = true
  352. break
  353. }
  354. }
  355. }
  356. if !found {
  357. return 0
  358. }
  359. // the end of the block has been found
  360. if doRender {
  361. // trim newlines
  362. end := i
  363. for end > 0 && data[end-1] == '\n' {
  364. end--
  365. }
  366. finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
  367. }
  368. return i
  369. }
  370. func finalizeHTMLBlock(block *Node) {
  371. block.Literal = block.content
  372. block.content = nil
  373. }
  374. // HTML comment, lax form
  375. func (p *Markdown) htmlComment(data []byte, doRender bool) int {
  376. i := p.inlineHTMLComment(data)
  377. // needs to end with a blank line
  378. if j := p.isEmpty(data[i:]); j > 0 {
  379. size := i + j
  380. if doRender {
  381. // trim trailing newlines
  382. end := size
  383. for end > 0 && data[end-1] == '\n' {
  384. end--
  385. }
  386. block := p.addBlock(HTMLBlock, data[:end])
  387. finalizeHTMLBlock(block)
  388. }
  389. return size
  390. }
  391. return 0
  392. }
  393. // HR, which is the only self-closing block tag considered
  394. func (p *Markdown) htmlHr(data []byte, doRender bool) int {
  395. if len(data) < 4 {
  396. return 0
  397. }
  398. if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
  399. return 0
  400. }
  401. if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
  402. // not an <hr> tag after all; at least not a valid one
  403. return 0
  404. }
  405. i := 3
  406. for i < len(data) && data[i] != '>' && data[i] != '\n' {
  407. i++
  408. }
  409. if i < len(data) && data[i] == '>' {
  410. i++
  411. if j := p.isEmpty(data[i:]); j > 0 {
  412. size := i + j
  413. if doRender {
  414. // trim newlines
  415. end := size
  416. for end > 0 && data[end-1] == '\n' {
  417. end--
  418. }
  419. finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end]))
  420. }
  421. return size
  422. }
  423. }
  424. return 0
  425. }
  426. func (p *Markdown) htmlFindTag(data []byte) (string, bool) {
  427. i := 0
  428. for i < len(data) && isalnum(data[i]) {
  429. i++
  430. }
  431. key := string(data[:i])
  432. if _, ok := blockTags[key]; ok {
  433. return key, true
  434. }
  435. return "", false
  436. }
  437. func (p *Markdown) htmlFindEnd(tag string, data []byte) int {
  438. // assume data[0] == '<' && data[1] == '/' already tested
  439. if tag == "hr" {
  440. return 2
  441. }
  442. // check if tag is a match
  443. closetag := []byte("</" + tag + ">")
  444. if !bytes.HasPrefix(data, closetag) {
  445. return 0
  446. }
  447. i := len(closetag)
  448. // check that the rest of the line is blank
  449. skip := 0
  450. if skip = p.isEmpty(data[i:]); skip == 0 {
  451. return 0
  452. }
  453. i += skip
  454. skip = 0
  455. if i >= len(data) {
  456. return i
  457. }
  458. if p.extensions&LaxHTMLBlocks != 0 {
  459. return i
  460. }
  461. if skip = p.isEmpty(data[i:]); skip == 0 {
  462. // following line must be blank
  463. return 0
  464. }
  465. return i + skip
  466. }
  467. func (*Markdown) isEmpty(data []byte) int {
  468. // it is okay to call isEmpty on an empty buffer
  469. if len(data) == 0 {
  470. return 0
  471. }
  472. var i int
  473. for i = 0; i < len(data) && data[i] != '\n'; i++ {
  474. if data[i] != ' ' && data[i] != '\t' {
  475. return 0
  476. }
  477. }
  478. if i < len(data) && data[i] == '\n' {
  479. i++
  480. }
  481. return i
  482. }
  483. func (*Markdown) isHRule(data []byte) bool {
  484. i := 0
  485. // skip up to three spaces
  486. for i < 3 && data[i] == ' ' {
  487. i++
  488. }
  489. // look at the hrule char
  490. if data[i] != '*' && data[i] != '-' && data[i] != '_' {
  491. return false
  492. }
  493. c := data[i]
  494. // the whole line must be the char or whitespace
  495. n := 0
  496. for i < len(data) && data[i] != '\n' {
  497. switch {
  498. case data[i] == c:
  499. n++
  500. case data[i] != ' ':
  501. return false
  502. }
  503. i++
  504. }
  505. return n >= 3
  506. }
  507. // isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
  508. // and returns the end index if so, or 0 otherwise. It also returns the marker found.
  509. // If syntax is not nil, it gets set to the syntax specified in the fence line.
  510. func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
  511. i, size := 0, 0
  512. // skip up to three spaces
  513. for i < len(data) && i < 3 && data[i] == ' ' {
  514. i++
  515. }
  516. // check for the marker characters: ~ or `
  517. if i >= len(data) {
  518. return 0, ""
  519. }
  520. if data[i] != '~' && data[i] != '`' {
  521. return 0, ""
  522. }
  523. c := data[i]
  524. // the whole line must be the same char or whitespace
  525. for i < len(data) && data[i] == c {
  526. size++
  527. i++
  528. }
  529. // the marker char must occur at least 3 times
  530. if size < 3 {
  531. return 0, ""
  532. }
  533. marker = string(data[i-size : i])
  534. // if this is the end marker, it must match the beginning marker
  535. if oldmarker != "" && marker != oldmarker {
  536. return 0, ""
  537. }
  538. // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
  539. // into one, always get the syntax, and discard it if the caller doesn't care.
  540. if syntax != nil {
  541. syn := 0
  542. i = skipChar(data, i, ' ')
  543. if i >= len(data) {
  544. if i == len(data) {
  545. return i, marker
  546. }
  547. return 0, ""
  548. }
  549. syntaxStart := i
  550. if data[i] == '{' {
  551. i++
  552. syntaxStart++
  553. for i < len(data) && data[i] != '}' && data[i] != '\n' {
  554. syn++
  555. i++
  556. }
  557. if i >= len(data) || data[i] != '}' {
  558. return 0, ""
  559. }
  560. // strip all whitespace at the beginning and the end
  561. // of the {} block
  562. for syn > 0 && isspace(data[syntaxStart]) {
  563. syntaxStart++
  564. syn--
  565. }
  566. for syn > 0 && isspace(data[syntaxStart+syn-1]) {
  567. syn--
  568. }
  569. i++
  570. } else {
  571. for i < len(data) && !isspace(data[i]) {
  572. syn++
  573. i++
  574. }
  575. }
  576. *syntax = string(data[syntaxStart : syntaxStart+syn])
  577. }
  578. i = skipChar(data, i, ' ')
  579. if i >= len(data) || data[i] != '\n' {
  580. if i == len(data) {
  581. return i, marker
  582. }
  583. return 0, ""
  584. }
  585. return i + 1, marker // Take newline into account.
  586. }
  587. // fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
  588. // or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
  589. // If doRender is true, a final newline is mandatory to recognize the fenced code block.
  590. func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int {
  591. var syntax string
  592. beg, marker := isFenceLine(data, &syntax, "")
  593. if beg == 0 || beg >= len(data) {
  594. return 0
  595. }
  596. var work bytes.Buffer
  597. work.Write([]byte(syntax))
  598. work.WriteByte('\n')
  599. for {
  600. // safe to assume beg < len(data)
  601. // check for the end of the code block
  602. fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
  603. if fenceEnd != 0 {
  604. beg += fenceEnd
  605. break
  606. }
  607. // copy the current line
  608. end := skipUntilChar(data, beg, '\n') + 1
  609. // did we reach the end of the buffer without a closing marker?
  610. if end >= len(data) {
  611. return 0
  612. }
  613. // verbatim copy to the working buffer
  614. if doRender {
  615. work.Write(data[beg:end])
  616. }
  617. beg = end
  618. }
  619. if doRender {
  620. block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
  621. block.IsFenced = true
  622. finalizeCodeBlock(block)
  623. }
  624. return beg
  625. }
  // unescapeChar converts one escaped token into the text it stands for.
// A token starting with a backslash yields the single byte after the
// backslash; anything else is decoded with html.UnescapeString.
// str must not be empty, and must hold at least two bytes when it starts
// with a backslash.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		decoded := html.UnescapeString(string(str))
		return []byte(decoded)
	}
	return []byte{str[1]}
}
  632. func unescapeString(str []byte) []byte {
  633. if reBackslashOrAmp.Match(str) {
  634. return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
  635. }
  636. return str
  637. }
  638. func finalizeCodeBlock(block *Node) {
  639. if block.IsFenced {
  640. newlinePos := bytes.IndexByte(block.content, '\n')
  641. firstLine := block.content[:newlinePos]
  642. rest := block.content[newlinePos+1:]
  643. block.Info = unescapeString(bytes.Trim(firstLine, "\n"))
  644. block.Literal = rest
  645. } else {
  646. block.Literal = block.content
  647. }
  648. block.content = nil
  649. }
  650. func (p *Markdown) table(data []byte) int {
  651. table := p.addBlock(Table, nil)
  652. i, columns := p.tableHeader(data)
  653. if i == 0 {
  654. p.tip = table.Parent
  655. table.Unlink()
  656. return 0
  657. }
  658. p.addBlock(TableBody, nil)
  659. for i < len(data) {
  660. pipes, rowStart := 0, i
  661. for ; i < len(data) && data[i] != '\n'; i++ {
  662. if data[i] == '|' {
  663. pipes++
  664. }
  665. }
  666. if pipes == 0 {
  667. i = rowStart
  668. break
  669. }
  670. // include the newline in data sent to tableRow
  671. if i < len(data) && data[i] == '\n' {
  672. i++
  673. }
  674. p.tableRow(data[rowStart:i], columns, false)
  675. }
  676. return i
  677. }
  // isBackslashEscaped reports whether the byte at position i is preceded
// by an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	escaped := false
	// Each backslash directly before position i flips the state.
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		escaped = !escaped
	}
	return escaped
}
  686. func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) {
  687. i := 0
  688. colCount := 1
  689. for i = 0; i < len(data) && data[i] != '\n'; i++ {
  690. if data[i] == '|' && !isBackslashEscaped(data, i) {
  691. colCount++
  692. }
  693. }
  694. // doesn't look like a table header
  695. if colCount == 1 {
  696. return
  697. }
  698. // include the newline in the data sent to tableRow
  699. j := i
  700. if j < len(data) && data[j] == '\n' {
  701. j++
  702. }
  703. header := data[:j]
  704. // column count ignores pipes at beginning or end of line
  705. if data[0] == '|' {
  706. colCount--
  707. }
  708. if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
  709. colCount--
  710. }
  711. columns = make([]CellAlignFlags, colCount)
  712. // move on to the header underline
  713. i++
  714. if i >= len(data) {
  715. return
  716. }
  717. if data[i] == '|' && !isBackslashEscaped(data, i) {
  718. i++
  719. }
  720. i = skipChar(data, i, ' ')
  721. // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
  722. // and trailing | optional on last column
  723. col := 0
  724. for i < len(data) && data[i] != '\n' {
  725. dashes := 0
  726. if data[i] == ':' {
  727. i++
  728. columns[col] |= TableAlignmentLeft
  729. dashes++
  730. }
  731. for i < len(data) && data[i] == '-' {
  732. i++
  733. dashes++
  734. }
  735. if i < len(data) && data[i] == ':' {
  736. i++
  737. columns[col] |= TableAlignmentRight
  738. dashes++
  739. }
  740. for i < len(data) && data[i] == ' ' {
  741. i++
  742. }
  743. if i == len(data) {
  744. return
  745. }
  746. // end of column test is messy
  747. switch {
  748. case dashes < 3:
  749. // not a valid column
  750. return
  751. case data[i] == '|' && !isBackslashEscaped(data, i):
  752. // marker found, now skip past trailing whitespace
  753. col++
  754. i++
  755. for i < len(data) && data[i] == ' ' {
  756. i++
  757. }
  758. // trailing junk found after last column
  759. if col >= colCount && i < len(data) && data[i] != '\n' {
  760. return
  761. }
  762. case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
  763. // something else found where marker was required
  764. return
  765. case data[i] == '\n':
  766. // marker is optional for the last column
  767. col++
  768. default:
  769. // trailing junk found after last column
  770. return
  771. }
  772. }
  773. if col != colCount {
  774. return
  775. }
  776. p.addBlock(TableHead, nil)
  777. p.tableRow(header, columns, true)
  778. size = i
  779. if size < len(data) && data[size] == '\n' {
  780. size++
  781. }
  782. return
  783. }
  784. func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) {
  785. p.addBlock(TableRow, nil)
  786. i, col := 0, 0
  787. if data[i] == '|' && !isBackslashEscaped(data, i) {
  788. i++
  789. }
  790. for col = 0; col < len(columns) && i < len(data); col++ {
  791. for i < len(data) && data[i] == ' ' {
  792. i++
  793. }
  794. cellStart := i
  795. for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
  796. i++
  797. }
  798. cellEnd := i
  799. // skip the end-of-cell marker, possibly taking us past end of buffer
  800. i++
  801. for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' {
  802. cellEnd--
  803. }
  804. cell := p.addBlock(TableCell, data[cellStart:cellEnd])
  805. cell.IsHeader = header
  806. cell.Align = columns[col]
  807. }
  808. // pad it out with empty columns to get the right number
  809. for ; col < len(columns); col++ {
  810. cell := p.addBlock(TableCell, nil)
  811. cell.IsHeader = header
  812. cell.Align = columns[col]
  813. }
  814. // silently ignore rows with too many cells
  815. }
  816. // returns blockquote prefix length
  817. func (p *Markdown) quotePrefix(data []byte) int {
  818. i := 0
  819. for i < 3 && i < len(data) && data[i] == ' ' {
  820. i++
  821. }
  822. if i < len(data) && data[i] == '>' {
  823. if i+1 < len(data) && data[i+1] == ' ' {
  824. return i + 2
  825. }
  826. return i + 1
  827. }
  828. return 0
  829. }
  830. // blockquote ends with at least one blank line
  831. // followed by something without a blockquote prefix
  832. func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool {
  833. if p.isEmpty(data[beg:]) <= 0 {
  834. return false
  835. }
  836. if end >= len(data) {
  837. return true
  838. }
  839. return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
  840. }
  841. // parse a blockquote fragment
  842. func (p *Markdown) quote(data []byte) int {
  843. block := p.addBlock(BlockQuote, nil)
  844. var raw bytes.Buffer
  845. beg, end := 0, 0
  846. for beg < len(data) {
  847. end = beg
  848. // Step over whole lines, collecting them. While doing that, check for
  849. // fenced code and if one's found, incorporate it altogether,
  850. // irregardless of any contents inside it
  851. for end < len(data) && data[end] != '\n' {
  852. if p.extensions&FencedCode != 0 {
  853. if i := p.fencedCodeBlock(data[end:], false); i > 0 {
  854. // -1 to compensate for the extra end++ after the loop:
  855. end += i - 1
  856. break
  857. }
  858. }
  859. end++
  860. }
  861. if end < len(data) && data[end] == '\n' {
  862. end++
  863. }
  864. if pre := p.quotePrefix(data[beg:]); pre > 0 {
  865. // skip the prefix
  866. beg += pre
  867. } else if p.terminateBlockquote(data, beg, end) {
  868. break
  869. }
  870. // this line is part of the blockquote
  871. raw.Write(data[beg:end])
  872. beg = end
  873. }
  874. p.block(raw.Bytes())
  875. p.finalize(block)
  876. return end
  877. }
  878. // returns prefix length for block code
  879. func (p *Markdown) codePrefix(data []byte) int {
  880. if len(data) >= 1 && data[0] == '\t' {
  881. return 1
  882. }
  883. if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
  884. return 4
  885. }
  886. return 0
  887. }
  888. func (p *Markdown) code(data []byte) int {
  889. var work bytes.Buffer
  890. i := 0
  891. for i < len(data) {
  892. beg := i
  893. for i < len(data) && data[i] != '\n' {
  894. i++
  895. }
  896. if i < len(data) && data[i] == '\n' {
  897. i++
  898. }
  899. blankline := p.isEmpty(data[beg:i]) > 0
  900. if pre := p.codePrefix(data[beg:i]); pre > 0 {
  901. beg += pre
  902. } else if !blankline {
  903. // non-empty, non-prefixed line breaks the pre
  904. i = beg
  905. break
  906. }
  907. // verbatim copy to the working buffer
  908. if blankline {
  909. work.WriteByte('\n')
  910. } else {
  911. work.Write(data[beg:i])
  912. }
  913. }
  914. // trim all the \n off the end of work
  915. workbytes := work.Bytes()
  916. eol := len(workbytes)
  917. for eol > 0 && workbytes[eol-1] == '\n' {
  918. eol--
  919. }
  920. if eol != len(workbytes) {
  921. work.Truncate(eol)
  922. }
  923. work.WriteByte('\n')
  924. block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer
  925. block.IsFenced = false
  926. finalizeCodeBlock(block)
  927. return i
  928. }
  929. // returns unordered list item prefix
  930. func (p *Markdown) uliPrefix(data []byte) int {
  931. i := 0
  932. // start with up to 3 spaces
  933. for i < len(data) && i < 3 && data[i] == ' ' {
  934. i++
  935. }
  936. if i >= len(data)-1 {
  937. return 0
  938. }
  939. // need one of {'*', '+', '-'} followed by a space or a tab
  940. if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
  941. (data[i+1] != ' ' && data[i+1] != '\t') {
  942. return 0
  943. }
  944. return i + 2
  945. }
  946. // returns ordered list item prefix
  947. func (p *Markdown) oliPrefix(data []byte) int {
  948. i := 0
  949. // start with up to 3 spaces
  950. for i < 3 && i < len(data) && data[i] == ' ' {
  951. i++
  952. }
  953. // count the digits
  954. start := i
  955. for i < len(data) && data[i] >= '0' && data[i] <= '9' {
  956. i++
  957. }
  958. if start == i || i >= len(data)-1 {
  959. return 0
  960. }
  961. // we need >= 1 digits followed by a dot and a space or a tab
  962. if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
  963. return 0
  964. }
  965. return i + 2
  966. }
  967. // returns definition list item prefix
  968. func (p *Markdown) dliPrefix(data []byte) int {
  969. if len(data) < 2 {
  970. return 0
  971. }
  972. i := 0
  973. // need a ':' followed by a space or a tab
  974. if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') {
  975. return 0
  976. }
  977. for i < len(data) && data[i] == ' ' {
  978. i++
  979. }
  980. return i + 2
  981. }
  982. // parse ordered or unordered list block
  983. func (p *Markdown) list(data []byte, flags ListType) int {
  984. i := 0
  985. flags |= ListItemBeginningOfList
  986. block := p.addBlock(List, nil)
  987. block.ListFlags = flags
  988. block.Tight = true
  989. for i < len(data) {
  990. skip := p.listItem(data[i:], &flags)
  991. if flags&ListItemContainsBlock != 0 {
  992. block.ListData.Tight = false
  993. }
  994. i += skip
  995. if skip == 0 || flags&ListItemEndOfList != 0 {
  996. break
  997. }
  998. flags &= ^ListItemBeginningOfList
  999. }
  1000. above := block.Parent
  1001. finalizeList(block)
  1002. p.tip = above
  1003. return i
  1004. }
  1005. // Returns true if block ends with a blank line, descending if needed
  1006. // into lists and sublists.
  1007. func endsWithBlankLine(block *Node) bool {
  1008. // TODO: figure this out. Always false now.
  1009. for block != nil {
  1010. //if block.lastLineBlank {
  1011. //return true
  1012. //}
  1013. t := block.Type
  1014. if t == List || t == Item {
  1015. block = block.LastChild
  1016. } else {
  1017. break
  1018. }
  1019. }
  1020. return false
  1021. }
  1022. func finalizeList(block *Node) {
  1023. block.open = false
  1024. item := block.FirstChild
  1025. for item != nil {
  1026. // check for non-final list item ending with blank line:
  1027. if endsWithBlankLine(item) && item.Next != nil {
  1028. block.ListData.Tight = false
  1029. break
  1030. }
  1031. // recurse into children of list item, to see if there are spaces
  1032. // between any of them:
  1033. subItem := item.FirstChild
  1034. for subItem != nil {
  1035. if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) {
  1036. block.ListData.Tight = false
  1037. break
  1038. }
  1039. subItem = subItem.Next
  1040. }
  1041. item = item.Next
  1042. }
  1043. }
// Parse a single list item.
// Assumes initial prefix is already removed if this is a sublist.
//
// data starts at the first line of the item. flags is both input and output:
// it tells the parser whether it is inside a definition list and receives
// ListTypeTerm, ListItemContainsBlock and ListItemEndOfList as they are
// discovered. The item's lines are gathered (with their indentation removed)
// into a temporary buffer, an Item block is added, and the buffer is then
// parsed either as block content or as a single inline paragraph.
//
// Returns the offset in data of the first line that does not belong to this
// item, or 0 if data does not start a list item (and is not a definition
// term).
func (p *Markdown) listItem(data []byte, flags *ListType) int {
	// keep track of the indentation of the first line
	// (a leading tab counts as 4, otherwise up to 3 spaces are counted)
	// NOTE(review): data[0] and data[itemIndent] are read without a length
	// check; presumably callers never pass empty or all-space data. Confirm.
	itemIndent := 0
	if data[0] == '\t' {
		itemIndent += 4
	} else {
		for itemIndent < 3 && data[itemIndent] == ' ' {
			itemIndent++
		}
	}

	// try the prefixes in order: unordered, ordered, definition.
	// bulletChar defaults to '*' and is only replaced for unordered items.
	var bulletChar byte = '*'
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	} else {
		// uliPrefix returns the index just past "<bullet><space>", so the
		// bullet itself sits two bytes back
		bulletChar = data[i-2]
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ListTypeDefinition != 0 {
			*flags |= ListTypeTerm
		} else {
			// no recognised prefix and not in a definition list: not an item
			return 0
		}
	}

	// skip leading whitespace on first line
	for i < len(data) && data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (the i > 0 guard protects data[i-1]; when i is still 0 the loop is
	// skipped, nothing is copied below, and the first line is picked up by
	// the gatherlines loop instead)
	line := i
	for i > 0 && i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false // set while one or more blank lines are pending
	sublist := 0               // offset in raw where the first nested list item begins; 0 if none

gatherlines:
	for line < len(data) {
		// i equals line here; step past the first byte so the scan below can
		// test data[i-1]
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// indent is the visual width (a tab counts as 4, spaces are capped at 4);
		// indentIndex is the number of bytes to strip from the line
		indent := 0
		indentIndex := 0
		if data[line] == '\t' {
			indentIndex++
			indent += 4
		} else {
			for indent < 4 && line+indent < i && data[line+indent] == ' ' {
				indent++
				indentIndex++
			}
		}

		// the current line with its indentation removed
		chunk := data[line+indentIndex : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		// (a line that also qualifies as a horizontal rule is not treated as
		// an unordered item)
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			if containsBlankLine {
				*flags |= ListItemContainsBlock
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			// remember where in raw the nested list starts
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix heading?
		case p.isPrefixHeading(chunk):
			// if the heading is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ListItemEndOfList
				break gatherlines
			}
			*flags |= ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// next is moved to the end of the line starting at i and then
				// over the newlines that follow it
				next := i
				for next < len(data) && data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				// NOTE(review): if the line starting at i has no terminating
				// newline, next equals len(data) here and data[next] would be
				// out of range; presumably data always ends in '\n'. Confirm.
				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
					*flags |= ListItemEndOfList
				}
			} else {
				*flags |= ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is written in addition to the one re-introduced below)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indentIndex : i])

		line = i
	}

	rawBytes := raw.Bytes()

	// create the Item node; the flags stored are the ones accumulated above
	block := p.addBlock(Item, nil)
	block.ListFlags = *flags
	block.Tight = false
	block.BulletChar = bulletChar
	block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark

	// render the contents of the list item
	if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		if sublist > 0 {
			// the text before the nested list and the nested list itself are
			// parsed as two separate runs of blocks
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		if sublist > 0 {
			// the leading text becomes a paragraph child; only the nested
			// list part goes through the block parser
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes[:sublist]
			p.block(rawBytes[sublist:])
		} else {
			child := p.addChild(Paragraph, 0)
			child.content = rawBytes
		}
	}
	return line
}
  1212. // render a single paragraph that has already been parsed out
  1213. func (p *Markdown) renderParagraph(data []byte) {
  1214. if len(data) == 0 {
  1215. return
  1216. }
  1217. // trim leading spaces
  1218. beg := 0
  1219. for data[beg] == ' ' {
  1220. beg++
  1221. }
  1222. end := len(data)
  1223. // trim trailing newline
  1224. if data[len(data)-1] == '\n' {
  1225. end--
  1226. }
  1227. // trim trailing spaces
  1228. for end > beg && data[end-1] == ' ' {
  1229. end--
  1230. }
  1231. p.addBlock(Paragraph, data[beg:end])
  1232. }
  1233. func (p *Markdown) paragraph(data []byte) int {
  1234. // prev: index of 1st char of previous line
  1235. // line: index of 1st char of current line
  1236. // i: index of cursor/end of current line
  1237. var prev, line, i int
  1238. tabSize := TabSizeDefault
  1239. if p.extensions&TabSizeEight != 0 {
  1240. tabSize = TabSizeDouble
  1241. }
  1242. // keep going until we find something to mark the end of the paragraph
  1243. for i < len(data) {
  1244. // mark the beginning of the current line
  1245. prev = line
  1246. current := data[i:]
  1247. line = i
  1248. // did we find a reference or a footnote? If so, end a paragraph
  1249. // preceding it and report that we have consumed up to the end of that
  1250. // reference:
  1251. if refEnd := isReference(p, current, tabSize); refEnd > 0 {
  1252. p.renderParagraph(data[:i])
  1253. return i + refEnd
  1254. }
  1255. // did we find a blank line marking the end of the paragraph?
  1256. if n := p.isEmpty(current); n > 0 {
  1257. // did this blank line followed by a definition list item?
  1258. if p.extensions&DefinitionLists != 0 {
  1259. if i < len(data)-1 && data[i+1] == ':' {
  1260. return p.list(data[prev:], ListTypeDefinition)
  1261. }
  1262. }
  1263. p.renderParagraph(data[:i])
  1264. return i + n
  1265. }
  1266. // an underline under some text marks a heading, so our paragraph ended on prev line
  1267. if i > 0 {
  1268. if level := p.isUnderlinedHeading(current); level > 0 {
  1269. // render the paragraph
  1270. p.renderParagraph(data[:prev])
  1271. // ignore leading and trailing whitespace
  1272. eol := i - 1
  1273. for prev < eol && data[prev] == ' ' {
  1274. prev++
  1275. }
  1276. for eol > prev && data[eol-1] == ' ' {
  1277. eol--
  1278. }
  1279. id := ""
  1280. if p.extensions&AutoHeadingIDs != 0 {
  1281. id = sanitized_anchor_name.Create(string(data[prev:eol]))
  1282. }
  1283. block := p.addBlock(Heading, data[prev:eol])
  1284. block.Level = level
  1285. block.HeadingID = id
  1286. // find the end of the underline
  1287. for i < len(data) && data[i] != '\n' {
  1288. i++
  1289. }
  1290. return i
  1291. }
  1292. }
  1293. // if the next line starts a block of HTML, then the paragraph ends here
  1294. if p.extensions&LaxHTMLBlocks != 0 {
  1295. if data[i] == '<' && p.html(current, false) > 0 {
  1296. // rewind to before the HTML block
  1297. p.renderParagraph(data[:i])
  1298. return i
  1299. }
  1300. }
  1301. // if there's a prefixed heading or a horizontal rule after this, paragraph is over
  1302. if p.isPrefixHeading(current) || p.isHRule(current) {
  1303. p.renderParagraph(data[:i])
  1304. return i
  1305. }
  1306. // if there's a fenced code block, paragraph is over
  1307. if p.extensions&FencedCode != 0 {
  1308. if p.fencedCodeBlock(current, false) > 0 {
  1309. p.renderParagraph(data[:i])
  1310. return i
  1311. }
  1312. }
  1313. // if there's a definition list item, prev line is a definition term
  1314. if p.extensions&DefinitionLists != 0 {
  1315. if p.dliPrefix(current) != 0 {
  1316. ret := p.list(data[prev:], ListTypeDefinition)
  1317. return ret
  1318. }
  1319. }
  1320. // if there's a list after this, paragraph is over
  1321. if p.extensions&NoEmptyLineBeforeBlock != 0 {
  1322. if p.uliPrefix(current) != 0 ||
  1323. p.oliPrefix(current) != 0 ||
  1324. p.quotePrefix(current) != 0 ||
  1325. p.codePrefix(current) != 0 {
  1326. p.renderParagraph(data[:i])
  1327. return i
  1328. }
  1329. }
  1330. // otherwise, scan to the beginning of the next line
  1331. nl := bytes.IndexByte(data[i:], '\n')
  1332. if nl >= 0 {
  1333. i += nl + 1
  1334. } else {
  1335. i += len(data[i:])
  1336. }
  1337. }
  1338. p.renderParagraph(data[:i])
  1339. return i
  1340. }
  1341. func skipChar(data []byte, start int, char byte) int {
  1342. i := start
  1343. for i < len(data) && data[i] == char {
  1344. i++
  1345. }
  1346. return i
  1347. }
  1348. func skipUntilChar(text []byte, start int, char byte) int {
  1349. i := start
  1350. for i < len(text) && text[i] != char {
  1351. i++
  1352. }
  1353. return i
  1354. }