node.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. package blackfriday
  2. import (
  3. "bytes"
  4. "fmt"
  5. )
  6. // NodeType specifies a type of a single node of a syntax tree. Usually one
  7. // node (and its type) corresponds to a single markdown feature, e.g. emphasis
  8. // or code block.
  9. type NodeType int
  10. // Constants for identifying different types of nodes. See NodeType.
  11. const (
  12. Document NodeType = iota
  13. BlockQuote
  14. List
  15. Item
  16. Paragraph
  17. Heading
  18. HorizontalRule
  19. Emph
  20. Strong
  21. Del
  22. Link
  23. Image
  24. Text
  25. HTMLBlock
  26. CodeBlock
  27. Softbreak
  28. Hardbreak
  29. Code
  30. HTMLSpan
  31. Table
  32. TableCell
  33. TableHead
  34. TableBody
  35. TableRow
  36. )
  37. var nodeTypeNames = []string{
  38. Document: "Document",
  39. BlockQuote: "BlockQuote",
  40. List: "List",
  41. Item: "Item",
  42. Paragraph: "Paragraph",
  43. Heading: "Heading",
  44. HorizontalRule: "HorizontalRule",
  45. Emph: "Emph",
  46. Strong: "Strong",
  47. Del: "Del",
  48. Link: "Link",
  49. Image: "Image",
  50. Text: "Text",
  51. HTMLBlock: "HTMLBlock",
  52. CodeBlock: "CodeBlock",
  53. Softbreak: "Softbreak",
  54. Hardbreak: "Hardbreak",
  55. Code: "Code",
  56. HTMLSpan: "HTMLSpan",
  57. Table: "Table",
  58. TableCell: "TableCell",
  59. TableHead: "TableHead",
  60. TableBody: "TableBody",
  61. TableRow: "TableRow",
  62. }
  63. func (t NodeType) String() string {
  64. return nodeTypeNames[t]
  65. }
  66. // ListData contains fields relevant to a List and Item node type.
  67. type ListData struct {
  68. ListFlags ListType
  69. Tight bool // Skip <p>s around list item data if true
  70. BulletChar byte // '*', '+' or '-' in bullet lists
  71. Delimiter byte // '.' or ')' after the number in ordered lists
  72. RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
  73. IsFootnotesList bool // This is a list of footnotes
  74. }
  75. // LinkData contains fields relevant to a Link node type.
  76. type LinkData struct {
  77. Destination []byte // Destination is what goes into a href
  78. Title []byte // Title is the tooltip thing that goes in a title attribute
  79. NoteID int // NoteID contains a serial number of a footnote, zero if it's not a footnote
  80. Footnote *Node // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
  81. }
  82. // CodeBlockData contains fields relevant to a CodeBlock node type.
  83. type CodeBlockData struct {
  84. IsFenced bool // Specifies whether it's a fenced code block or an indented one
  85. Info []byte // This holds the info string
  86. FenceChar byte
  87. FenceLength int
  88. FenceOffset int
  89. }
  90. // TableCellData contains fields relevant to a TableCell node type.
  91. type TableCellData struct {
  92. IsHeader bool // This tells if it's under the header row
  93. Align CellAlignFlags // This holds the value for align attribute
  94. }
  95. // HeadingData contains fields relevant to a Heading node type.
  96. type HeadingData struct {
  97. Level int // This holds the heading level number
  98. HeadingID string // This might hold heading ID, if present
  99. IsTitleblock bool // Specifies whether it's a title block
  100. }
  101. // Node is a single element in the abstract syntax tree of the parsed document.
  102. // It holds connections to the structurally neighboring nodes and, for certain
  103. // types of nodes, additional information that might be needed when rendering.
  104. type Node struct {
  105. Type NodeType // Determines the type of the node
  106. Parent *Node // Points to the parent
  107. FirstChild *Node // Points to the first child, if any
  108. LastChild *Node // Points to the last child, if any
  109. Prev *Node // Previous sibling; nil if it's the first child
  110. Next *Node // Next sibling; nil if it's the last child
  111. Literal []byte // Text contents of the leaf nodes
  112. HeadingData // Populated if Type is Heading
  113. ListData // Populated if Type is List
  114. CodeBlockData // Populated if Type is CodeBlock
  115. LinkData // Populated if Type is Link
  116. TableCellData // Populated if Type is TableCell
  117. content []byte // Markdown content of the block nodes
  118. open bool // Specifies an open block node that has not been finished to process yet
  119. }
  120. // NewNode allocates a node of a specified type.
  121. func NewNode(typ NodeType) *Node {
  122. return &Node{
  123. Type: typ,
  124. open: true,
  125. }
  126. }
  127. func (n *Node) String() string {
  128. ellipsis := ""
  129. snippet := n.Literal
  130. if len(snippet) > 16 {
  131. snippet = snippet[:16]
  132. ellipsis = "..."
  133. }
  134. return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
  135. }
  136. // Unlink removes node 'n' from the tree.
  137. // It panics if the node is nil.
  138. func (n *Node) Unlink() {
  139. if n.Prev != nil {
  140. n.Prev.Next = n.Next
  141. } else if n.Parent != nil {
  142. n.Parent.FirstChild = n.Next
  143. }
  144. if n.Next != nil {
  145. n.Next.Prev = n.Prev
  146. } else if n.Parent != nil {
  147. n.Parent.LastChild = n.Prev
  148. }
  149. n.Parent = nil
  150. n.Next = nil
  151. n.Prev = nil
  152. }
  153. // AppendChild adds a node 'child' as a child of 'n'.
  154. // It panics if either node is nil.
  155. func (n *Node) AppendChild(child *Node) {
  156. child.Unlink()
  157. child.Parent = n
  158. if n.LastChild != nil {
  159. n.LastChild.Next = child
  160. child.Prev = n.LastChild
  161. n.LastChild = child
  162. } else {
  163. n.FirstChild = child
  164. n.LastChild = child
  165. }
  166. }
  167. // InsertBefore inserts 'sibling' immediately before 'n'.
  168. // It panics if either node is nil.
  169. func (n *Node) InsertBefore(sibling *Node) {
  170. sibling.Unlink()
  171. sibling.Prev = n.Prev
  172. if sibling.Prev != nil {
  173. sibling.Prev.Next = sibling
  174. }
  175. sibling.Next = n
  176. n.Prev = sibling
  177. sibling.Parent = n.Parent
  178. if sibling.Prev == nil {
  179. sibling.Parent.FirstChild = sibling
  180. }
  181. }
  182. func (n *Node) isContainer() bool {
  183. switch n.Type {
  184. case Document:
  185. fallthrough
  186. case BlockQuote:
  187. fallthrough
  188. case List:
  189. fallthrough
  190. case Item:
  191. fallthrough
  192. case Paragraph:
  193. fallthrough
  194. case Heading:
  195. fallthrough
  196. case Emph:
  197. fallthrough
  198. case Strong:
  199. fallthrough
  200. case Del:
  201. fallthrough
  202. case Link:
  203. fallthrough
  204. case Image:
  205. fallthrough
  206. case Table:
  207. fallthrough
  208. case TableHead:
  209. fallthrough
  210. case TableBody:
  211. fallthrough
  212. case TableRow:
  213. fallthrough
  214. case TableCell:
  215. return true
  216. default:
  217. return false
  218. }
  219. }
  220. func (n *Node) canContain(t NodeType) bool {
  221. if n.Type == List {
  222. return t == Item
  223. }
  224. if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
  225. return t != Item
  226. }
  227. if n.Type == Table {
  228. return t == TableHead || t == TableBody
  229. }
  230. if n.Type == TableHead || n.Type == TableBody {
  231. return t == TableRow
  232. }
  233. if n.Type == TableRow {
  234. return t == TableCell
  235. }
  236. return false
  237. }
  238. // WalkStatus allows NodeVisitor to have some control over the tree traversal.
  239. // It is returned from NodeVisitor and different values allow Node.Walk to
  240. // decide which node to go to next.
  241. type WalkStatus int
  242. const (
  243. // GoToNext is the default traversal of every node.
  244. GoToNext WalkStatus = iota
  245. // SkipChildren tells walker to skip all children of current node.
  246. SkipChildren
  247. // Terminate tells walker to terminate the traversal.
  248. Terminate
  249. )
  250. // NodeVisitor is a callback to be called when traversing the syntax tree.
  251. // Called twice for every node: once with entering=true when the branch is
  252. // first visited, then with entering=false after all the children are done.
  253. type NodeVisitor func(node *Node, entering bool) WalkStatus
  254. // Walk is a convenience method that instantiates a walker and starts a
  255. // traversal of subtree rooted at n.
  256. func (n *Node) Walk(visitor NodeVisitor) {
  257. w := newNodeWalker(n)
  258. for w.current != nil {
  259. status := visitor(w.current, w.entering)
  260. switch status {
  261. case GoToNext:
  262. w.next()
  263. case SkipChildren:
  264. w.entering = false
  265. w.next()
  266. case Terminate:
  267. return
  268. }
  269. }
  270. }
  271. type nodeWalker struct {
  272. current *Node
  273. root *Node
  274. entering bool
  275. }
  276. func newNodeWalker(root *Node) *nodeWalker {
  277. return &nodeWalker{
  278. current: root,
  279. root: root,
  280. entering: true,
  281. }
  282. }
  283. func (nw *nodeWalker) next() {
  284. if (!nw.current.isContainer() || !nw.entering) && nw.current == nw.root {
  285. nw.current = nil
  286. return
  287. }
  288. if nw.entering && nw.current.isContainer() {
  289. if nw.current.FirstChild != nil {
  290. nw.current = nw.current.FirstChild
  291. nw.entering = true
  292. } else {
  293. nw.entering = false
  294. }
  295. } else if nw.current.Next == nil {
  296. nw.current = nw.current.Parent
  297. nw.entering = false
  298. } else {
  299. nw.current = nw.current.Next
  300. nw.entering = true
  301. }
  302. }
  303. func dump(ast *Node) {
  304. fmt.Println(dumpString(ast))
  305. }
  306. func dumpR(ast *Node, depth int) string {
  307. if ast == nil {
  308. return ""
  309. }
  310. indent := bytes.Repeat([]byte("\t"), depth)
  311. content := ast.Literal
  312. if content == nil {
  313. content = ast.content
  314. }
  315. result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
  316. for n := ast.FirstChild; n != nil; n = n.Next {
  317. result += dumpR(n, depth+1)
  318. }
  319. return result
  320. }
  321. func dumpString(ast *Node) string {
  322. return dumpR(ast, 0)
  323. }