-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenizer.go
More file actions
123 lines (106 loc) · 2.8 KB
/
tokenizer.go
File metadata and controls
123 lines (106 loc) · 2.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package GoHtml
import (
"io"
"strings"
"github.com/emirpasic/gods/stacks/linkedliststack"
"golang.org/x/net/html"
)
// Tokenizer wraps a *html.Tokenizer from golang.org/x/net/html.
// Its methods advance the wrapped tokenizer and convert the current token
// into a *Node.
type Tokenizer struct {
// z is the wrapped tokenizer; NewTokenizer sets it.
z *html.Tokenizer
}
// NewTokenizer builds a Tokenizer whose underlying html.Tokenizer reads
// from r.
// The reader r must not be nil.
func NewTokenizer(r io.Reader) Tokenizer {
	var t Tokenizer
	t.z = html.NewTokenizer(r)
	return t
}
// Advanced moves the underlying tokenizer on to the next token and reports
// the type of that token.
func (t *Tokenizer) Advanced() html.TokenType {
	tokenType := t.z.Next()
	return tokenType
}
// GetCurrentNode converts the token the tokenizer is currently positioned on
// into a *Node.
//
// It returns nil when the token data is empty or whitespace-only, and for
// every token type other than text, doctype, start tag and self-closing tag,
// so callers must nil-check the result whatever the token type is.
func (t *Tokenizer) GetCurrentNode() *Node {
	tok := t.z.Token()
	if len(strings.TrimSpace(tok.Data)) == 0 {
		return nil
	}

	// What tok.Data holds depends on the token type.
	switch tok.Type {
	case html.TextToken:
		// Data is the text content.
		return CreateTextNode(tok.Data)
	case html.DoctypeToken:
		// The doctype data is stored as an attribute key with an empty value.
		doctype := CreateNode(DOCTYPEDTD)
		doctype.SetAttribute(tok.Data, "")
		return doctype
	case html.StartTagToken, html.SelfClosingTagToken:
		// Data is the tag name; copy every attribute onto the new node.
		element := CreateNode(tok.Data)
		for i := range tok.Attr {
			element.SetAttribute(tok.Attr[i].Key, tok.Attr[i].Val)
		}
		return element
	}
	return nil
}
// NodeTreeBuilder is used to build a node tree given a node and its token type.
// Nodes are fed in one at a time through WriteNodeTree and the finished tree
// is collected with GetRootNode.
type NodeTreeBuilder struct {
// rootNode is a placeholder empty text node; GetRootNode returns the node
// that follows it and then replaces the placeholder with a fresh one.
rootNode *Node
// stack holds the nodes WriteNodeTree has pushed (non-text, non-void nodes)
// and not yet popped on an end tag.
stack *linkedliststack.Stack
// currentNode is the node most recently written, or the node popped off
// stack by the latest end tag.
currentNode *Node
}
// NewNodeTreeBuilder returns a NodeTreeBuilder ready for use: its root and
// current node are the same empty placeholder text node and its stack is
// empty.
func NewNodeTreeBuilder() NodeTreeBuilder {
	var ntb NodeTreeBuilder
	ntb.rootNode = CreateTextNode("")
	ntb.currentNode = ntb.rootNode
	ntb.stack = linkedliststack.New()
	return ntb
}
// WriteNodeTree adds node to the tree being built, using tt, the type of the
// token node was created from, to decide where it goes.
//
// For html.EndTagToken the node argument is not used: the top of the stack is
// popped and becomes the current node. An end tag arriving while the stack is
// empty is ignored.
//
// For doctype, start tag, self-closing tag and text tokens a nil node is
// ignored. Otherwise node is attached with AppendChild when the current node
// is the one on top of the stack (it is still open), and with Append on the
// current node otherwise. node then becomes the current node.
//
// Any other token type is ignored.
func (ntb *NodeTreeBuilder) WriteNodeTree(node *Node, tt html.TokenType) {
	switch tt {
	case html.EndTagToken:
		val, ok := ntb.stack.Pop()
		if !ok || val == nil {
			return
		}
		ntb.currentNode = val.(*Node)
	case html.DoctypeToken, html.StartTagToken, html.SelfClosingTagToken, html.TextToken:
		if node == nil {
			return
		}

		if isTopNode(ntb.currentNode, ntb.stack) {
			ntb.currentNode.AppendChild(node)
		} else {
			ntb.currentNode.Append(node)
		}

		// Only nodes that a later end tag will close belong on the stack.
		// A self-closing token (e.g. <path/> inside inline SVG) never gets a
		// matching end tag, so pushing it would leave the stack unbalanced:
		// the parent's end tag would pop this node instead of the parent and
		// every following node would be attached at the wrong level.
		// NOTE(review): this also treats <div/> as closed, whereas browsers
		// ignore the trailing slash on non-void HTML elements.
		if tt != html.SelfClosingTagToken && !node.IsTextNode() && !IsVoidTag(node.GetTagName()) {
			ntb.stack.Push(node)
		}
		ntb.currentNode = node
	}
}
// GetRootNode hands back the tree accumulated so far and resets the builder.
//
// The returned node is the one that follows the internal placeholder root.
// The placeholder is removed with RemoveNode, and the builder then starts
// over with a fresh placeholder and an empty stack.
func (ntb *NodeTreeBuilder) GetRootNode() *Node {
	placeholder := ntb.rootNode
	first := placeholder.GetNextNode()
	placeholder.RemoveNode()

	// Reset to the same state NewNodeTreeBuilder produces.
	fresh := CreateTextNode("")
	ntb.rootNode, ntb.currentNode = fresh, fresh
	ntb.stack = linkedliststack.New()

	return first
}
// isTopNode reports whether node is the value currently on top of stack.
// It reports false when the stack is empty or its top value is nil.
func isTopNode(node *Node, stack *linkedliststack.Stack) bool {
	top, found := stack.Peek()
	if found && top != nil {
		return top.(*Node) == node
	}
	return false
}