@@ -130,22 +130,19 @@ function wrap(tree, file) {
130
130
// Hand an element node to the parser: first its start tag, then its children,
// and finally its end tag, unless the tag name is listed in `voids`.
function element(node) {
  var isVoid = voids.indexOf(node.tagName) > -1

  // The tokenizer is reset before each token is processed.
  resetTokenizer()
  parser._processToken(startTag(node), ns.html)

  all(node.children)

  // Tag names in `voids` get no end tag.
  if (isVoid) {
    return
  }

  resetTokenizer()
  parser._processToken(endTag(node))
}
147
143
148
144
function text ( node ) {
145
+ resetTokenizer ( )
149
146
parser . _processToken ( {
150
147
type : characterToken ,
151
148
chars : node . value ,
@@ -155,7 +152,7 @@ function wrap(tree, file) {
155
152
156
153
function doctype ( node ) {
157
154
var p5 = toParse5 ( node )
158
-
155
+ resetTokenizer ( )
159
156
parser . _processToken ( {
160
157
type : doctypeToken ,
161
158
name : p5 . name ,
@@ -167,6 +164,7 @@ function wrap(tree, file) {
167
164
}
168
165
169
166
function comment ( node ) {
167
+ resetTokenizer ( )
170
168
parser . _processToken ( {
171
169
type : commentToken ,
172
170
data : node . value ,
@@ -182,35 +180,38 @@ function wrap(tree, file) {
182
180
var token
183
181
184
182
// Reset preprocessor:
185
- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/tokenizer/preprocessor.js>.
183
+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/preprocessor.js>.
186
184
preprocessor . html = null
187
- preprocessor . endOfChunkHit = false
188
- preprocessor . lastChunkWritten = false
189
- preprocessor . lastCharPos = - 1
190
185
preprocessor . pos = - 1
186
+ preprocessor . lastGapPos = - 1
187
+ preprocessor . lastCharPos = - 1
188
+ preprocessor . gapStack = [ ]
189
+ preprocessor . skipNextNewLine = false
190
+ preprocessor . lastChunkWritten = false
191
+ preprocessor . endOfChunkHit = false
191
192
192
193
// Reset preprocessor mixin:
193
- // See: <https://github.com/inikulin/parse5/blob/0491902/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
194
- posTracker . droppedBufferSize = 0
195
- posTracker . line = line
196
- posTracker . col = 1
197
- posTracker . offset = 0
198
- posTracker . lineStartPos = - column + 1
194
+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/position-tracking/preprocessor-mixin.js>.
195
+ posTracker . isEol = false
196
+ posTracker . lineStartPos = - column + 1 // Looks weird, but ensures we get correct positional info.
199
197
posTracker . droppedBufferSize = offset
198
+ posTracker . offset = 0
199
+ posTracker . col = 1
200
+ posTracker . line = line
200
201
201
202
// Reset location tracker:
202
- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
203
+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js>.
203
204
locationTracker . currentAttrLocation = null
204
205
locationTracker . ctLoc = createParse5Location ( node )
205
206
206
207
// See the code for `parse` and `parseFragment`:
207
- // See: <https://github.com/inikulin/parse5/blob/0491902 /packages/parse5/lib/parser/index.js#L371>.
208
+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/parser/index.js#L371>.
208
209
tokenizer . write ( node . value )
209
210
parser . _runParsingLoop ( null )
210
211
211
212
// Process final characters if they’re still there after hibernating.
212
213
// Similar to:
213
- // See: <https://github.com/inikulin/parse5/blob/3bfa7d9 /packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
214
+ // See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/extensions/location-info/tokenizer-mixin.js#L95>.
214
215
token = tokenizer . currentCharacterToken
215
216
216
217
if ( token ) {
@@ -219,11 +220,26 @@ function wrap(tree, file) {
219
220
token . location . endOffset = posTracker . offset + 1
220
221
parser . _processToken ( token )
221
222
}
223
+ }
222
224
225
// Put the shared `tokenizer` back to blank values for the fields listed below.
// See: <https://github.com/inikulin/parse5/blob/9c683e1/packages/parse5/lib/tokenizer/index.js#L218-L234>.
function resetTokenizer() {
  // Returning to the `data` state matters most: some elements, such as
  // textareas and iframes, change the state (see GH-7), and so can broken
  // HTML in a `raw` node that is followed by a correct element (see GH-11).
  tokenizer.state = dataState
  tokenizer.returnState = ''
  tokenizer.active = false

  // Queues and buffers.
  tokenizer.tokenQueue = []
  tokenizer.tempBuff = []

  // Markers.
  tokenizer.charRefCode = -1
  tokenizer.consumedAfterSnapshot = -1
  tokenizer.lastStartTagName = ''

  // In-progress token, character token, and attribute.
  tokenizer.currentToken = null
  tokenizer.currentCharacterToken = null
  tokenizer.currentAttr = null
}
229
245
}
0 commit comments