Ignore nested element start tags
This ensures that Oga is able to tokenize input such as the following: `<script<script>foo</script>`. Oga will now treat this as: `<script>foo</script>`. This is based on libxml behaviour, which seems to differ a bit from Chromium, which treats the node as a text node. Matching Chromium's behaviour, however, would require complex look-ahead logic (as far as I can tell) that I really don't want to implement in Oga. Fixes #186
This commit is contained in:
parent
1e002de527
commit
f574197ea6
|
@ -389,6 +389,7 @@
|
|||
|
||||
element_start = '<' ident_char;
|
||||
element_end = '</';
|
||||
element_start_pattern = '<' identifier (':' identifier)?;
|
||||
|
||||
# Machine used for lexing the name/namespace of an element.
|
||||
element_name := |*
|
||||
|
@ -551,6 +552,7 @@
|
|||
# Machine used for processing the contents of an XML element's starting tag.
|
||||
element_head := |*
|
||||
newline => advance_newline;
|
||||
element_start_pattern;
|
||||
|
||||
# Attribute names and namespaces.
|
||||
identifier ':' => {
|
||||
|
@ -578,6 +580,7 @@
|
|||
# tag.
|
||||
html_element_head := |*
|
||||
newline => advance_newline;
|
||||
element_start_pattern;
|
||||
|
||||
html_identifier => {
|
||||
callback(id_on_attribute, data, encoding, ts, te);
|
||||
|
|
|
@ -344,4 +344,21 @@ describe Oga::XML::Lexer do
|
|||
[:T_ELEM_END, nil, 1]
|
||||
])
|
||||
end
|
||||
|
||||
it 'lexes an element with a nested opening tag' do
|
||||
expect(lex('<script<script>foo</script>')).to eq([
|
||||
[:T_ELEM_NAME, 'script', 1],
|
||||
[:T_TEXT, 'foo', 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
])
|
||||
end
|
||||
|
||||
it 'lexes an element with a nested opening tag followed by an attribute' do
|
||||
expect(lex('<script<script foo>foo</script>')).to eq([
|
||||
[:T_ELEM_NAME, 'script', 1],
|
||||
[:T_ATTR, 'foo', 1],
|
||||
[:T_TEXT, 'foo', 1],
|
||||
[:T_ELEM_END, nil, 1]
|
||||
])
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue