Support whitespace in element closing tags

Fixes #108
This commit is contained in:
Yorick Peterse 2015-05-25 13:41:17 +02:00
parent d0d597e2d9
commit d2523a1082
3 changed files with 53 additions and 12 deletions

View File

@ -368,10 +368,12 @@
fnext element_name; fnext element_name;
} }
action close_element { action start_close_element {
callback(id_on_element_end, data, encoding, mark, te - 1); fnext element_close;
}
mark = 0; action close_element {
callback(id_on_element_end, data, encoding, ts, te);
} }
action close_element_fnext_main { action close_element_fnext_main {
@ -381,10 +383,7 @@
} }
element_start = '<' ident_char; element_start = '<' ident_char;
element_end = '</';
element_end = '</' %{ mark = p; } identifier '>'
| '</' identifier ':' %{ mark = p; } identifier '>'
;
# Machine used for lexing the name/namespace of an element. # Machine used for lexing the name/namespace of an element.
element_name := |* element_name := |*
@ -398,6 +397,28 @@
}; };
*|; *|;
# Machine used for lexing the closing tag of an element.
#
# The start_close_element action jumps here (`fnext element_close`) after the
# main machine has matched element_end, i.e. the leading `</`. The machine
# stays active until the closing `>` is seen, so any characters between the
# element name and `>` (such as spaces or newlines) are accepted.
element_close := |*
# namespace prefixes, currently not used but allows the rule below it
# to be used for the actual element name.
identifier ':';
# The element name itself; close_element invokes the id_on_element_end
# callback for the matched range.
identifier => close_element;
# End of the closing tag: flush any newlines counted while inside the tag
# into the line number, then hand control back to the main machine.
'>' => {
if ( lines > 0 )
{
advance_line(lines);
lines = 0;
}
fnext main;
};
# Any other character is consumed without a token callback of its own.
# count_newlines is defined elsewhere in the grammar; presumably it bumps
# `lines` for every newline seen -- confirm against its definition.
any $count_newlines;
*|;
# Characters that can be used for unquoted HTML attribute values. # Characters that can be used for unquoted HTML attribute values.
# See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example # See https://html.spec.whatwg.org/multipage/introduction.html#intro-early-example
# for more info. # for more info.
@ -587,7 +608,7 @@
cdata_start => start_cdata; cdata_start => start_cdata;
proc_ins_start => start_proc_ins; proc_ins_start => start_proc_ins;
element_start => start_element; element_start => start_element;
element_end => close_element; element_end => start_close_element;
any => start_text; any => start_text;
*|; *|;
}%% }%%

View File

@ -73,6 +73,30 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 2] [:T_ELEM_END, nil, 2]
] ]
end end
# Whitespace between the element name and `>` must not produce extra tokens:
# the closing tag yields a single T_ELEM_END and the trailing text follows
# on the same line.
it 'lexes an element with a space in the closing tag' do
lex("<foo></foo >bar").should == [
[:T_ELEM_NAME, 'foo', 1],
[:T_ELEM_END, nil, 1],
[:T_TEXT, 'bar', 1]
]
end
# The newline sits inside the closing tag, after the element name.
# T_ELEM_END is still reported on line 1, while the text after `>` is
# reported on line 2.
it 'lexes an element with a newline in the closing tag' do
lex("<foo></foo\n>bar").should == [
[:T_ELEM_NAME, 'foo', 1],
[:T_ELEM_END, nil, 1],
[:T_TEXT, 'bar', 2]
]
end
# Same input and expected tokens as the String-based newline example, but fed
# through a StringIO so the IO input path is covered as well.
it 'lexes an element with a newline in the closing tag using an IO as input' do
lex(StringIO.new("<foo></foo\n>bar")).should == [
[:T_ELEM_NAME, 'foo', 1],
[:T_ELEM_END, nil, 1],
[:T_TEXT, 'bar', 2]
]
end
end end
describe 'elements with attributes' do describe 'elements with attributes' do

View File

@ -30,10 +30,6 @@ describe Oga::XML::Lexer do
lex('>').should == [[:T_TEXT, '>', 1]] lex('>').should == [[:T_TEXT, '>', 1]]
end end
it 'lexes </ as regular text' do
lex('</').should == [[:T_TEXT, '</', 1]]
end
it 'lexes <! as regular text' do it 'lexes <! as regular text' do
lex('<!').should == [[:T_TEXT, '<!', 1]] lex('<!').should == [[:T_TEXT, '<!', 1]]
end end