Lex attributes with namespaces.

A namespaced attribute name such as `foo:bar` is lexed as a single token containing the full name instead of two separate tokens.
This commit is contained in:
Yorick Peterse 2014-04-10 11:01:49 +02:00
parent c974b96b88
commit b96f7c4852
2 changed files with 13 additions and 4 deletions

View File

@ -255,7 +255,7 @@ module Oga
newline = '\n' | '\r\n'; newline = '\n' | '\r\n';
whitespace = [ \t]; whitespace = [ \t];
identifier = [a-zA-Z0-9\-_]+; identifier = [a-zA-Z0-9\-_:]+;
# Strings # Strings
# #
@ -470,8 +470,7 @@ module Oga
fcall element_head; fcall element_head;
} }
element_name = [a-zA-Z0-9\-_:]+; element_start = '<' identifier;
element_start = '<' element_name;
# Machine used for processing the characters inside an element head. An # Machine used for processing the characters inside an element head. An
# element head is everything between `<NAME` (where NAME is the element # element head is everything between `<NAME` (where NAME is the element
@ -515,7 +514,7 @@ module Oga
}; };
# Regular closing tags. # Regular closing tags.
'</' element_name '>' => { '</' identifier '>' => {
emit_buffer emit_buffer
add_token(:T_ELEM_END, nil) add_token(:T_ELEM_END, nil)

View File

@ -73,6 +73,16 @@ describe Oga::XML::Lexer do
[:T_ELEM_END, nil, 1] [:T_ELEM_END, nil, 1]
] ]
end end
example 'lex a paragraph element with a namespaced attribute' do
  # The attribute name keeps its namespace prefix and colon inside a single
  # T_ATTR token rather than being split into separate tokens.
  expected_tokens = [
    [:T_ELEM_START, nil, 1],
    [:T_ELEM_NAME, 'p', 1],
    [:T_ATTR, 'foo:bar', 1],
    [:T_STRING, 'baz', 1],
    [:T_ELEM_END, nil, 1]
  ]

  lex('<p foo:bar="baz"></p>').should == expected_tokens
end
end end
context 'nested elements' do context 'nested elements' do