diff --git a/lib/oga/xml/lexer.rl b/lib/oga/xml/lexer.rl index 6afa0c4..6bca0eb 100644 --- a/lib/oga/xml/lexer.rl +++ b/lib/oga/xml/lexer.rl @@ -255,7 +255,7 @@ module Oga newline = '\n' | '\r\n'; whitespace = [ \t]; - identifier = [a-zA-Z0-9\-_]+; + identifier = [a-zA-Z0-9\-_:]+; # Strings # @@ -470,8 +470,7 @@ module Oga fcall element_head; } - element_name = [a-zA-Z0-9\-_:]+; - element_start = '<' element_name; + element_start = '<' identifier; # Machine used for processing the characters inside a element head. An # element head is everything between `' => { + '' => { emit_buffer add_token(:T_ELEM_END, nil) diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb index 1e7e5f4..19bb346 100644 --- a/spec/oga/xml/lexer/elements_spec.rb +++ b/spec/oga/xml/lexer/elements_spec.rb @@ -73,6 +73,16 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ] end + + example 'lex a paragraph element with a namespaced attribute' do + lex('

').should == [ + [:T_ELEM_START, nil, 1], + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo:bar', 1], + [:T_STRING, 'baz', 1], + [:T_ELEM_END, nil, 1] + ] + end end context 'nested elements' do