parent
44630c27ff
commit
37c5b819fa
|
@ -144,11 +144,16 @@ module Oga
|
||||||
# Identifiers are used for element and attribute names. Identifiers have
|
# Identifiers are used for element and attribute names. Identifiers have
|
||||||
# to start with a letter.
|
# to start with a letter, an underscore, or a non-ASCII (Unicode) character;
# a lone '*' is also accepted as an identifier.
|
||||||
|
|
||||||
ident_word = [a-zA-Z\-_0-9]*;
|
unicode = any - ascii;
|
||||||
|
|
||||||
ident_escape = '\\.' %{ @escaped = true };
|
unicode_or_ascii = (unicode | [a-zA-Z\-_0-9])*;
|
||||||
|
|
||||||
identifier = '*' | [a-zA-Z_]+ ident_word (ident_escape ident_word)*;
|
escaped_dot = '\\.' %{ @escaped = true };
|
||||||
|
|
||||||
|
identifier
|
||||||
|
= '*'
|
||||||
|
| (unicode | [a-zA-Z_]) unicode_or_ascii (escaped_dot unicode_or_ascii)*
|
||||||
|
;
|
||||||
|
|
||||||
action emit_identifier {
|
action emit_identifier {
|
||||||
value = slice_input(ts, te)
|
value = slice_input(ts, te)
|
||||||
|
|
|
@ -176,7 +176,11 @@ module Oga
|
||||||
# Identifiers are used for element names, namespaces, attribute names,
|
# Identifiers are used for element names, namespaces, attribute names,
|
||||||
# etc. Identifiers have to start with a letter.
|
# etc. Identifiers have to start with a letter, an underscore, or a non-ASCII
# (Unicode) character; a lone '*' is also accepted as an identifier.
|
||||||
|
|
||||||
identifier = '*' | [a-zA-Z_]+ [a-zA-Z\-_0-9]*;
|
unicode = any - ascii;
|
||||||
|
|
||||||
|
unicode_or_ascii = (unicode | [a-zA-Z\-_0-9\.])*;
|
||||||
|
|
||||||
|
identifier = '*' | (unicode | [a-zA-Z_]) unicode_or_ascii ;
|
||||||
|
|
||||||
action emit_identifier {
|
action emit_identifier {
|
||||||
emit(:T_IDENT, ts, te)
|
emit(:T_IDENT, ts, te)
|
||||||
|
|
|
@ -6,6 +6,14 @@ describe Oga::CSS::Lexer do
|
||||||
lex_css('h3').should == [[:T_IDENT, 'h3']]
|
lex_css('h3').should == [[:T_IDENT, 'h3']]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes a path with Unicode characters' do
|
||||||
|
lex_css('áâã').should == [[:T_IDENT, 'áâã']]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes a path with Unicode and ASCII characters' do
|
||||||
|
lex_css('áâãfoo').should == [[:T_IDENT, 'áâãfoo']]
|
||||||
|
end
|
||||||
|
|
||||||
it 'lexes a simple path starting with an underscore' do
|
it 'lexes a simple path starting with an underscore' do
|
||||||
lex_css('_h3').should == [[:T_IDENT, '_h3']]
|
lex_css('_h3').should == [[:T_IDENT, '_h3']]
|
||||||
end
|
end
|
||||||
|
|
|
@ -6,6 +6,18 @@ describe Oga::XPath::Lexer do
|
||||||
lex_xpath('/foo').should == [[:T_SLASH, nil], [:T_IDENT, 'foo']]
|
lex_xpath('/foo').should == [[:T_SLASH, nil], [:T_IDENT, 'foo']]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes an expression using Unicode identifiers' do
|
||||||
|
lex_xpath('fóó').should == [[:T_IDENT, 'fóó']]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an expression using Unicode plus ASCII identifiers' do
|
||||||
|
lex_xpath('fóóbar').should == [[:T_IDENT, 'fóóbar']]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes an expression using an identifier with a dot' do
|
||||||
|
lex_xpath('foo.bar').should == [[:T_IDENT, 'foo.bar']]
|
||||||
|
end
|
||||||
|
|
||||||
it 'lexes a simple expression with a test starting with an underscore' do
|
it 'lexes a simple expression with a test starting with an underscore' do
|
||||||
lex_xpath('/_foo').should == [[:T_SLASH, nil], [:T_IDENT, '_foo']]
|
lex_xpath('/_foo').should == [[:T_SLASH, nil], [:T_IDENT, '_foo']]
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue