Support escaping dots in CSS identifiers
Escaping hash characters and whitespace is _not_ supported as neither is valid in element/attribute names (e.g. <foo#bar /> is invalid XML/HTML). Escaping single/double quotes also won't be supported for the time being. It's quite a pain to get this to work right, not just in CSS but also in XPath and XML/HTML, for very little gain. Should there be enough users with an actual use case (other than "But the spec says ...!") I'll look into this again. Fixes #124
This commit is contained in:
parent
aef7c510c2
commit
44630c27ff
|
@ -49,6 +49,7 @@ module Oga
|
||||||
# @see [#add_token]
|
# @see [#add_token]
|
||||||
def advance(&block)
|
def advance(&block)
|
||||||
@block = block
|
@block = block
|
||||||
|
@escaped = false
|
||||||
|
|
||||||
data = @data # saves ivar lookups while lexing.
|
data = @data # saves ivar lookups while lexing.
|
||||||
ts = nil
|
ts = nil
|
||||||
|
@ -143,10 +144,22 @@ module Oga
|
||||||
# Identifiers are used for element and attribute names. Identifiers have
|
# Identifiers are used for element and attribute names. Identifiers have
|
||||||
# to start with a letter or an underscore.
|
# to start with a letter or an underscore.
|
||||||
|
|
||||||
identifier = '*' | [a-zA-Z_]+ [a-zA-Z\-_0-9]*;
|
ident_word = [a-zA-Z\-_0-9]*;
|
||||||
|
|
||||||
|
ident_escape = '\\.' %{ @escaped = true };
|
||||||
|
|
||||||
|
identifier = '*' | [a-zA-Z_]+ ident_word (ident_escape ident_word)*;
|
||||||
|
|
||||||
action emit_identifier {
|
action emit_identifier {
|
||||||
emit(:T_IDENT, ts, te)
|
value = slice_input(ts, te)
|
||||||
|
|
||||||
|
# Translates "foo\.bar" into "foo.bar"
|
||||||
|
if @escaped
|
||||||
|
value = value.gsub('\.', '.')
|
||||||
|
@escaped = false
|
||||||
|
end
|
||||||
|
|
||||||
|
add_token(:T_IDENT, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
# Operators
|
# Operators
|
||||||
|
|
|
@ -10,6 +10,18 @@ describe Oga::CSS::Lexer do
|
||||||
lex_css('_h3').should == [[:T_IDENT, '_h3']]
|
lex_css('_h3').should == [[:T_IDENT, '_h3']]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'lexes a path with an escaped identifier' do
|
||||||
|
lex_css('foo\.bar\.baz').should == [[:T_IDENT, 'foo.bar.baz']]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'lexes a path with an escaped identifier followed by another identifier' do
|
||||||
|
lex_css('foo\.bar baz').should == [
|
||||||
|
[:T_IDENT, 'foo.bar'],
|
||||||
|
[:T_SPACE, nil],
|
||||||
|
[:T_IDENT, 'baz']
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
it 'lexes a path with two members' do
|
it 'lexes a path with two members' do
|
||||||
lex_css('div h3').should == [
|
lex_css('div h3').should == [
|
||||||
[:T_IDENT, 'div'],
|
[:T_IDENT, 'div'],
|
||||||
|
|
Loading…
Reference in New Issue