Support escaping dots in CSS identifiers

Escaping hash characters and whitespace is _not_ supported, as neither
is valid in element/attribute names (e.g. <foo#bar /> is invalid
XML/HTML).

Escaping single/double quotes also won't be supported for the time
being. It's quite a pain to get this to work right in not just CSS but
also XPath and XML/HTML, for very little gain. Should there be enough
users with an actual use case (other than "But the spec says ...!") I'll
look into this again.

Fixes 
This commit is contained in:
Yorick Peterse 2015-09-02 19:51:09 +02:00
parent aef7c510c2
commit 44630c27ff
2 changed files with 28 additions and 3 deletions
lib/oga/css
spec/oga/css/lexer

View File

@ -48,7 +48,8 @@ module Oga
#
# @see [#add_token]
def advance(&block)
@block = block
@block = block
@escaped = false
data = @data # saves ivar lookups while lexing.
ts = nil
@ -143,10 +144,22 @@ module Oga
# Identifiers are used for element and attribute names. Identifiers have
# to start with a letter or an underscore, or be the wildcard "*".
identifier = '*' | [a-zA-Z_]+ [a-zA-Z\-_0-9]*;
ident_word = [a-zA-Z\-_0-9]*;
ident_escape = '\\.' %{ @escaped = true };
identifier = '*' | [a-zA-Z_]+ ident_word (ident_escape ident_word)*;
action emit_identifier {
emit(:T_IDENT, ts, te)
value = slice_input(ts, te)
# Translates "foo\.bar" into "foo.bar"
if @escaped
value = value.gsub('\.', '.')
@escaped = false
end
add_token(:T_IDENT, value)
}
# Operators

View File

@ -10,6 +10,18 @@ describe Oga::CSS::Lexer do
lex_css('_h3').should == [[:T_IDENT, '_h3']]
end
it 'lexes a path with an escaped identifier' do
lex_css('foo\.bar\.baz').should == [[:T_IDENT, 'foo.bar.baz']]
end
it 'lexes a path with an escaped identifier followed by another identifier' do
lex_css('foo\.bar baz').should == [
[:T_IDENT, 'foo.bar'],
[:T_SPACE, nil],
[:T_IDENT, 'baz']
]
end
it 'lexes a path with two members' do
lex_css('div h3').should == [
[:T_IDENT, 'div'],