First step at rewriting the CSS parser.

The new setup will not involve a separate transformation stage; instead, the CSS
parser will directly emit an XPath AST. This reduces the overhead needed for
parsing/evaluating CSS selectors while also simplifying the code. The downside
is that I basically have to rewrite 80% of the parser.
This commit is contained in:
Yorick Peterse 2014-10-20 00:30:16 +02:00
parent ea2baa2020
commit d4150fd0f5
7 changed files with 161 additions and 192 deletions

View File

@ -50,4 +50,3 @@ require_relative 'oga/xpath/evaluator'
require_relative 'oga/css/lexer' require_relative 'oga/css/lexer'
require_relative 'oga/css/parser' require_relative 'oga/css/parser'
require_relative 'oga/css/transformer'

View File

@ -23,30 +23,55 @@ rule
; ;
expression expression
: path : steps { s(:path, *val[0]) }
| path_member | step
; ;
path steps
: path_members { s(:path, *val[0]) } : step T_SPACE step { [val[0], val[2]] }
| step T_SPACE steps { [val[0], *val[2]] }
; ;
path_members step
: path_member T_SPACE path_member { [val[0], val[2]] } : step_test { s(:axis, 'descendant-or-self', val[0]) }
| path_member T_SPACE path_members { [val[0], *val[2]] }
; ;
path_member step_test
: node_test : element_test { val[0] }
| axis | step_predicates { s(:test, nil, '*', val[0]) }
| pseudo_class ;
| class
step_predicates
: step_predicate
| step_predicates step_predicate { s(:and, val[0], val[1]) }
;
step_predicate
: class
| id | id
#| axis
#| pseudo_class
; ;
node_test element_test
: node_name { s(:test, *val[0]) } # foo
: node_name { s(:test, *val[0]) }
# foo[bar]
| node_name predicate { s(:test, *val[0], val[1]) } | node_name predicate { s(:test, *val[0], val[1]) }
# foo:root
| node_name step_predicates { s(:test, *val[0], val[1]) }
# foo[bar]:root
| node_name predicate step_predicates
{
s(:test, *val[0], s(:and, val[1], val[2]))
}
;
attribute_test
: node_name { s(:test, *val[0]) }
; ;
node_name node_name
@ -65,108 +90,112 @@ rule
; ;
predicate_members predicate_members
: node_test : attribute_test
| operator #| operator
; ;
class class
: class_name { s(:class, nil, val[0]) } : T_DOT T_IDENT
| path_member class_name { s(:class, val[0], val[1]) }
;
class_name
: T_DOT T_IDENT { val[1] }
;
id
: id_name { s(:id, nil, val[0]) }
| path_member id_name { s(:id, val[0], val[1]) }
;
id_name
: T_HASH T_IDENT { val[1] }
;
operator
: op_members T_EQ op_members { s(:eq, val[0], val[2]) }
| op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
| op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
| op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
| op_members T_IN op_members { s(:in, val[0], val[2]) }
| op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
;
op_members
: node_test
| string
;
axis
# x > y
: path_member T_CHILD path_member { s(:child, val[0], val[2]) }
# x + y
| path_member T_FOLLOWING path_member { s(:following, val[0], val[2]) }
# x ~ y
| path_member T_FOLLOWING_DIRECT path_member
{ {
s(:following_direct, val[0], val[2]) s(
:eq,
s(:axis, 'attribute', s(:test, nil, 'class')),
s(:string, val[1])
)
} }
; ;
pseudo_class id
# :root : T_HASH T_IDENT
: pseudo_name { s(:pseudo, nil, val[0]) } {
s(
# x:root :eq,
| path_member pseudo_name { s(:pseudo, val[0], val[1]) } s(:axis, 'attribute', s(:test, nil, 'id')),
s(:string, val[1])
# :nth-child(2) )
| pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) } }
# x:nth-child(2)
| path_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
; ;
pseudo_name # operator
: T_COLON T_IDENT { val[1] } # : op_members T_EQ op_members { s(:eq, val[0], val[2]) }
; # | op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
# | op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
pseudo_args # | op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
: T_LPAREN pseudo_arg T_RPAREN { val[1] } # | op_members T_IN op_members { s(:in, val[0], val[2]) }
; # | op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
# ;
pseudo_arg #
: integer # op_members
| odd # : node_test
| even # | string
| nth # ;
| node_test #
; # axis
# # x > y
odd # : step_member T_CHILD step_member { s(:child, val[0], val[2]) }
: T_ODD { s(:odd) } #
; # # x + y
# | step_member T_FOLLOWING step_member { s(:following, val[0], val[2]) }
even #
: T_EVEN { s(:even) } # # x ~ y
; # | step_member T_FOLLOWING_DIRECT step_member
# {
nth # s(:following_direct, val[0], val[2])
: T_NTH { s(:nth) } # }
| T_MINUS T_NTH { s(:nth) } # ;
| integer T_NTH { s(:nth, val[0]) } #
| integer T_NTH integer { s(:nth, val[0], val[2]) } # pseudo_class
; # # :root
# : pseudo_name { s(:pseudo, nil, val[0]) }
string #
: T_STRING { s(:string, val[0]) } # # x:root
; # | step_member pseudo_name { s(:pseudo, val[0], val[1]) }
#
integer # # :nth-child(2)
: T_INT { s(:int, val[0].to_i) } # | pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) }
; #
# # x:nth-child(2)
# | step_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
# ;
#
# pseudo_name
# : T_COLON T_IDENT { val[1] }
# ;
#
# pseudo_args
# : T_LPAREN pseudo_arg T_RPAREN { val[1] }
# ;
#
# pseudo_arg
# : integer
# | odd
# | even
# | nth
# | node_test
# ;
#
# odd
# : T_ODD { s(:odd) }
# ;
#
# even
# : T_EVEN { s(:even) }
# ;
#
# nth
# : T_NTH { s(:nth) }
# | T_MINUS T_NTH { s(:nth) }
# | integer T_NTH { s(:nth, val[0]) }
# | integer T_NTH integer { s(:nth, val[0], val[2]) }
# ;
#
# string
# : T_STRING { s(:string, val[0]) }
# ;
#
# integer
# : T_INT { s(:int, val[0].to_i) }
# ;
end end
---- inner ---- inner

View File

@ -1,30 +0,0 @@
module Oga
  module CSS
    ##
    # Transforms a CSS AST into a corresponding XPath AST.
    #
    class Transformer < AST::Processor
      ##
      # Converts a `class` node into an XPath `child` axis whose node test
      # carries an `@class = "name"` predicate.
      #
      # The children of a `class` node are the optional node test followed by
      # the class name, e.g. `(class nil "foo")` for `.foo` and
      # `(class (test nil "foo") "bar")` for `foo.bar`. They must therefore be
      # destructured in that order; reading them the other way around ends up
      # calling `updated` on the class name String.
      #
      # @param [AST::Node] node
      # @return [AST::Node]
      #
      def on_class(node)
        test, name = node.to_a

        # A selector without an element name (".foo") matches any element.
        test ||= s(:test, nil, '*')

        predicate = s(
          :eq,
          s(:axis, 'attribute', s(:test, nil, 'class')),
          s(:string, name)
        )

        # Append the predicate to whatever children the node test already has.
        return s(:axis, 'child', test.updated(nil, test.children + [predicate]))
      end

      private

      ##
      # Builds a new AST node.
      #
      # @param [Symbol] type
      # @param [Array] children
      # @return [AST::Node]
      #
      def s(type, *children)
        return AST::Node.new(type, children)
      end
    end # Transformer
  end # CSS
end # Oga

View File

@ -3,30 +3,26 @@ require 'spec_helper'
describe Oga::CSS::Parser do describe Oga::CSS::Parser do
context 'classes' do context 'classes' do
example 'parse a class selector' do example 'parse a class selector' do
parse_css('.foo').should == s(:class, nil, 'foo') parse_css('.foo').should == parse_xpath(
'descendant-or-self::*[@class="foo"]'
)
end end
example 'parse a selector for an element with a class' do example 'parse a selector for an element with a class' do
parse_css('foo.bar').should == s(:class, s(:test, nil, 'foo'), 'bar') parse_css('foo.bar').should == parse_xpath(
'descendant-or-self::foo[@class="bar"]'
)
end end
example 'parse a selector using multiple classes' do example 'parse a selector using multiple classes' do
parse_css('.foo.bar').should == s(:class, s(:class, nil, 'foo'), 'bar') parse_css('.foo.bar').should == parse_xpath(
'descendant-or-self::*[@class="foo" and @class="bar"]'
)
end end
example 'parse a selector using a class and an ID' do example 'parse a selector using a class and an ID' do
parse_css('#foo.bar').should == s(:class, s(:id, nil, 'foo'), 'bar') parse_css('#foo.bar').should == parse_xpath(
end 'descendant-or-self::*[@id="foo" and @class="bar"]'
example 'parse a selector using a class and a pseudo class' do
parse_css('.foo:root').should == s(:pseudo, s(:class, nil, 'foo'), 'root')
end
example 'parse a selector using a pseudo class and a class' do
parse_css('x:root.foo').should == s(
:class,
s(:pseudo, s(:test, nil, 'x'), 'root'),
'foo'
) )
end end
end end

View File

@ -3,26 +3,26 @@ require 'spec_helper'
describe Oga::CSS::Parser do describe Oga::CSS::Parser do
context 'IDs' do context 'IDs' do
example 'parse an ID selector' do example 'parse an ID selector' do
parse_css('#foo').should == s(:id, nil, 'foo') parse_css('#foo').should == parse_xpath(
'descendant-or-self::*[@id="foo"]'
)
end end
example 'parse a selector for an element with an ID' do example 'parse a selector for an element with an ID' do
parse_css('foo#bar').should == s(:id, s(:test, nil, 'foo'), 'bar') parse_css('foo#bar').should == parse_xpath(
'descendant-or-self::foo[@id="bar"]'
)
end
example 'parse a selector using multiple IDs' do
parse_css('#foo#bar').should == parse_xpath(
'descendant-or-self::*[@id="foo" and @id="bar"]'
)
end end
example 'parse a selector using an ID and a class' do example 'parse a selector using an ID and a class' do
parse_css('.foo#bar').should == s(:id, s(:class, nil, 'foo'), 'bar') parse_css('.foo#bar').should == parse_xpath(
end 'descendant-or-self::*[@class="foo" and @id="bar"]'
example 'parse a selector using an ID and a pseudo class' do
parse_css('#foo:root').should == s(:pseudo, s(:id, nil, 'foo'), 'root')
end
example 'parse a selector using a pseudo class and an ID' do
parse_css('x:root#foo').should == s(
:id,
s(:pseudo, s(:test, nil, 'x'), 'root'),
'foo'
) )
end end
end end

View File

@ -1,13 +0,0 @@
require 'spec_helper'
# Specs for Oga::CSS::Transformer. Each example compares the result of
# transform_css for a CSS class selector with the result of parse_xpath for
# the XPath expression it is expected to correspond to.
describe Oga::CSS::Transformer do
context 'classes' do
# No element name in the selector, so the expected XPath node test is "*".
example 'convert a class node without a node test' do
transform_css('.y').should == parse_xpath('*[@class="y"]')
end
# The element name in front of the class is expected to become the XPath
# node test.
example 'convert a class node with a node test' do
transform_css('x.y').should == parse_xpath('x[@class="y"]')
end
end
end

View File

@ -89,17 +89,5 @@ module Oga
rescue Racc::ParseError => error rescue Racc::ParseError => error
return error.message return error.message
end end
##
# Parses the given CSS selector and runs the resulting AST through
# Oga::CSS::Transformer, returning whatever the transformer produces.
#
# @param [String] css
# @return [AST::Node]
#
def transform_css(css)
  return Oga::CSS::Transformer.new.process(parse_css(css))
end
end # ParsingHelpers end # ParsingHelpers
end # Oga end # Oga