First step toward rewriting the CSS parser.
The new setup will not involve a separate transformation stage; instead, the CSS parser will directly emit an XPath AST. This reduces the overhead needed for parsing/evaluating CSS selectors while also simplifying the code. The downside is that I basically have to rewrite 80% of the parser.
This commit is contained in:
parent
ea2baa2020
commit
d4150fd0f5
|
@ -50,4 +50,3 @@ require_relative 'oga/xpath/evaluator'
|
|||
|
||||
require_relative 'oga/css/lexer'
|
||||
require_relative 'oga/css/parser'
|
||||
require_relative 'oga/css/transformer'
|
||||
|
|
|
@ -23,30 +23,55 @@ rule
|
|||
;
|
||||
|
||||
expression
|
||||
: path
|
||||
| path_member
|
||||
: steps { s(:path, *val[0]) }
|
||||
| step
|
||||
;
|
||||
|
||||
path
|
||||
: path_members { s(:path, *val[0]) }
|
||||
steps
|
||||
: step T_SPACE step { [val[0], val[2]] }
|
||||
| step T_SPACE steps { [val[0], *val[2]] }
|
||||
;
|
||||
|
||||
path_members
|
||||
: path_member T_SPACE path_member { [val[0], val[2]] }
|
||||
| path_member T_SPACE path_members { [val[0], *val[2]] }
|
||||
step
|
||||
: step_test { s(:axis, 'descendant-or-self', val[0]) }
|
||||
;
|
||||
|
||||
path_member
|
||||
: node_test
|
||||
| axis
|
||||
| pseudo_class
|
||||
| class
|
||||
step_test
|
||||
: element_test { val[0] }
|
||||
| step_predicates { s(:test, nil, '*', val[0]) }
|
||||
;
|
||||
|
||||
step_predicates
|
||||
: step_predicate
|
||||
| step_predicates step_predicate { s(:and, val[0], val[1]) }
|
||||
;
|
||||
|
||||
step_predicate
|
||||
: class
|
||||
| id
|
||||
#| axis
|
||||
#| pseudo_class
|
||||
;
|
||||
|
||||
node_test
|
||||
: node_name { s(:test, *val[0]) }
|
||||
element_test
|
||||
# foo
|
||||
: node_name { s(:test, *val[0]) }
|
||||
|
||||
# foo[bar]
|
||||
| node_name predicate { s(:test, *val[0], val[1]) }
|
||||
|
||||
# foo:root
|
||||
| node_name step_predicates { s(:test, *val[0], val[1]) }
|
||||
|
||||
# foo[bar]:root
|
||||
| node_name predicate step_predicates
|
||||
{
|
||||
s(:test, *val[0], s(:and, val[1], val[2]))
|
||||
}
|
||||
;
|
||||
|
||||
attribute_test
|
||||
: node_name { s(:test, *val[0]) }
|
||||
;
|
||||
|
||||
node_name
|
||||
|
@ -65,108 +90,112 @@ rule
|
|||
;
|
||||
|
||||
predicate_members
|
||||
: node_test
|
||||
| operator
|
||||
: attribute_test
|
||||
#| operator
|
||||
;
|
||||
|
||||
class
|
||||
: class_name { s(:class, nil, val[0]) }
|
||||
| path_member class_name { s(:class, val[0], val[1]) }
|
||||
;
|
||||
|
||||
class_name
|
||||
: T_DOT T_IDENT { val[1] }
|
||||
;
|
||||
|
||||
id
|
||||
: id_name { s(:id, nil, val[0]) }
|
||||
| path_member id_name { s(:id, val[0], val[1]) }
|
||||
;
|
||||
|
||||
id_name
|
||||
: T_HASH T_IDENT { val[1] }
|
||||
;
|
||||
|
||||
operator
|
||||
: op_members T_EQ op_members { s(:eq, val[0], val[2]) }
|
||||
| op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
|
||||
| op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
|
||||
| op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
|
||||
| op_members T_IN op_members { s(:in, val[0], val[2]) }
|
||||
| op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
|
||||
;
|
||||
|
||||
op_members
|
||||
: node_test
|
||||
| string
|
||||
;
|
||||
|
||||
axis
|
||||
# x > y
|
||||
: path_member T_CHILD path_member { s(:child, val[0], val[2]) }
|
||||
|
||||
# x + y
|
||||
| path_member T_FOLLOWING path_member { s(:following, val[0], val[2]) }
|
||||
|
||||
# x ~ y
|
||||
| path_member T_FOLLOWING_DIRECT path_member
|
||||
: T_DOT T_IDENT
|
||||
{
|
||||
s(:following_direct, val[0], val[2])
|
||||
s(
|
||||
:eq,
|
||||
s(:axis, 'attribute', s(:test, nil, 'class')),
|
||||
s(:string, val[1])
|
||||
)
|
||||
}
|
||||
;
|
||||
|
||||
pseudo_class
|
||||
# :root
|
||||
: pseudo_name { s(:pseudo, nil, val[0]) }
|
||||
|
||||
# x:root
|
||||
| path_member pseudo_name { s(:pseudo, val[0], val[1]) }
|
||||
|
||||
# :nth-child(2)
|
||||
| pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) }
|
||||
|
||||
# x:nth-child(2)
|
||||
| path_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
|
||||
id
|
||||
: T_HASH T_IDENT
|
||||
{
|
||||
s(
|
||||
:eq,
|
||||
s(:axis, 'attribute', s(:test, nil, 'id')),
|
||||
s(:string, val[1])
|
||||
)
|
||||
}
|
||||
;
|
||||
|
||||
pseudo_name
|
||||
: T_COLON T_IDENT { val[1] }
|
||||
;
|
||||
|
||||
pseudo_args
|
||||
: T_LPAREN pseudo_arg T_RPAREN { val[1] }
|
||||
;
|
||||
|
||||
pseudo_arg
|
||||
: integer
|
||||
| odd
|
||||
| even
|
||||
| nth
|
||||
| node_test
|
||||
;
|
||||
|
||||
odd
|
||||
: T_ODD { s(:odd) }
|
||||
;
|
||||
|
||||
even
|
||||
: T_EVEN { s(:even) }
|
||||
;
|
||||
|
||||
nth
|
||||
: T_NTH { s(:nth) }
|
||||
| T_MINUS T_NTH { s(:nth) }
|
||||
| integer T_NTH { s(:nth, val[0]) }
|
||||
| integer T_NTH integer { s(:nth, val[0], val[2]) }
|
||||
;
|
||||
|
||||
string
|
||||
: T_STRING { s(:string, val[0]) }
|
||||
;
|
||||
|
||||
integer
|
||||
: T_INT { s(:int, val[0].to_i) }
|
||||
;
|
||||
# operator
|
||||
# : op_members T_EQ op_members { s(:eq, val[0], val[2]) }
|
||||
# | op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
|
||||
# | op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
|
||||
# | op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
|
||||
# | op_members T_IN op_members { s(:in, val[0], val[2]) }
|
||||
# | op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
|
||||
# ;
|
||||
#
|
||||
# op_members
|
||||
# : node_test
|
||||
# | string
|
||||
# ;
|
||||
#
|
||||
# axis
|
||||
# # x > y
|
||||
# : step_member T_CHILD step_member { s(:child, val[0], val[2]) }
|
||||
#
|
||||
# # x + y
|
||||
# | step_member T_FOLLOWING step_member { s(:following, val[0], val[2]) }
|
||||
#
|
||||
# # x ~ y
|
||||
# | step_member T_FOLLOWING_DIRECT step_member
|
||||
# {
|
||||
# s(:following_direct, val[0], val[2])
|
||||
# }
|
||||
# ;
|
||||
#
|
||||
# pseudo_class
|
||||
# # :root
|
||||
# : pseudo_name { s(:pseudo, nil, val[0]) }
|
||||
#
|
||||
# # x:root
|
||||
# | step_member pseudo_name { s(:pseudo, val[0], val[1]) }
|
||||
#
|
||||
# # :nth-child(2)
|
||||
# | pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) }
|
||||
#
|
||||
# # x:nth-child(2)
|
||||
# | step_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
|
||||
# ;
|
||||
#
|
||||
# pseudo_name
|
||||
# : T_COLON T_IDENT { val[1] }
|
||||
# ;
|
||||
#
|
||||
# pseudo_args
|
||||
# : T_LPAREN pseudo_arg T_RPAREN { val[1] }
|
||||
# ;
|
||||
#
|
||||
# pseudo_arg
|
||||
# : integer
|
||||
# | odd
|
||||
# | even
|
||||
# | nth
|
||||
# | node_test
|
||||
# ;
|
||||
#
|
||||
# odd
|
||||
# : T_ODD { s(:odd) }
|
||||
# ;
|
||||
#
|
||||
# even
|
||||
# : T_EVEN { s(:even) }
|
||||
# ;
|
||||
#
|
||||
# nth
|
||||
# : T_NTH { s(:nth) }
|
||||
# | T_MINUS T_NTH { s(:nth) }
|
||||
# | integer T_NTH { s(:nth, val[0]) }
|
||||
# | integer T_NTH integer { s(:nth, val[0], val[2]) }
|
||||
# ;
|
||||
#
|
||||
# string
|
||||
# : T_STRING { s(:string, val[0]) }
|
||||
# ;
|
||||
#
|
||||
# integer
|
||||
# : T_INT { s(:int, val[0].to_i) }
|
||||
# ;
|
||||
end
|
||||
|
||||
---- inner
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
module Oga
|
||||
module CSS
|
||||
##
|
||||
# Transforms a CSS AST into a corresponding XPath AST.
|
||||
#
|
||||
class Transformer < AST::Processor
|
||||
def on_class(node)
|
||||
name, test = node.to_a
|
||||
|
||||
unless test
|
||||
test = s(:test, nil, '*')
|
||||
end
|
||||
|
||||
predicate = s(
|
||||
:eq,
|
||||
s(:axis, 'attribute', s(:test, nil, 'class')),
|
||||
s(:string, name)
|
||||
)
|
||||
|
||||
return s(:axis, 'child', test.updated(nil, test.children + [predicate]))
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def s(type, *children)
|
||||
return AST::Node.new(type, children)
|
||||
end
|
||||
end # Transformer
|
||||
end # CSS
|
||||
end # Oga
|
|
@ -3,30 +3,26 @@ require 'spec_helper'
|
|||
describe Oga::CSS::Parser do
|
||||
context 'classes' do
|
||||
example 'parse a class selector' do
|
||||
parse_css('.foo').should == s(:class, nil, 'foo')
|
||||
parse_css('.foo').should == parse_xpath(
|
||||
'descendant-or-self::*[@class="foo"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector for an element with a class' do
|
||||
parse_css('foo.bar').should == s(:class, s(:test, nil, 'foo'), 'bar')
|
||||
parse_css('foo.bar').should == parse_xpath(
|
||||
'descendant-or-self::foo[@class="bar"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector using multiple classes' do
|
||||
parse_css('.foo.bar').should == s(:class, s(:class, nil, 'foo'), 'bar')
|
||||
parse_css('.foo.bar').should == parse_xpath(
|
||||
'descendant-or-self::*[@class="foo" and @class="bar"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector using a class and an ID' do
|
||||
parse_css('#foo.bar').should == s(:class, s(:id, nil, 'foo'), 'bar')
|
||||
end
|
||||
|
||||
example 'parse a selector using a class and a pseudo class' do
|
||||
parse_css('.foo:root').should == s(:pseudo, s(:class, nil, 'foo'), 'root')
|
||||
end
|
||||
|
||||
example 'parse a selector using a pseudo class and a class' do
|
||||
parse_css('x:root.foo').should == s(
|
||||
:class,
|
||||
s(:pseudo, s(:test, nil, 'x'), 'root'),
|
||||
'foo'
|
||||
parse_css('#foo.bar').should == parse_xpath(
|
||||
'descendant-or-self::*[@id="foo" and @class="bar"]'
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -3,26 +3,26 @@ require 'spec_helper'
|
|||
describe Oga::CSS::Parser do
|
||||
context 'IDs' do
|
||||
example 'parse an ID selector' do
|
||||
parse_css('#foo').should == s(:id, nil, 'foo')
|
||||
parse_css('#foo').should == parse_xpath(
|
||||
'descendant-or-self::*[@id="foo"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector for an element with an ID' do
|
||||
parse_css('foo#bar').should == s(:id, s(:test, nil, 'foo'), 'bar')
|
||||
parse_css('foo#bar').should == parse_xpath(
|
||||
'descendant-or-self::foo[@id="bar"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector using multiple IDs' do
|
||||
parse_css('#foo#bar').should == parse_xpath(
|
||||
'descendant-or-self::*[@id="foo" and @id="bar"]'
|
||||
)
|
||||
end
|
||||
|
||||
example 'parse a selector using an ID and a class' do
|
||||
parse_css('.foo#bar').should == s(:id, s(:class, nil, 'foo'), 'bar')
|
||||
end
|
||||
|
||||
example 'parse a selector using an ID and a pseudo class' do
|
||||
parse_css('#foo:root').should == s(:pseudo, s(:id, nil, 'foo'), 'root')
|
||||
end
|
||||
|
||||
example 'parse a selector using a pseudo class and an ID' do
|
||||
parse_css('x:root#foo').should == s(
|
||||
:id,
|
||||
s(:pseudo, s(:test, nil, 'x'), 'root'),
|
||||
'foo'
|
||||
parse_css('.foo#bar').should == parse_xpath(
|
||||
'descendant-or-self::*[@class="foo" and @id="bar"]'
|
||||
)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::CSS::Transformer do
|
||||
context 'classes' do
|
||||
example 'convert a class node without a node test' do
|
||||
transform_css('.y').should == parse_xpath('*[@class="y"]')
|
||||
end
|
||||
|
||||
example 'convert a class node with a node test' do
|
||||
transform_css('x.y').should == parse_xpath('x[@class="y"]')
|
||||
end
|
||||
end
|
||||
end
|
|
@ -89,17 +89,5 @@ module Oga
|
|||
rescue Racc::ParseError => error
|
||||
return error.message
|
||||
end
|
||||
|
||||
##
|
||||
# Parses and transforms a CSS AST into an XPath AST.
|
||||
#
|
||||
# @param [String] css
|
||||
# @return [AST::Node]
|
||||
#
|
||||
def transform_css(css)
|
||||
ast = parse_css(css)
|
||||
|
||||
return Oga::CSS::Transformer.new.process(ast)
|
||||
end
|
||||
end # ParsingHelpers
|
||||
end # Oga
|
||||
|
|
Loading…
Reference in New Issue