First step towards rewriting the CSS parser.
The new setup will not involve a separate transformation stage; instead, the CSS parser will directly emit an XPath AST. This reduces the overhead of parsing/evaluating CSS selectors while also simplifying the code. The downside is that I basically have to rewrite 80% of the parser.
This commit is contained in:
parent
ea2baa2020
commit
d4150fd0f5
|
@ -50,4 +50,3 @@ require_relative 'oga/xpath/evaluator'
|
||||||
|
|
||||||
require_relative 'oga/css/lexer'
|
require_relative 'oga/css/lexer'
|
||||||
require_relative 'oga/css/parser'
|
require_relative 'oga/css/parser'
|
||||||
require_relative 'oga/css/transformer'
|
|
||||||
|
|
|
@ -23,30 +23,55 @@ rule
|
||||||
;
|
;
|
||||||
|
|
||||||
expression
|
expression
|
||||||
: path
|
: steps { s(:path, *val[0]) }
|
||||||
| path_member
|
| step
|
||||||
;
|
;
|
||||||
|
|
||||||
path
|
steps
|
||||||
: path_members { s(:path, *val[0]) }
|
: step T_SPACE step { [val[0], val[2]] }
|
||||||
|
| step T_SPACE steps { [val[0], *val[2]] }
|
||||||
;
|
;
|
||||||
|
|
||||||
path_members
|
step
|
||||||
: path_member T_SPACE path_member { [val[0], val[2]] }
|
: step_test { s(:axis, 'descendant-or-self', val[0]) }
|
||||||
| path_member T_SPACE path_members { [val[0], *val[2]] }
|
|
||||||
;
|
;
|
||||||
|
|
||||||
path_member
|
step_test
|
||||||
: node_test
|
: element_test { val[0] }
|
||||||
| axis
|
| step_predicates { s(:test, nil, '*', val[0]) }
|
||||||
| pseudo_class
|
;
|
||||||
| class
|
|
||||||
|
step_predicates
|
||||||
|
: step_predicate
|
||||||
|
| step_predicates step_predicate { s(:and, val[0], val[1]) }
|
||||||
|
;
|
||||||
|
|
||||||
|
step_predicate
|
||||||
|
: class
|
||||||
| id
|
| id
|
||||||
|
#| axis
|
||||||
|
#| pseudo_class
|
||||||
;
|
;
|
||||||
|
|
||||||
node_test
|
element_test
|
||||||
: node_name { s(:test, *val[0]) }
|
# foo
|
||||||
|
: node_name { s(:test, *val[0]) }
|
||||||
|
|
||||||
|
# foo[bar]
|
||||||
| node_name predicate { s(:test, *val[0], val[1]) }
|
| node_name predicate { s(:test, *val[0], val[1]) }
|
||||||
|
|
||||||
|
# foo:root
|
||||||
|
| node_name step_predicates { s(:test, *val[0], val[1]) }
|
||||||
|
|
||||||
|
# foo[bar]:root
|
||||||
|
| node_name predicate step_predicates
|
||||||
|
{
|
||||||
|
s(:test, *val[0], s(:and, val[1], val[2]))
|
||||||
|
}
|
||||||
|
;
|
||||||
|
|
||||||
|
attribute_test
|
||||||
|
: node_name { s(:test, *val[0]) }
|
||||||
;
|
;
|
||||||
|
|
||||||
node_name
|
node_name
|
||||||
|
@ -65,108 +90,112 @@ rule
|
||||||
;
|
;
|
||||||
|
|
||||||
predicate_members
|
predicate_members
|
||||||
: node_test
|
: attribute_test
|
||||||
| operator
|
#| operator
|
||||||
;
|
;
|
||||||
|
|
||||||
class
|
class
|
||||||
: class_name { s(:class, nil, val[0]) }
|
: T_DOT T_IDENT
|
||||||
| path_member class_name { s(:class, val[0], val[1]) }
|
|
||||||
;
|
|
||||||
|
|
||||||
class_name
|
|
||||||
: T_DOT T_IDENT { val[1] }
|
|
||||||
;
|
|
||||||
|
|
||||||
id
|
|
||||||
: id_name { s(:id, nil, val[0]) }
|
|
||||||
| path_member id_name { s(:id, val[0], val[1]) }
|
|
||||||
;
|
|
||||||
|
|
||||||
id_name
|
|
||||||
: T_HASH T_IDENT { val[1] }
|
|
||||||
;
|
|
||||||
|
|
||||||
operator
|
|
||||||
: op_members T_EQ op_members { s(:eq, val[0], val[2]) }
|
|
||||||
| op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
|
|
||||||
| op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
|
|
||||||
| op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
|
|
||||||
| op_members T_IN op_members { s(:in, val[0], val[2]) }
|
|
||||||
| op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
|
|
||||||
;
|
|
||||||
|
|
||||||
op_members
|
|
||||||
: node_test
|
|
||||||
| string
|
|
||||||
;
|
|
||||||
|
|
||||||
axis
|
|
||||||
# x > y
|
|
||||||
: path_member T_CHILD path_member { s(:child, val[0], val[2]) }
|
|
||||||
|
|
||||||
# x + y
|
|
||||||
| path_member T_FOLLOWING path_member { s(:following, val[0], val[2]) }
|
|
||||||
|
|
||||||
# x ~ y
|
|
||||||
| path_member T_FOLLOWING_DIRECT path_member
|
|
||||||
{
|
{
|
||||||
s(:following_direct, val[0], val[2])
|
s(
|
||||||
|
:eq,
|
||||||
|
s(:axis, 'attribute', s(:test, nil, 'class')),
|
||||||
|
s(:string, val[1])
|
||||||
|
)
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
pseudo_class
|
id
|
||||||
# :root
|
: T_HASH T_IDENT
|
||||||
: pseudo_name { s(:pseudo, nil, val[0]) }
|
{
|
||||||
|
s(
|
||||||
# x:root
|
:eq,
|
||||||
| path_member pseudo_name { s(:pseudo, val[0], val[1]) }
|
s(:axis, 'attribute', s(:test, nil, 'id')),
|
||||||
|
s(:string, val[1])
|
||||||
# :nth-child(2)
|
)
|
||||||
| pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) }
|
}
|
||||||
|
|
||||||
# x:nth-child(2)
|
|
||||||
| path_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
|
|
||||||
;
|
;
|
||||||
|
|
||||||
pseudo_name
|
# operator
|
||||||
: T_COLON T_IDENT { val[1] }
|
# : op_members T_EQ op_members { s(:eq, val[0], val[2]) }
|
||||||
;
|
# | op_members T_SPACE_IN op_members { s(:space_in, val[0], val[2]) }
|
||||||
|
# | op_members T_STARTS_WITH op_members { s(:starts_with, val[0], val[2]) }
|
||||||
pseudo_args
|
# | op_members T_ENDS_WITH op_members { s(:ends_with, val[0], val[2]) }
|
||||||
: T_LPAREN pseudo_arg T_RPAREN { val[1] }
|
# | op_members T_IN op_members { s(:in, val[0], val[2]) }
|
||||||
;
|
# | op_members T_HYPHEN_IN op_members { s(:hyphen_in, val[0],val[2]) }
|
||||||
|
# ;
|
||||||
pseudo_arg
|
#
|
||||||
: integer
|
# op_members
|
||||||
| odd
|
# : node_test
|
||||||
| even
|
# | string
|
||||||
| nth
|
# ;
|
||||||
| node_test
|
#
|
||||||
;
|
# axis
|
||||||
|
# # x > y
|
||||||
odd
|
# : step_member T_CHILD step_member { s(:child, val[0], val[2]) }
|
||||||
: T_ODD { s(:odd) }
|
#
|
||||||
;
|
# # x + y
|
||||||
|
# | step_member T_FOLLOWING step_member { s(:following, val[0], val[2]) }
|
||||||
even
|
#
|
||||||
: T_EVEN { s(:even) }
|
# # x ~ y
|
||||||
;
|
# | step_member T_FOLLOWING_DIRECT step_member
|
||||||
|
# {
|
||||||
nth
|
# s(:following_direct, val[0], val[2])
|
||||||
: T_NTH { s(:nth) }
|
# }
|
||||||
| T_MINUS T_NTH { s(:nth) }
|
# ;
|
||||||
| integer T_NTH { s(:nth, val[0]) }
|
#
|
||||||
| integer T_NTH integer { s(:nth, val[0], val[2]) }
|
# pseudo_class
|
||||||
;
|
# # :root
|
||||||
|
# : pseudo_name { s(:pseudo, nil, val[0]) }
|
||||||
string
|
#
|
||||||
: T_STRING { s(:string, val[0]) }
|
# # x:root
|
||||||
;
|
# | step_member pseudo_name { s(:pseudo, val[0], val[1]) }
|
||||||
|
#
|
||||||
integer
|
# # :nth-child(2)
|
||||||
: T_INT { s(:int, val[0].to_i) }
|
# | pseudo_name pseudo_args { s(:pseudo, nil, val[0], val[1]) }
|
||||||
;
|
#
|
||||||
|
# # x:nth-child(2)
|
||||||
|
# | step_member pseudo_name pseudo_args { s(:pseudo, val[0], val[1], val[2]) }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# pseudo_name
|
||||||
|
# : T_COLON T_IDENT { val[1] }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# pseudo_args
|
||||||
|
# : T_LPAREN pseudo_arg T_RPAREN { val[1] }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# pseudo_arg
|
||||||
|
# : integer
|
||||||
|
# | odd
|
||||||
|
# | even
|
||||||
|
# | nth
|
||||||
|
# | node_test
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# odd
|
||||||
|
# : T_ODD { s(:odd) }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# even
|
||||||
|
# : T_EVEN { s(:even) }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# nth
|
||||||
|
# : T_NTH { s(:nth) }
|
||||||
|
# | T_MINUS T_NTH { s(:nth) }
|
||||||
|
# | integer T_NTH { s(:nth, val[0]) }
|
||||||
|
# | integer T_NTH integer { s(:nth, val[0], val[2]) }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# string
|
||||||
|
# : T_STRING { s(:string, val[0]) }
|
||||||
|
# ;
|
||||||
|
#
|
||||||
|
# integer
|
||||||
|
# : T_INT { s(:int, val[0].to_i) }
|
||||||
|
# ;
|
||||||
end
|
end
|
||||||
|
|
||||||
---- inner
|
---- inner
|
||||||
|
|
|
@ -1,30 +0,0 @@
|
||||||
module Oga
|
|
||||||
module CSS
|
|
||||||
##
|
|
||||||
# Transforms an CSS AST into a corresponding XPath AST.
|
|
||||||
#
|
|
||||||
class Transformer < AST::Processor
|
|
||||||
def on_class(node)
|
|
||||||
name, test = node.to_a
|
|
||||||
|
|
||||||
unless test
|
|
||||||
test = s(:test, nil, '*')
|
|
||||||
end
|
|
||||||
|
|
||||||
predicate = s(
|
|
||||||
:eq,
|
|
||||||
s(:axis, 'attribute', s(:test, nil, 'class')),
|
|
||||||
s(:string, name)
|
|
||||||
)
|
|
||||||
|
|
||||||
return s(:axis, 'child', test.updated(nil, test.children + [predicate]))
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def s(type, *children)
|
|
||||||
return AST::Node.new(type, children)
|
|
||||||
end
|
|
||||||
end # Transformer
|
|
||||||
end # CSS
|
|
||||||
end # Oga
|
|
|
@ -3,30 +3,26 @@ require 'spec_helper'
|
||||||
describe Oga::CSS::Parser do
|
describe Oga::CSS::Parser do
|
||||||
context 'classes' do
|
context 'classes' do
|
||||||
example 'parse a class selector' do
|
example 'parse a class selector' do
|
||||||
parse_css('.foo').should == s(:class, nil, 'foo')
|
parse_css('.foo').should == parse_xpath(
|
||||||
|
'descendant-or-self::*[@class="foo"]'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a selector for an element with a class' do
|
example 'parse a selector for an element with a class' do
|
||||||
parse_css('foo.bar').should == s(:class, s(:test, nil, 'foo'), 'bar')
|
parse_css('foo.bar').should == parse_xpath(
|
||||||
|
'descendant-or-self::foo[@class="bar"]'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a selector using multiple classes' do
|
example 'parse a selector using multiple classes' do
|
||||||
parse_css('.foo.bar').should == s(:class, s(:class, nil, 'foo'), 'bar')
|
parse_css('.foo.bar').should == parse_xpath(
|
||||||
|
'descendant-or-self::*[@class="foo" and @class="bar"]'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a selector using a class and an ID' do
|
example 'parse a selector using a class and an ID' do
|
||||||
parse_css('#foo.bar').should == s(:class, s(:id, nil, 'foo'), 'bar')
|
parse_css('#foo.bar').should == parse_xpath(
|
||||||
end
|
'descendant-or-self::*[@id="foo" and @class="bar"]'
|
||||||
|
|
||||||
example 'parse a selector using a class and a pseudo class' do
|
|
||||||
parse_css('.foo:root').should == s(:pseudo, s(:class, nil, 'foo'), 'root')
|
|
||||||
end
|
|
||||||
|
|
||||||
example 'parse a selector using a pseudo class and a class' do
|
|
||||||
parse_css('x:root.foo').should == s(
|
|
||||||
:class,
|
|
||||||
s(:pseudo, s(:test, nil, 'x'), 'root'),
|
|
||||||
'foo'
|
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,26 +3,26 @@ require 'spec_helper'
|
||||||
describe Oga::CSS::Parser do
|
describe Oga::CSS::Parser do
|
||||||
context 'IDs' do
|
context 'IDs' do
|
||||||
example 'parse an ID selector' do
|
example 'parse an ID selector' do
|
||||||
parse_css('#foo').should == s(:id, nil, 'foo')
|
parse_css('#foo').should == parse_xpath(
|
||||||
|
'descendant-or-self::*[@id="foo"]'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a selector for an element with an ID' do
|
example 'parse a selector for an element with an ID' do
|
||||||
parse_css('foo#bar').should == s(:id, s(:test, nil, 'foo'), 'bar')
|
parse_css('foo#bar').should == parse_xpath(
|
||||||
|
'descendant-or-self::foo[@id="bar"]'
|
||||||
|
)
|
||||||
|
end
|
||||||
|
|
||||||
|
example 'parse a selector using multiple IDs' do
|
||||||
|
parse_css('#foo#bar').should == parse_xpath(
|
||||||
|
'descendant-or-self::*[@id="foo" and @id="bar"]'
|
||||||
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
example 'parse a selector using an ID and a class' do
|
example 'parse a selector using an ID and a class' do
|
||||||
parse_css('.foo#bar').should == s(:id, s(:class, nil, 'foo'), 'bar')
|
parse_css('.foo#bar').should == parse_xpath(
|
||||||
end
|
'descendant-or-self::*[@class="foo" and @id="bar"]'
|
||||||
|
|
||||||
example 'parse a selector using an ID and a pseudo class' do
|
|
||||||
parse_css('#foo:root').should == s(:pseudo, s(:id, nil, 'foo'), 'root')
|
|
||||||
end
|
|
||||||
|
|
||||||
example 'parse a selector using a pseudo class and an ID' do
|
|
||||||
parse_css('x:root#foo').should == s(
|
|
||||||
:id,
|
|
||||||
s(:pseudo, s(:test, nil, 'x'), 'root'),
|
|
||||||
'foo'
|
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
require 'spec_helper'
|
|
||||||
|
|
||||||
describe Oga::CSS::Transformer do
|
|
||||||
context 'classes' do
|
|
||||||
example 'convert a class node without a node test' do
|
|
||||||
transform_css('.y').should == parse_xpath('*[@class="y"]')
|
|
||||||
end
|
|
||||||
|
|
||||||
example 'convert a class node with a node test' do
|
|
||||||
transform_css('x.y').should == parse_xpath('x[@class="y"]')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -89,17 +89,5 @@ module Oga
|
||||||
rescue Racc::ParseError => error
|
rescue Racc::ParseError => error
|
||||||
return error.message
|
return error.message
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
|
||||||
# Parses and transforms a CSS AST into an XPath AST.
|
|
||||||
#
|
|
||||||
# @param [String] css
|
|
||||||
# @return [AST::Node]
|
|
||||||
#
|
|
||||||
def transform_css(css)
|
|
||||||
ast = parse_css(css)
|
|
||||||
|
|
||||||
return Oga::CSS::Transformer.new.process(ast)
|
|
||||||
end
|
|
||||||
end # ParsingHelpers
|
end # ParsingHelpers
|
||||||
end # Oga
|
end # Oga
|
||||||
|
|
Loading…
Reference in New Issue