From e11b9ed32c3bd0191a9b80439a15d370df63a639 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Sun, 1 Jun 2014 23:02:28 +0200 Subject: [PATCH] Basic XPath parser setup. --- .gitignore | 1 + Rakefile | 5 +-- lib/oga.rb | 3 ++ lib/oga/xpath/node.rb | 10 +++++ lib/oga/xpath/parser.y | 81 +++++++++++++++++++++++++++++++++++ spec/oga/xpath/parser_spec.rb | 5 +++ task/parser.rake | 2 +- 7 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 lib/oga/xpath/node.rb create mode 100644 lib/oga/xpath/parser.y create mode 100644 spec/oga/xpath/parser_spec.rb diff --git a/.gitignore b/.gitignore index 4078dde..ad4f1c8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ Gemfile.lock lib/oga/xml/parser.rb lib/oga/xpath/lexer.rb +lib/oga/xpath/parser.rb lib/liboga.* diff --git a/Rakefile b/Rakefile index 1313afc..23e1d0f 100644 --- a/Rakefile +++ b/Rakefile @@ -18,13 +18,12 @@ else end end -PARSER_OUTPUT = 'lib/oga/xml/parser.rb' - CLEAN.include( 'coverage', 'yardoc', - PARSER_OUTPUT, + 'lib/oga/xml/parser.rb', 'lib/oga/xpath/lexer.rb', + 'lib/oga/xpath/parser.rb', 'benchmark/fixtures/big.xml', 'profile/samples/**/*.txt', 'lib/liboga.*', diff --git a/lib/oga.rb b/lib/oga.rb index 44ae6e6..731005e 100644 --- a/lib/oga.rb +++ b/lib/oga.rb @@ -1,3 +1,4 @@ +require 'ast' require 'set' # Load these first so that the native extensions don't have to define the @@ -23,4 +24,6 @@ require_relative 'oga/xml/doctype' require_relative 'oga/html/parser' +require_relative 'oga/xpath/node' require_relative 'oga/xpath/lexer' +require_relative 'oga/xpath/parser' diff --git a/lib/oga/xpath/node.rb b/lib/oga/xpath/node.rb new file mode 100644 index 0000000..da28e53 --- /dev/null +++ b/lib/oga/xpath/node.rb @@ -0,0 +1,10 @@ +module Oga + module XPath + ## + # AST node for XPath expressions. 
+ # + class Node < AST::Node + + end # Node + end # XPath +end # Oga diff --git a/lib/oga/xpath/parser.y b/lib/oga/xpath/parser.y new file mode 100644 index 0000000..6631418 --- /dev/null +++ b/lib/oga/xpath/parser.y @@ -0,0 +1,81 @@ +## +# Parser for XPath expressions. +# +class Oga::XPath::Parser + +token T_AXIS T_COLON T_COMMA T_FLOAT T_INT T_IDENT T_OP +token T_LBRACK T_RBRACK T_LPAREN T_RPAREN T_SLASH T_STRING + +options no_result_var + +rule + expressions + : expressions_ { s(:xpath, *val[0]) } + | /* none */ { s(:xpath) } + ; + + expressions_ + : expressions_ expression { val[0] << val[1] } + | expression { val } + ; + + expression + : node_test + ; + + node_test + : T_IDENT { s(:node, nil, val[0]) } + | T_IDENT T_COLON T_IDENT { s(:node, val[0], val[2]) } + ; +end + +---- inner + ## + # @param [String] data The input to parse. + # + # @param [Hash] options + # + def initialize(data) + @data = data + @lexer = Lexer.new(data) + end + + ## + # Creates a new XPath node. + # + # @param [Symbol] type + # @param [Array] children + # @return [Oga::XPath::Node] + # + def s(type, *children) + return Node.new(type, children) + end + + ## + # Yields the next token from the lexer. + # + # @yieldparam [Array] + # + def yield_next_token + @lexer.advance do |*args| + yield args + end + + yield [false, false] + end + + ## + # Parses the input and returns the corresponding AST. + # + # @example + # parser = Oga::XPath::Parser.new('//foo') + # ast = parser.parse + # + # @return [Oga::XPath::Node] + # + def parse + ast = yyparse(self, :yield_next_token) + + return ast + end +# vim: set ft=racc: diff --git a/spec/oga/xpath/parser_spec.rb b/spec/oga/xpath/parser_spec.rb new file mode 100644 index 0000000..0d3567e --- /dev/null +++ b/spec/oga/xpath/parser_spec.rb @@ -0,0 +1,5 @@ +require 'spec_helper' + +describe Oga::XPath::Parser do + pending 'Write me!' 
+end diff --git a/task/parser.rake b/task/parser.rake index 10ebfca..562a025 100644 --- a/task/parser.rake +++ b/task/parser.rake @@ -3,4 +3,4 @@ rule '.rb' => '.y' do |task| end desc 'Generates the parser' -task :parser => [PARSER_OUTPUT] +task :parser => ['lib/oga/xml/parser.rb', 'lib/oga/xpath/parser.rb']