Ported the CSS parser to ruby-ll

2015-03-21 00:59:53 +01:00 · 2015-03-21 00:59:53 +01:00 · ed14981044
parent 70e4942d3e
commit ed14981044
1 changed files with 696 additions and 0 deletions
--- a/lib/oga/css/parser.rll
+++ b/lib/oga/css/parser.rll
@ -0,0 +1,696 @@
+%header
+{
+##
+# AST parser for CSS expressions.
+#
+# This parser does _not_ build a CSS specific AST, instead it directly produces
+# an XPath AST. This removes the need to transform the AST or generate
+# corresponding XPath expressions as a String.
+#
+# Similar to {Oga::XPath::Parser} this parser only takes String instances as
+# input.
+#
+}
+
+%name Oga::CSS::Parser;
+
+%terminals T_IDENT T_PIPE T_LBRACK T_RBRACK T_COLON T_SPACE T_LPAREN T_RPAREN;
+%terminals T_MINUS T_EQ T_SPACE_IN T_STARTS_WITH T_ENDS_WITH T_IN T_HYPHEN_IN;
+%terminals T_GREATER T_TILDE T_PLUS T_NTH T_INT T_STRING T_ODD T_EVEN T_DOT;
+%terminals T_HASH;
+
+css
+  = selectors
+  | _ { nil }
+  ;
+
+selectors
+  = selector selectors_follow*
+    {
+      # Single selector
+      if val[1].empty?
+        ret = val[0]
+
+        if ret.is_a?(Array)
+          ret = s(:path, *ret)
+        end
+
+      # Multiple selectors
+      else
+        steps = [val[0]]
+
+        val[1].each do |step|
+          # "+ foo" is broken up into two steps.
+          if step.is_a?(Array)
+            # Using Array#+ or Array#| would require allocating an extra Array
+            step.each { |sub| steps << sub }
+          else
+            steps << step
+          end
+        end
+
+        ret = s(:path, *steps)
+      end
+
+      ret
+    }
+  ;
+
+selectors_follow
+  = T_SPACE selector { val[1] }
+  ;
+
+selector
+  = descendant_or_self predicates?
+    {
+      val[1] ? s(:predicate, val[0], val[1]) : val[0]
+    }
+  | axis predicates?
+    {
+      val[1] ? s(:predicate, val[0], val[1]) : val[0]
+    }
+  | predicates
+    {
+      s(:predicate, s(:axis, 'descendant', on_test(nil, '*')), val[0])
+    }
+  ;
+
+descendant_or_self
+  = node_test { s(:axis, 'descendant', val[0]) }
+  ;
+
+axis
+  # > foo
+  = T_GREATER axis_selector
+    {
+      s(:axis, 'child', val[1])
+    }
+
+  # ~ foo
+  | T_TILDE axis_selector
+    {
+      s(:axis, 'following-sibling', val[1])
+    }
+
+  # + foo
+  | T_PLUS axis_selector
+    {
+      [
+        s(
+          :predicate,
+          s(:axis, 'following-sibling', on_test(nil, '*')),
+          s(:int, 1)
+        ),
+        s(:axis, 'self', val[1])
+      ]
+    }
+  ;
+
+axis_selector
+  = node_test
+  | axis
+  ;
+
+node_test
+  = node_name { on_test(*val[0]) }
+  ;
+
+node_name
+  = T_IDENT node_name_pipe? { val[1] ? [val[0], val[1]] : [nil, val[0]] }
+  ;
+
+node_name_pipe
+  = T_PIPE T_IDENT { val[1] }
+  ;
+
+predicates
+  = predicate predicate*
+    {
+      ret = val[0]
+
+      val[1].each do |pred|
+        ret = s(:and, ret, pred)
+      end
+
+      ret
+    }
+  ;
+
+predicate
+  = class
+  | id
+  | pseudo_class
+  | attribute_predicate
+  ;
+
+attribute_predicate
+  = T_LBRACK attribute_predicate_members T_RBRACK { val[1] }
+  ;
+
+attribute_predicate_members
+  = attribute_or_operator
+  ;
+
+attribute
+  = node_name { s(:axis, 'attribute', on_test(*val[0])) }
+  ;
+
+attribute_or_operator
+  = attribute (operator_type string)?
+    {
+      op_type = val[1] ? val[1][0] : nil
+
+      case op_type
+      # a="b"
+      when :eq
+        on_op_eq(val[0], val[1][1])
+
+      # a~="b"
+      when :space_in
+        on_op_space_in(val[0], val[1][1])
+
+      # a^="b"
+      when :starts_with
+        on_op_starts_with(val[0], val[1][1])
+
+      # a$="b"
+      when :ends_with
+        on_op_ends_with(val[0], val[1][1])
+
+      # a*="b"
+      when :in
+        on_op_in(val[0], val[1][1])
+
+      # a|="b"
+      when :hyphen_in
+        on_op_hyphen_in(val[0], val[1][1])
+
+      else
+        val[0]
+      end
+    }
+  ;
+
+operator_type
+  = T_EQ          { :eq }
+  | T_SPACE_IN    { :space_in }
+  | T_STARTS_WITH { :starts_with }
+  | T_ENDS_WITH   { :ends_with }
+  | T_IN          { :in }
+  | T_HYPHEN_IN   { :hyphen_in }
+  ;
+
+class
+  = T_DOT T_IDENT
+    {
+      axis = s(:axis, 'attribute', s(:test, nil, 'class'))
+
+      s(
+        :call,
+        'contains',
+        s(:call, 'concat', s(:string, ' '), axis, s(:string, ' ')),
+        s(:string, " #{val[1]} ")
+      )
+    }
+  ;
+
+id
+  = T_HASH T_IDENT
+    {
+      s(
+        :eq,
+        s(:axis, 'attribute', s(:test, nil, 'id')),
+        s(:string, val[1])
+      )
+    }
+  ;
+
+pseudo_class
+  = pseudo_name pseudo_args?  { on_pseudo_class(val[0], val[1]) }
+  ;
+
+pseudo_name
+  = T_COLON T_IDENT { val[1] }
+  ;
+
+pseudo_args
+  = T_LPAREN pseudo_arg T_RPAREN { val[1] }
+  ;
+
+pseudo_arg
+  = odd
+  | even
+  | nth_or_integer
+  | selector
+  ;
+
+string
+  = T_STRING { s(:string, val[0]) }
+  ;
+
+integer
+  = T_INT { s(:int, val[0].to_i) }
+  ;
+
+# These AST nodes are _not_ the final AST nodes. Instead they are used by
+# on_pseudo_class_nth_child() to determine what the final AST should be.
+
+nth_or_integer
+  # n, n+2
+  = nth integer?
+    {
+      val[1] ? s(:nth, s(:int, 1), val[1]) : s(:nth, s(:int, 1))
+    }
+
+  # -n, -n+2, -n-2
+  | T_MINUS nth integer?
+    {
+      val[2] ? s(:nth, s(:int, -1), val[2]) : s(:nth, s(:int, 1))
+    }
+
+  # 2, 2n, 2n+1, 2n-1
+  | integer nth? integer?
+    {
+      # 2n+1
+      if val[1] and val[2]
+        a = val[0]
+        b = val[2]
+
+        # 2n-1 gets turned into 2n+1
+        if b.children[0] < 0
+          b = s(:int, a.children[0] - (b.children[0] % a.children[0]))
+        end
+
+        s(:nth, a, b)
+
+      # 2n
+      elsif val[1]
+        s(:nth, val[0])
+
+      # 2
+      else
+        val[0]
+      end
+    }
+  ;
+
+# T_NTH has a nil value, meaning you can't use it with the "?" operator combined
+# with an if statement (as nil evaluates to false).
+nth
+  = T_NTH { :nth }
+  ;
+
+odd
+  = T_ODD { s(:nth, s(:int, 2), s(:int, 1)) }
+  ;
+
+even
+  = T_EVEN { s(:nth, s(:int, 2)) }
+  ;
+
+%inner
+{
+  ##
+  # @param [String] data The input to parse.
+  #
+  def initialize(data)
+    @lexer = Lexer.new(data)
+  end
+
+  ##
+  # Resets the internal state of the parser.
+  #
+  def reset
+    @current_element = nil
+  end
+
+  ##
+  # @param [Symbol] type
+  # @param [Array] children
+  # @return [AST::Node]
+  #
+  def s(type, *children)
+    return AST::Node.new(type, children)
+  end
+
+  ##
+  # Yields the next token from the lexer.
+  #
+  # @yieldparam [Array]
+  #
+  def each_token
+    @lexer.advance do |*args|
+      yield args
+    end
+
+    yield [-1, -1]
+  end
+
+  ##
+  # Returns the node test for the current element.
+  #
+  # @return [AST::Node]
+  #
+  def current_element
+    return @current_element ||= s(:test, nil, '*')
+  end
+
+  ##
+  # Parses the input and returns the corresponding AST.
+  #
+  # @example
+  #  parser = Oga::CSS::Parser.new('foo.bar')
+  #  ast    = parser.parse
+  #
+  # @return [AST::Node]
+  #
+  def parse
+    reset
+
+    return super
+  end
+
+  ##
+  # Generates the AST for a node test.
+  #
+  # @param [String] namespace
+  # @param [String] name
+  # @return [AST::Node]
+  #
+  def on_test(namespace, name)
+    @current_element = s(:test, namespace, name)
+
+    return @current_element
+  end
+
+  ##
+  # @param [String] name
+  # @param [AST::Node] arg
+  # @return [AST::Node]
+  #
+  def on_pseudo_class(name, arg = nil)
+    handler = "on_pseudo_class_#{name.gsub('-', '_')}"
+
+    return arg ? send(handler, arg) : send(handler)
+  end
+
+  ##
+  # Generates the AST for the `root` pseudo class.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_root
+    return s(:call, 'not', s(:axis, 'parent', s(:test, nil, '*')))
+  end
+
+  ##
+  # Generates the AST for the `nth-child` pseudo class.
+  #
+  # @param [AST::Node] arg
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_nth_child(arg)
+    return generate_nth_child('preceding-sibling', arg)
+  end
+
+  ##
+  # Generates the AST for the `nth-last-child` pseudo class.
+  #
+  # @param [AST::Node] arg
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_nth_last_child(arg)
+    return generate_nth_child('following-sibling', arg)
+  end
+
+  ##
+  # Generates the AST for the `nth-of-type` pseudo class.
+  #
+  # @param [AST::Node] arg
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_nth_of_type(arg)
+    return generate_nth_child('preceding-sibling', arg, current_element)
+  end
+
+  ##
+  # Generates the AST for the `nth-last-of-type` pseudo class.
+  #
+  # @param [AST::Node] arg
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_nth_last_of_type(arg)
+    return generate_nth_child('following-sibling', arg, current_element)
+  end
+
+  ##
+  # Generates the AST for the `:first-child` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_first_child
+    return generate_no_siblings('preceding-sibling')
+  end
+
+  ##
+  # Generates the AST for the `:last-child` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_last_child
+    return generate_no_siblings('following-sibling')
+  end
+
+  ##
+  # Generates the AST for the `:first-of-type` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_first_of_type
+    return generate_no_siblings('preceding-sibling', current_element)
+  end
+
+  ##
+  # Generates the AST for the `:last-of-type` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_last_of_type
+    return generate_no_siblings('following-sibling', current_element)
+  end
+
+  ##
+  # Generates the AST for the `:only-child` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_only_child
+    return s(:and, on_pseudo_class_first_child, on_pseudo_class_last_child)
+  end
+
+  ##
+  # Generates the AST for the `:only-of-type` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_only_of_type
+    return s(:and, on_pseudo_class_first_of_type, on_pseudo_class_last_of_type)
+  end
+
+  ##
+  # Generates the AST for the `:empty` selector.
+  #
+  # @return [AST::Node]
+  #
+  def on_pseudo_class_empty
+    return s(:call, 'not', s(:axis, 'child', s(:type_test, 'node')))
+  end
+
+  ##
+  # Generates the AST for the `=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_eq(attr, value)
+    return s(:eq, attr, value)
+  end
+
+  ##
+  # Generates the AST for the `~=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_space_in(attr, value)
+    return s(
+      :call,
+      'contains',
+      s(:call, 'concat', s(:string, ' '), attr, s(:string, ' ')),
+      s(:call, 'concat', s(:string, ' '), value, s(:string, ' '))
+    )
+  end
+
+  ##
+  # Generates the AST for the `^=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_starts_with(attr, value)
+    return s(:call, 'starts-with', attr, value)
+  end
+
+  ##
+  # Generates the AST for the `$=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_ends_with(attr, value)
+    return s(
+      :eq,
+      s(
+        :call,
+        'substring',
+        attr,
+        s(
+          :add,
+          s(
+            :sub,
+            s(:call, 'string-length', attr),
+            s(:call, 'string-length', value)
+          ),
+          s(:int, 1)
+        ),
+        s(:call, 'string-length', value)
+      ),
+      value
+    )
+  end
+
+  ##
+  # Generates the AST for the `*=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_in(attr, value)
+    return s(:call, 'contains', attr, value)
+  end
+
+  ##
+  # Generates the AST for the `|=` operator.
+  #
+  # @param [AST::Node] attr
+  # @param [AST::Node] value
+  # @return [AST::Node]
+  #
+  def on_op_hyphen_in(attr, value)
+    return s(
+      :or,
+      s(:eq, attr, value),
+      s(
+        :call,
+        'starts-with',
+        attr,
+        s(:call, 'concat', value, s(:string, '-'))
+      )
+    )
+  end
+
+  private
+
+  ##
+  # @param [String] count_axis
+  # @param [AST::Node] arg
+  # @param [AST::Node] count_test
+  # @return [AST::Node]
+  #
+  def generate_nth_child(count_axis, arg, count_test = s(:test, nil, '*'))
+    count_call = s(:call, 'count', s(:axis, count_axis, count_test))
+
+   # literal 2, 4, etc
+    if int_node?(arg)
+      node = s(:eq, count_call, s(:int, arg.children[0] - 1))
+    else
+      step, offset = *arg
+      before_count = s(:add, count_call, s(:int, 1))
+      compare      = step_comparison(step)
+
+      # 2n+2, 2n-4, etc
+      if offset
+        mod_val = step_modulo_value(step)
+        node    = s(
+          :and,
+          s(compare, before_count, offset),
+          s(:eq, s(:mod, s(:sub, before_count, offset), mod_val), s(:int, 0))
+        )
+
+      # 2n, n, -2n
+      else
+        node = s(:eq, s(:mod, before_count, step), s(:int, 0))
+      end
+    end
+
+    return node
+  end
+
+  ##
+  # @param [String] axis
+  # @param [AST::Node] test
+  # @return [AST::Node]
+  #
+  def generate_no_siblings(axis, test = s(:test, nil, '*'))
+    return s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0))
+  end
+
+  ##
+  # @param [AST::Node] node
+  # @return [TrueClass|FalseClass]
+  #
+  def int_node?(node)
+    return node.type == :int
+  end
+
+  ##
+  # @param [AST::Node] node
+  # @return [TrueClass|FalseClass]
+  #
+  def non_positive_number?(node)
+    return node.children[0] <= 0
+  end
+
+  ##
+  # @param [AST::Node] node
+  # @return [Symbol]
+  #
+  def step_comparison(node)
+    return node.children[0] >= 0 ? :gte : :lte
+  end
+
+  ##
+  # @param [AST::Node] step
+  # @return [AST::Node]
+  #
+  def step_modulo_value(step)
+    # -2n
+    if step and non_positive_number?(step)
+      mod_val = s(:int, -step.children[0])
+
+    # 2n
+    elsif step
+      mod_val = step
+
+    else
+      mod_val = s(:int, 1)
+    end
+
+    return mod_val
+  end
+}