Removed start/end comments of YARD blocks

This commit is contained in:
Yorick Peterse 2015-09-01 19:59:52 +02:00
parent 929a521641
commit 94f8ed5421
39 changed files with 0 additions and 734 deletions

View File

@ -1,38 +1,28 @@
module Oga module Oga
##
# @api private # @api private
#
class Blacklist class Blacklist
# @return [Set] # @return [Set]
attr_reader :names attr_reader :names
##
# @param [Array] names # @param [Array] names
#
def initialize(names) def initialize(names)
@names = Set.new(names + names.map(&:upcase)) @names = Set.new(names + names.map(&:upcase))
end end
##
# @yieldparam [String] # @yieldparam [String]
#
def each def each
names.each do |value| names.each do |value|
yield value yield value
end end
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def allow?(name) def allow?(name)
!names.include?(name) !names.include?(name)
end end
##
# @param [Oga::Blacklist] other # @param [Oga::Blacklist] other
# @return [Oga::Blacklist] # @return [Oga::Blacklist]
#
def +(other) def +(other)
self.class.new(names + other.names) self.class.new(names + other.names)
end end

View File

@ -2,7 +2,6 @@
module Oga module Oga
module CSS module CSS
##
# Lexer for turning CSS expressions into a sequence of tokens. Tokens are # Lexer for turning CSS expressions into a sequence of tokens. Tokens are
# returned as arrays with every array having two values: # returned as arrays with every array having two values:
# #
@ -17,25 +16,20 @@ module Oga
# instances in threads just fine. # instances in threads just fine.
# #
# @api private # @api private
#
class Lexer class Lexer
%% write data; %% write data;
# % fix highlight # % fix highlight
##
# @param [String] data The data to lex. # @param [String] data The data to lex.
#
def initialize(data) def initialize(data)
@data = data @data = data
end end
##
# Gathers all the tokens for the input and returns them as an Array. # Gathers all the tokens for the input and returns them as an Array.
# #
# @see [#advance] # @see [#advance]
# @return [Array] # @return [Array]
#
def lex def lex
tokens = [] tokens = []
@ -46,7 +40,6 @@ module Oga
return tokens return tokens
end end
##
# Advances through the input and generates the corresponding tokens. Each # Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block. # token is yielded to the supplied block.
# #
@ -54,7 +47,6 @@ module Oga
# the lexer loop has finished. # the lexer loop has finished.
# #
# @see [#add_token] # @see [#add_token]
#
def advance(&block) def advance(&block)
@block = block @block = block
@ -88,7 +80,6 @@ module Oga
private private
##
# Emits a token of which the value is based on the supplied start/stop # Emits a token of which the value is based on the supplied start/stop
# position. # position.
# #
@ -98,25 +89,21 @@ module Oga
# #
# @see [#text] # @see [#text]
# @see [#add_token] # @see [#add_token]
#
def emit(type, start, stop) def emit(type, start, stop)
value = slice_input(start, stop) value = slice_input(start, stop)
add_token(type, value) add_token(type, value)
end end
##
# Returns the text between the specified start and stop position. # Returns the text between the specified start and stop position.
# #
# @param [Fixnum] start # @param [Fixnum] start
# @param [Fixnum] stop # @param [Fixnum] stop
# @return [String] # @return [String]
#
def slice_input(start, stop) def slice_input(start, stop)
return @data.byteslice(start, stop - start) return @data.byteslice(start, stop - start)
end end
##
# Yields a new token to the supplied block. # Yields a new token to the supplied block.
# #
# @param [Symbol] type The token type. # @param [Symbol] type The token type.
@ -124,7 +111,6 @@ module Oga
# #
# @yieldparam [Symbol] type # @yieldparam [Symbol] type
# @yieldparam [String|NilClass] value # @yieldparam [String|NilClass] value
#
def add_token(type, value = nil) def add_token(type, value = nil)
@block.call(type, value) @block.call(type, value)
end end
@ -202,7 +188,6 @@ module Oga
# #
# Strings can be single or double quoted. They are mainly used for # Strings can be single or double quoted. They are mainly used for
# attribute values. # attribute values.
#
dquote = '"'; dquote = '"';
squote = "'"; squote = "'";

View File

@ -1,6 +1,5 @@
%header %header
{ {
##
# AST parser for CSS expressions. # AST parser for CSS expressions.
# #
# This parser does _not_ build a CSS specific AST, instead it directly produces # This parser does _not_ build a CSS specific AST, instead it directly produces
@ -327,47 +326,35 @@ even
%inner %inner
{ {
##
# @return [Oga::LRU] # @return [Oga::LRU]
#
CACHE = LRU.new CACHE = LRU.new
##
# @param [String] data # @param [String] data
# @return [AST::Node] # @return [AST::Node]
#
def self.parse_with_cache(data) def self.parse_with_cache(data)
CACHE.get_or_set(data) { new(data).parse } CACHE.get_or_set(data) { new(data).parse }
end end
##
# @param [String] data The input to parse. # @param [String] data The input to parse.
#
def initialize(data) def initialize(data)
@lexer = Lexer.new(data) @lexer = Lexer.new(data)
end end
##
# Resets the internal state of the parser. # Resets the internal state of the parser.
#
def reset def reset
@current_element = nil @current_element = nil
end end
##
# @param [Symbol] type # @param [Symbol] type
# @param [Array] children # @param [Array] children
# @return [AST::Node] # @return [AST::Node]
#
def s(type, *children) def s(type, *children)
AST::Node.new(type, children) AST::Node.new(type, children)
end end
##
# Yields the next token from the lexer. # Yields the next token from the lexer.
# #
# @yieldparam [Array] # @yieldparam [Array]
#
def each_token def each_token
@lexer.advance do |*args| @lexer.advance do |*args|
yield args yield args
@ -376,16 +363,13 @@ even
yield [-1, -1] yield [-1, -1]
end end
##
# Returns the node test for the current element. # Returns the node test for the current element.
# #
# @return [AST::Node] # @return [AST::Node]
#
def current_element def current_element
@current_element ||= s(:test, nil, '*') @current_element ||= s(:test, nil, '*')
end end
##
# Parses the input and returns the corresponding AST. # Parses the input and returns the corresponding AST.
# #
# @example # @example
@ -393,175 +377,140 @@ even
# ast = parser.parse # ast = parser.parse
# #
# @return [AST::Node] # @return [AST::Node]
#
def parse def parse
reset reset
super super
end end
##
# Generates the AST for a node test. # Generates the AST for a node test.
# #
# @param [String] namespace # @param [String] namespace
# @param [String] name # @param [String] name
# @return [AST::Node] # @return [AST::Node]
#
def on_test(namespace, name) def on_test(namespace, name)
@current_element = s(:test, namespace, name) @current_element = s(:test, namespace, name)
end end
##
# @param [String] name # @param [String] name
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class(name, arg = nil) def on_pseudo_class(name, arg = nil)
handler = "on_pseudo_class_#{name.gsub('-', '_')}" handler = "on_pseudo_class_#{name.gsub('-', '_')}"
arg ? send(handler, arg) : send(handler) arg ? send(handler, arg) : send(handler)
end end
##
# Generates the AST for the `root` pseudo class. # Generates the AST for the `root` pseudo class.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_root def on_pseudo_class_root
s(:call, 'not', s(:axis, 'parent', s(:test, nil, '*'))) s(:call, 'not', s(:axis, 'parent', s(:test, nil, '*')))
end end
##
# Generates the AST for the `nth-child` pseudo class. # Generates the AST for the `nth-child` pseudo class.
# #
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_nth_child(arg) def on_pseudo_class_nth_child(arg)
generate_nth_child('preceding-sibling', arg) generate_nth_child('preceding-sibling', arg)
end end
##
# Generates the AST for the `nth-last-child` pseudo class. # Generates the AST for the `nth-last-child` pseudo class.
# #
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_nth_last_child(arg) def on_pseudo_class_nth_last_child(arg)
generate_nth_child('following-sibling', arg) generate_nth_child('following-sibling', arg)
end end
##
# Generates the AST for the `nth-of-type` pseudo class. # Generates the AST for the `nth-of-type` pseudo class.
# #
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_nth_of_type(arg) def on_pseudo_class_nth_of_type(arg)
generate_nth_child('preceding-sibling', arg, current_element) generate_nth_child('preceding-sibling', arg, current_element)
end end
##
# Generates the AST for the `nth-last-of-type` pseudo class. # Generates the AST for the `nth-last-of-type` pseudo class.
# #
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_nth_last_of_type(arg) def on_pseudo_class_nth_last_of_type(arg)
generate_nth_child('following-sibling', arg, current_element) generate_nth_child('following-sibling', arg, current_element)
end end
##
# Generates the AST for the `nth` pseudo class. # Generates the AST for the `nth` pseudo class.
# #
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_nth(arg) def on_pseudo_class_nth(arg)
s(:eq, s(:call, 'position'), arg) s(:eq, s(:call, 'position'), arg)
end end
##
# Generates the AST for the `:first-child` selector. # Generates the AST for the `:first-child` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_first_child def on_pseudo_class_first_child
generate_no_siblings('preceding-sibling') generate_no_siblings('preceding-sibling')
end end
##
# Generates the AST for the `:last-child` selector. # Generates the AST for the `:last-child` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_last_child def on_pseudo_class_last_child
generate_no_siblings('following-sibling') generate_no_siblings('following-sibling')
end end
##
# Generates the AST for the `:first-of-type` selector. # Generates the AST for the `:first-of-type` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_first_of_type def on_pseudo_class_first_of_type
generate_no_siblings('preceding-sibling', current_element) generate_no_siblings('preceding-sibling', current_element)
end end
##
# Generates the AST for the `:last-of-type` selector. # Generates the AST for the `:last-of-type` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_last_of_type def on_pseudo_class_last_of_type
generate_no_siblings('following-sibling', current_element) generate_no_siblings('following-sibling', current_element)
end end
##
# Generates the AST for the `:only-child` selector. # Generates the AST for the `:only-child` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_only_child def on_pseudo_class_only_child
s(:and, on_pseudo_class_first_child, on_pseudo_class_last_child) s(:and, on_pseudo_class_first_child, on_pseudo_class_last_child)
end end
##
# Generates the AST for the `:only-of-type` selector. # Generates the AST for the `:only-of-type` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_only_of_type def on_pseudo_class_only_of_type
s(:and, on_pseudo_class_first_of_type, on_pseudo_class_last_of_type) s(:and, on_pseudo_class_first_of_type, on_pseudo_class_last_of_type)
end end
##
# Generates the AST for the `:empty` selector. # Generates the AST for the `:empty` selector.
# #
# @return [AST::Node] # @return [AST::Node]
#
def on_pseudo_class_empty def on_pseudo_class_empty
s(:call, 'not', s(:axis, 'child', s(:type_test, 'node'))) s(:call, 'not', s(:axis, 'child', s(:type_test, 'node')))
end end
##
# Generates the AST for the `=` operator. # Generates the AST for the `=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_eq(attr, value) def on_op_eq(attr, value)
s(:eq, attr, value) s(:eq, attr, value)
end end
##
# Generates the AST for the `~=` operator. # Generates the AST for the `~=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_space_in(attr, value) def on_op_space_in(attr, value)
s( s(
:call, :call,
@ -571,24 +520,20 @@ even
) )
end end
##
# Generates the AST for the `^=` operator. # Generates the AST for the `^=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_starts_with(attr, value) def on_op_starts_with(attr, value)
s(:call, 'starts-with', attr, value) s(:call, 'starts-with', attr, value)
end end
##
# Generates the AST for the `$=` operator. # Generates the AST for the `$=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_ends_with(attr, value) def on_op_ends_with(attr, value)
s( s(
:eq, :eq,
@ -611,24 +556,20 @@ even
) )
end end
##
# Generates the AST for the `*=` operator. # Generates the AST for the `*=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_in(attr, value) def on_op_in(attr, value)
s(:call, 'contains', attr, value) s(:call, 'contains', attr, value)
end end
##
# Generates the AST for the `|=` operator. # Generates the AST for the `|=` operator.
# #
# @param [AST::Node] attr # @param [AST::Node] attr
# @param [AST::Node] value # @param [AST::Node] value
# @return [AST::Node] # @return [AST::Node]
#
def on_op_hyphen_in(attr, value) def on_op_hyphen_in(attr, value)
s( s(
:or, :or,
@ -644,12 +585,10 @@ even
private private
##
# @param [String] count_axis # @param [String] count_axis
# @param [AST::Node] arg # @param [AST::Node] arg
# @param [AST::Node] count_test # @param [AST::Node] count_test
# @return [AST::Node] # @return [AST::Node]
#
def generate_nth_child(count_axis, arg, count_test = s(:test, nil, '*')) def generate_nth_child(count_axis, arg, count_test = s(:test, nil, '*'))
count_call = s(:call, 'count', s(:axis, count_axis, count_test)) count_call = s(:call, 'count', s(:axis, count_axis, count_test))
@ -679,43 +618,33 @@ even
node node
end end
##
# @param [String] axis # @param [String] axis
# @param [AST::Node] test # @param [AST::Node] test
# @return [AST::Node] # @return [AST::Node]
#
def generate_no_siblings(axis, test = s(:test, nil, '*')) def generate_no_siblings(axis, test = s(:test, nil, '*'))
s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0)) s(:eq, s(:call, 'count', s(:axis, axis, test)), s(:int, 0))
end end
##
# @param [AST::Node] node # @param [AST::Node] node
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def int_node?(node) def int_node?(node)
node.type.equal?(:int) node.type.equal?(:int)
end end
##
# @param [AST::Node] node # @param [AST::Node] node
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def non_positive_number?(node) def non_positive_number?(node)
node.children[0] <= 0 node.children[0] <= 0
end end
##
# @param [AST::Node] node # @param [AST::Node] node
# @return [Symbol] # @return [Symbol]
#
def step_comparison(node) def step_comparison(node)
node.children[0] >= 0 ? :gte : :lte node.children[0] >= 0 ? :gte : :lte
end end
##
# @param [AST::Node] step # @param [AST::Node] step
# @return [AST::Node] # @return [AST::Node]
#
def step_modulo_value(step) def step_modulo_value(step)
# -2n # -2n
if step and non_positive_number?(step) if step and non_positive_number?(step)

View File

@ -1,17 +1,13 @@
module Oga module Oga
module EntityDecoder module EntityDecoder
##
# @see [decode] # @see [decode]
#
def self.try_decode(input, html = false) def self.try_decode(input, html = false)
input ? decode(input, html) : nil input ? decode(input, html) : nil
end end
##
# @param [String] input # @param [String] input
# @param [TrueClass|FalseClass] html # @param [TrueClass|FalseClass] html
# @return [String] # @return [String]
#
def self.decode(input, html = false) def self.decode(input, html = false)
decoder = html ? HTML::Entities : XML::Entities decoder = html ? HTML::Entities : XML::Entities

View File

@ -1,14 +1,12 @@
module Oga module Oga
module HTML module HTML
module Entities module Entities
##
# Hash mapping HTML entities to their Unicode character replacements. # Hash mapping HTML entities to their Unicode character replacements.
# #
# Based on the JSON output as listed at # Based on the JSON output as listed at
# http://www.w3.org/TR/html5/syntax.html#named-character-references # http://www.w3.org/TR/html5/syntax.html#named-character-references
# #
# @return [Hash] # @return [Hash]
#
DECODE_MAPPING = { DECODE_MAPPING = {
'&Aacute;' => [193].pack('U*'), '&Aacute;' => [193].pack('U*'),
'&aacute;' => [225].pack('U*'), '&aacute;' => [225].pack('U*'),
@ -2137,11 +2135,9 @@ module Oga
'&zwnj;' => [8204].pack('U*'), '&zwnj;' => [8204].pack('U*'),
} }
##
# Decodes HTML entities. # Decodes HTML entities.
# #
# @see [decode] # @see [decode]
#
def self.decode(input) def self.decode(input)
XML::Entities.decode(input, DECODE_MAPPING) XML::Entities.decode(input, DECODE_MAPPING)
end end

View File

@ -1,6 +1,5 @@
module Oga module Oga
module HTML module HTML
##
# Parser for processing HTML input. This parser is a small wrapper around # Parser for processing HTML input. This parser is a small wrapper around
# {Oga::XML::Parser} and takes care of setting the various options required # {Oga::XML::Parser} and takes care of setting the various options required
# for parsing HTML documents. # for parsing HTML documents.
@ -8,13 +7,10 @@ module Oga
# A basic example: # A basic example:
# #
# Oga::HTML::Parser.new('<meta charset="utf-8">').parse # Oga::HTML::Parser.new('<meta charset="utf-8">').parse
#
class Parser < XML::Parser class Parser < XML::Parser
##
# @param [String|IO] data # @param [String|IO] data
# @param [Hash] options # @param [Hash] options
# @see [Oga::XML::Parser#initialize] # @see [Oga::XML::Parser#initialize]
#
def initialize(data, options = {}) def initialize(data, options = {})
options = options.merge(:html => true) options = options.merge(:html => true)

View File

@ -1,13 +1,9 @@
module Oga module Oga
module HTML module HTML
##
# SAX parser for HTML documents. See the documentation of # SAX parser for HTML documents. See the documentation of
# {Oga::XML::SaxParser} for more information. # {Oga::XML::SaxParser} for more information.
#
class SaxParser < XML::SaxParser class SaxParser < XML::SaxParser
##
# @see [Oga::XML::SaxParser#initialize] # @see [Oga::XML::SaxParser#initialize]
#
def initialize(handler, data, options = {}) def initialize(handler, data, options = {})
options = options.merge(:html => true) options = options.merge(:html => true)

View File

@ -1,5 +1,4 @@
module Oga module Oga
##
# Thread-safe LRU cache using a Hash as the underlying storage engine. # Thread-safe LRU cache using a Hash as the underlying storage engine.
# Whenever the size of the cache exceeds the given limit the oldest keys are # Whenever the size of the cache exceeds the given limit the oldest keys are
# removed (base on insert order). # removed (base on insert order).
@ -22,11 +21,8 @@ module Oga
# cache.keys # => [:b, :c, :d] # cache.keys # => [:b, :c, :d]
# #
# @api private # @api private
#
class LRU class LRU
##
# @param [Fixnum] maximum # @param [Fixnum] maximum
#
def initialize(maximum = 1024) def initialize(maximum = 1024)
@maximum = maximum @maximum = maximum
@cache = {} @cache = {}
@ -35,9 +31,7 @@ module Oga
@owner = Thread.current @owner = Thread.current
end end
##
# @param [Fixnum] value # @param [Fixnum] value
#
def maximum=(value) def maximum=(value)
synchronize do synchronize do
@maximum = value @maximum = value
@ -46,30 +40,24 @@ module Oga
end end
end end
##
# @return [Fixnum] # @return [Fixnum]
#
def maximum def maximum
synchronize { @maximum } synchronize { @maximum }
end end
##
# Returns the value of the key. # Returns the value of the key.
# #
# @param [Mixed] key # @param [Mixed] key
# @return [Mixed] # @return [Mixed]
#
def [](key) def [](key)
synchronize { @cache[key] } synchronize { @cache[key] }
end end
##
# Sets the key and its value. Old keys are discarded if the LRU size exceeds # Sets the key and its value. Old keys are discarded if the LRU size exceeds
# the limit. # the limit.
# #
# @param [Mixed] key # @param [Mixed] key
# @param [Mixed] value # @param [Mixed] value
#
def []=(key, value) def []=(key, value)
synchronize do synchronize do
@cache[key] = value @cache[key] = value
@ -82,35 +70,27 @@ module Oga
end end
end end
##
# Returns a key if it exists, otherwise yields the supplied block and uses # Returns a key if it exists, otherwise yields the supplied block and uses
# its return value as the key value. # its return value as the key value.
# #
# @param [Mixed] key # @param [Mixed] key
# @return [Mixed] # @return [Mixed]
#
def get_or_set(key) def get_or_set(key)
synchronize { self[key] ||= yield } synchronize { self[key] ||= yield }
end end
##
# @return [Array] # @return [Array]
#
def keys def keys
synchronize { @keys } synchronize { @keys }
end end
##
# @param [Mixed] key # @param [Mixed] key
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def key?(key) def key?(key)
synchronize { @cache.key?(key) } synchronize { @cache.key?(key) }
end end
##
# Removes all keys from the cache. # Removes all keys from the cache.
#
def clear def clear
synchronize do synchronize do
@keys.clear @keys.clear
@ -118,9 +98,7 @@ module Oga
end end
end end
##
# @return [Fixnum] # @return [Fixnum]
#
def size def size
synchronize { @cache.size } synchronize { @cache.size }
end end
@ -129,10 +107,8 @@ module Oga
private private
##
# Yields the supplied block in a synchronized manner (if needed). This # Yields the supplied block in a synchronized manner (if needed). This
# method is heavily based on `MonitorMixin#mon_enter`. # method is heavily based on `MonitorMixin#mon_enter`.
#
def synchronize def synchronize
if @owner != Thread.current if @owner != Thread.current
@mutex.synchronize do @mutex.synchronize do
@ -147,10 +123,8 @@ module Oga
end end
end end
##
# Removes old keys until the size of the hash no longer exceeds the maximum # Removes old keys until the size of the hash no longer exceeds the maximum
# size. # size.
#
def resize def resize
return unless size > @maximum return unless size > @maximum

View File

@ -1,5 +1,4 @@
module Oga module Oga
##
# Parses the given XML document. # Parses the given XML document.
# #
# @example # @example
@ -8,12 +7,10 @@ module Oga
# @see [Oga::XML::Lexer#initialize] # @see [Oga::XML::Lexer#initialize]
# #
# @return [Oga::XML::Document] # @return [Oga::XML::Document]
#
def self.parse_xml(xml, options = {}) def self.parse_xml(xml, options = {})
XML::Parser.new(xml, options).parse XML::Parser.new(xml, options).parse
end end
##
# Parses the given HTML document. # Parses the given HTML document.
# #
# @example # @example
@ -22,12 +19,10 @@ module Oga
# @see [Oga::XML::Lexer#initialize] # @see [Oga::XML::Lexer#initialize]
# #
# @return [Oga::XML::Document] # @return [Oga::XML::Document]
#
def self.parse_html(html, options = {}) def self.parse_html(html, options = {})
HTML::Parser.new(html, options).parse HTML::Parser.new(html, options).parse
end end
##
# Parses the given XML document using the SAX parser. # Parses the given XML document using the SAX parser.
# #
# @example # @example
@ -36,12 +31,10 @@ module Oga
# Oga.sax_parse_html(handler, '<root>Hello</root>') # Oga.sax_parse_html(handler, '<root>Hello</root>')
# #
# @see [Oga::XML::SaxParser#initialize] # @see [Oga::XML::SaxParser#initialize]
#
def self.sax_parse_xml(handler, xml, options = {}) def self.sax_parse_xml(handler, xml, options = {})
XML::SaxParser.new(handler, xml, options).parse XML::SaxParser.new(handler, xml, options).parse
end end
##
# Parses the given HTML document using the SAX parser. # Parses the given HTML document using the SAX parser.
# #
# @example # @example
@ -50,7 +43,6 @@ module Oga
# Oga.sax_parse_html(handler, '<script>foo()</script>') # Oga.sax_parse_html(handler, '<script>foo()</script>')
# #
# @see [Oga::XML::SaxParser#initialize] # @see [Oga::XML::SaxParser#initialize]
#
def self.sax_parse_html(handler, html, options = {}) def self.sax_parse_html(handler, html, options = {})
HTML::SaxParser.new(handler, html, options).parse HTML::SaxParser.new(handler, html, options).parse
end end

View File

@ -1,17 +1,13 @@
module Oga module Oga
module Ruby module Ruby
##
# Class for converting a Ruby AST to a String. # Class for converting a Ruby AST to a String.
# #
# This class takes a {Oga::Ruby::Node} instance and converts it (and its # This class takes a {Oga::Ruby::Node} instance and converts it (and its
# child nodes) to a String that in turn can be passed to `eval` and the # child nodes) to a String that in turn can be passed to `eval` and the
# likes. # likes.
#
class Generator class Generator
##
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def process(ast) def process(ast)
send(:"on_#{ast.type}", ast) send(:"on_#{ast.type}", ast)
end end
@ -22,12 +18,10 @@ module Oga
ast.to_a.map { |child| process(child) }.join("\n\n") ast.to_a.map { |child| process(child) }.join("\n\n")
end end
##
# Processes an assignment node. # Processes an assignment node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_assign(ast) def on_assign(ast)
var, val = *ast var, val = *ast
@ -37,12 +31,10 @@ module Oga
"#{var_str} = #{val_str}" "#{var_str} = #{val_str}"
end end
##
# Processes a mass assignment node. # Processes a mass assignment node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_massign(ast) def on_massign(ast)
vars, val = *ast vars, val = *ast
@ -52,12 +44,10 @@ module Oga
"#{var_names.join(', ')} = #{val_str}" "#{var_names.join(', ')} = #{val_str}"
end end
##
# Processes a `begin` node. # Processes a `begin` node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_begin(ast) def on_begin(ast)
body = process(ast.to_a[0]) body = process(ast.to_a[0])
@ -68,12 +58,10 @@ end
EOF EOF
end end
##
# Processes an equality node. # Processes an equality node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_eq(ast) def on_eq(ast)
left, right = *ast left, right = *ast
@ -83,12 +71,10 @@ end
"#{left_str} == #{right_str}" "#{left_str} == #{right_str}"
end end
##
# Processes a boolean "and" node. # Processes a boolean "and" node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_and(ast) def on_and(ast)
left, right = *ast left, right = *ast
@ -98,12 +84,10 @@ end
"#{left_str} && #{right_str}" "#{left_str} && #{right_str}"
end end
##
# Processes a boolean "or" node. # Processes a boolean "or" node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_or(ast) def on_or(ast)
left, right = *ast left, right = *ast
@ -113,12 +97,10 @@ end
"(#{left_str} || #{right_str})" "(#{left_str} || #{right_str})"
end end
##
# Processes an if statement node. # Processes an if statement node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_if(ast) def on_if(ast)
cond, body, else_body = *ast cond, body, else_body = *ast
@ -144,12 +126,10 @@ end
end end
end end
##
# Processes a while statement node. # Processes a while statement node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_while(ast) def on_while(ast)
cond, body = *ast cond, body = *ast
@ -163,12 +143,10 @@ end
EOF EOF
end end
##
# Processes a method call node. # Processes a method call node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_send(ast) def on_send(ast)
receiver, name, *args = *ast receiver, name, *args = *ast
@ -188,12 +166,10 @@ end
call call
end end
##
# Processes a block node. # Processes a block node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_block(ast) def on_block(ast)
receiver, args, body = *ast receiver, args, body = *ast
@ -208,12 +184,10 @@ end
EOF EOF
end end
##
# Processes a Range node. # Processes a Range node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_range(ast) def on_range(ast)
start, stop = *ast start, stop = *ast
@ -223,32 +197,26 @@ end
"(#{start_str}..#{stop_str})" "(#{start_str}..#{stop_str})"
end end
##
# Processes a string node. # Processes a string node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_string(ast) def on_string(ast)
ast.to_a[0].inspect ast.to_a[0].inspect
end end
##
# Processes a Symbol node. # Processes a Symbol node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_symbol(ast) def on_symbol(ast)
ast.to_a[0].to_sym.inspect ast.to_a[0].to_sym.inspect
end end
##
# Processes a literal node. # Processes a literal node.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @return [String] # @return [String]
#
def on_lit(ast) def on_lit(ast)
ast.to_a[0] ast.to_a[0]
end end

View File

@ -1,6 +1,5 @@
module Oga module Oga
module Ruby module Ruby
##
# Class representing a single node in a Ruby AST. # Class representing a single node in a Ruby AST.
# #
# The setup of this class is roughly based on the "ast" Gem. The "ast" Gem # The setup of this class is roughly based on the "ast" Gem. The "ast" Gem
@ -25,7 +24,6 @@ module Oga
# end # end
# #
# @private # @private
#
class Node < BasicObject class Node < BasicObject
undef_method :!, :!= undef_method :!, :!=
@ -46,16 +44,13 @@ module Oga
alias_method :to_ary, :to_a alias_method :to_ary, :to_a
##
# Returns a "to_a" call node. # Returns a "to_a" call node.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def to_array def to_array
Node.new(:send, [self, :to_a]) Node.new(:send, [self, :to_a])
end end
##
# Returns an assignment node. # Returns an assignment node.
# #
# This method wraps assigned values in a begin/end block to ensure that # This method wraps assigned values in a begin/end block to ensure that
@ -63,7 +58,6 @@ module Oga
# #
# @param [Oga::Ruby::Node] other # @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def assign(other) def assign(other)
if other.type == :followed_by if other.type == :followed_by
other = other.wrap other = other.wrap
@ -72,133 +66,108 @@ module Oga
Node.new(:assign, [self, other]) Node.new(:assign, [self, other])
end end
##
# Returns an equality expression node. # Returns an equality expression node.
# #
# @param [Oga::Ruby::Node] other # @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def eq(other) def eq(other)
Node.new(:eq, [self, other]) Node.new(:eq, [self, other])
end end
##
# Returns a boolean "and" node. # Returns a boolean "and" node.
# #
# @param [Oga::Ruby::Node] other # @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def and(other) def and(other)
Node.new(:and, [self, other]) Node.new(:and, [self, other])
end end
##
# Returns a boolean "or" node. # Returns a boolean "or" node.
# #
# @param [Oga::Ruby::Node] other # @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def or(other) def or(other)
Node.new(:or, [self, other]) Node.new(:or, [self, other])
end end
##
# Returns a node that evaluates to its inverse. # Returns a node that evaluates to its inverse.
# #
# For example, a variable `foo` would be turned into `!foo`. # For example, a variable `foo` would be turned into `!foo`.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def not def not
!self !self
end end
##
# Returns a node for Ruby's "is_a?" method. # Returns a node for Ruby's "is_a?" method.
# #
# @param [Class] klass # @param [Class] klass
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def is_a?(klass) def is_a?(klass)
Node.new(:send, [self, 'is_a?', Node.new(:lit, [klass.to_s])]) Node.new(:send, [self, 'is_a?', Node.new(:lit, [klass.to_s])])
end end
##
# Wraps the current node in a block. # Wraps the current node in a block.
# #
# @param [Array] args Arguments (as Node instances) to pass to the block. # @param [Array] args Arguments (as Node instances) to pass to the block.
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def add_block(*args) def add_block(*args)
Node.new(:block, [self, args, yield]) Node.new(:block, [self, args, yield])
end end
##
# Wraps the current node in a `begin` node. # Wraps the current node in a `begin` node.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def wrap def wrap
Node.new(:begin, [self]) Node.new(:begin, [self])
end end
##
# Wraps the current node in an if statement node. # Wraps the current node in an if statement node.
# #
# The body of this statement is set to the return value of the supplied # The body of this statement is set to the return value of the supplied
# block. # block.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def if_true def if_true
Node.new(:if, [self, yield]) Node.new(:if, [self, yield])
end end
##
# Wraps the current node in an `if !...` statement. # Wraps the current node in an `if !...` statement.
# #
# @see [#if_true] # @see [#if_true]
#
def if_false def if_false
self.not.if_true { yield } self.not.if_true { yield }
end end
##
# Wraps the current node in a `while` statement. # Wraps the current node in a `while` statement.
# #
# The body of this statement is set to the return value of the supplied # The body of this statement is set to the return value of the supplied
# block. # block.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def while_true def while_true
Node.new(:while, [self, yield]) Node.new(:while, [self, yield])
end end
##
# Adds an "else" statement to the current node. # Adds an "else" statement to the current node.
# #
# This method assumes it's being called only on "if" nodes. # This method assumes it's being called only on "if" nodes.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def else def else
Node.new(:if, @children + [yield]) Node.new(:if, @children + [yield])
end end
##
# Chains two nodes together. # Chains two nodes together.
# #
# @param [Oga::Ruby::Node] other # @param [Oga::Ruby::Node] other
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def followed_by(other = nil) def followed_by(other = nil)
other = yield if ::Kernel.block_given? other = yield if ::Kernel.block_given?
Node.new(:followed_by, [self, other]) Node.new(:followed_by, [self, other])
end end
##
# Returns a node for a method call. # Returns a node for a method call.
# #
# @param [Symbol] name The name of the method to call. # @param [Symbol] name The name of the method to call.
@ -207,7 +176,6 @@ module Oga
# method. # method.
# #
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def method_missing(name, *args) def method_missing(name, *args)
Node.new(:send, [self, name.to_s, *args]) Node.new(:send, [self, name.to_s, *args])
end end

View File

@ -1,18 +1,12 @@
module Oga module Oga
##
# @api private # @api private
#
class Whitelist < Blacklist class Whitelist < Blacklist
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def allow?(name) def allow?(name)
names.include?(name) names.include?(name)
end end
##
# @return [Oga::Blacklist] # @return [Oga::Blacklist]
#
def to_blacklist def to_blacklist
Blacklist.new(names) Blacklist.new(names)
end end

View File

@ -1,8 +1,6 @@
module Oga module Oga
module XML module XML
##
# Class for storing information about a single XML attribute. # Class for storing information about a single XML attribute.
#
class Attribute class Attribute
include ExpandedName include ExpandedName
@ -19,25 +17,21 @@ module Oga
alias_method :parent, :element alias_method :parent, :element
##
# The default namespace available to all attributes. This namespace can # The default namespace available to all attributes. This namespace can
# not be modified. # not be modified.
# #
# @return [Oga::XML::Namespace] # @return [Oga::XML::Namespace]
#
DEFAULT_NAMESPACE = Namespace.new( DEFAULT_NAMESPACE = Namespace.new(
:name => 'xml', :name => 'xml',
:uri => XML::DEFAULT_NAMESPACE.uri :uri => XML::DEFAULT_NAMESPACE.uri
).freeze ).freeze
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :name # @option options [String] :name
# @option options [String] :namespace_name # @option options [String] :namespace_name
# @option options [String] :value # @option options [String] :value
# @option options [Oga::XML::Element] :element # @option options [Oga::XML::Element] :element
#
def initialize(options = {}) def initialize(options = {})
@name = options[:name] @name = options[:name]
@value = options[:value] @value = options[:value]
@ -46,12 +40,10 @@ module Oga
@namespace_name = options[:namespace_name] @namespace_name = options[:namespace_name]
end end
##
# Returns the {Oga::XML::Namespace} instance for the current namespace # Returns the {Oga::XML::Namespace} instance for the current namespace
# name. # name.
# #
# @return [Oga::XML::Namespace] # @return [Oga::XML::Namespace]
#
def namespace def namespace
unless @namespace unless @namespace
if namespace_name == DEFAULT_NAMESPACE.name if namespace_name == DEFAULT_NAMESPACE.name
@ -64,19 +56,15 @@ module Oga
@namespace @namespace
end end
##
# @param [String] value # @param [String] value
#
def value=(value) def value=(value)
@value = value @value = value
@decoded = false @decoded = false
end end
##
# Returns the value of the attribute or nil if no explicit value was set. # Returns the value of the attribute or nil if no explicit value was set.
# #
# @return [String|NilClass] # @return [String|NilClass]
#
def value def value
if !@decoded and @value if !@decoded and @value
@value = EntityDecoder.try_decode(@value, html?) @value = EntityDecoder.try_decode(@value, html?)
@ -86,18 +74,14 @@ module Oga
@value @value
end end
##
# @return [String] # @return [String]
#
def text def text
value.to_s value.to_s
end end
alias_method :to_s, :text alias_method :to_s, :text
##
# @return [String] # @return [String]
#
def to_xml def to_xml
if namespace_name if namespace_name
full_name = "#{namespace_name}:#{name}" full_name = "#{namespace_name}:#{name}"
@ -110,9 +94,7 @@ module Oga
%Q(#{full_name}="#{enc_value}") %Q(#{full_name}="#{enc_value}")
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
segments = [] segments = []
@ -138,9 +120,7 @@ module Oga
private private
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html? def html?
!!@element && @element.html? !!@element && @element.html?
end end

View File

@ -1,14 +1,10 @@
module Oga module Oga
module XML module XML
##
# Class used for storing information about CDATA tags. # Class used for storing information about CDATA tags.
#
class Cdata < CharacterNode class Cdata < CharacterNode
##
# Converts the node back to XML. # Converts the node back to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
"<![CDATA[#{text}]]>" "<![CDATA[#{text}]]>"
end end

View File

@ -1,34 +1,26 @@
module Oga module Oga
module XML module XML
##
# Base class for nodes that represent a text-like value such as Text and # Base class for nodes that represent a text-like value such as Text and
# Comment nodes. # Comment nodes.
#
class CharacterNode < Node class CharacterNode < Node
# @return [String] # @return [String]
attr_accessor :text attr_accessor :text
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :text The text of the node. # @option options [String] :text The text of the node.
#
def initialize(options = {}) def initialize(options = {})
super super
@text = options[:text] @text = options[:text]
end end
##
# @return [String] # @return [String]
#
def to_xml def to_xml
text.to_s text.to_s
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
"#{self.class.to_s.split('::').last}(#{text.inspect})" "#{self.class.to_s.split('::').last}(#{text.inspect})"
end end

View File

@ -1,14 +1,10 @@
module Oga module Oga
module XML module XML
##
# Class used for storing information about XML comments. # Class used for storing information about XML comments.
#
class Comment < CharacterNode class Comment < CharacterNode
##
# Converts the node back to XML. # Converts the node back to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
"<!--#{text}-->" "<!--#{text}-->"
end end

View File

@ -1,10 +1,8 @@
module Oga module Oga
module XML module XML
##
# The default XML namespace. # The default XML namespace.
# #
# @return [Oga::XML::Namespace] # @return [Oga::XML::Namespace]
#
DEFAULT_NAMESPACE = Namespace.new( DEFAULT_NAMESPACE = Namespace.new(
:name => 'xmlns', :name => 'xmlns',
:uri => 'http://www.w3.org/XML/1998/namespace' :uri => 'http://www.w3.org/XML/1998/namespace'

View File

@ -1,8 +1,6 @@
module Oga module Oga
module XML module XML
##
# Class used for storing information about Doctypes. # Class used for storing information about Doctypes.
#
class Doctype class Doctype
# The name of the doctype (e.g. "HTML"). # The name of the doctype (e.g. "HTML").
# @return [String] # @return [String]
@ -24,7 +22,6 @@ module Oga
# @return [String] # @return [String]
attr_accessor :inline_rules attr_accessor :inline_rules
##
# @example # @example
# dtd = Doctype.new(:name => 'html', :type => 'PUBLIC') # dtd = Doctype.new(:name => 'html', :type => 'PUBLIC')
# #
@ -34,7 +31,6 @@ module Oga
# @option options [String] :type # @option options [String] :type
# @option options [String] :public_id # @option options [String] :public_id
# @option options [String] :system_id # @option options [String] :system_id
#
def initialize(options = {}) def initialize(options = {})
@name = options[:name] @name = options[:name]
@type = options[:type] @type = options[:type]
@ -43,11 +39,9 @@ module Oga
@inline_rules = options[:inline_rules] @inline_rules = options[:inline_rules]
end end
##
# Converts the doctype back to XML. # Converts the doctype back to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
segments = "<!DOCTYPE #{name}" segments = "<!DOCTYPE #{name}"
@ -59,11 +53,9 @@ module Oga
segments + '>' segments + '>'
end end
##
# Inspects the doctype. # Inspects the doctype.
# #
# @return [String] # @return [String]
#
def inspect def inspect
segments = [] segments = []

View File

@ -1,9 +1,7 @@
module Oga module Oga
module XML module XML
##
# Class used for storing information about an entire XML document. This # Class used for storing information about an entire XML document. This
# includes the doctype, XML declaration, child nodes and more. # includes the doctype, XML declaration, child nodes and more.
#
class Document class Document
include Querying include Querying
include Traversal include Traversal
@ -18,14 +16,12 @@ module Oga
# @return [Symbol] # @return [Symbol]
attr_reader :type attr_reader :type
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [Oga::XML::NodeSet] :children # @option options [Oga::XML::NodeSet] :children
# @option options [Oga::XML::Doctype] :doctype # @option options [Oga::XML::Doctype] :doctype
# @option options [Oga::XML::XmlDeclaration] :xml_declaration # @option options [Oga::XML::XmlDeclaration] :xml_declaration
# @option options [Symbol] :type # @option options [Symbol] :type
#
def initialize(options = {}) def initialize(options = {})
@doctype = options[:doctype] @doctype = options[:doctype]
@xml_declaration = options[:xml_declaration] @xml_declaration = options[:xml_declaration]
@ -34,18 +30,14 @@ module Oga
self.children = options[:children] if options[:children] self.children = options[:children] if options[:children]
end end
##
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def children def children
@children ||= NodeSet.new([], self) @children ||= NodeSet.new([], self)
end end
##
# Sets the child nodes of the document. # Sets the child nodes of the document.
# #
# @param [Oga::XML::NodeSet|Array] nodes # @param [Oga::XML::NodeSet|Array] nodes
#
def children=(nodes) def children=(nodes)
if nodes.is_a?(NodeSet) if nodes.is_a?(NodeSet)
@children = nodes @children = nodes
@ -54,23 +46,19 @@ module Oga
end end
end end
##
# Returns self. # Returns self.
# #
# This method exists to make this class compatible with Element, which in # This method exists to make this class compatible with Element, which in
# turn makes it easier to use both in the XPath compiler. # turn makes it easier to use both in the XPath compiler.
# #
# @return [Oga::XML::Document] # @return [Oga::XML::Document]
#
def root_node def root_node
self self
end end
##
# Converts the document and its child nodes to XML. # Converts the document and its child nodes to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
xml = children.map(&:to_xml).join('') xml = children.map(&:to_xml).join('')
@ -85,19 +73,15 @@ module Oga
xml xml
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html? def html?
type.equal?(:html) type.equal?(:html)
end end
##
# Inspects the document and its child nodes. Child nodes are indented for # Inspects the document and its child nodes. Child nodes are indented for
# each nesting level. # each nesting level.
# #
# @return [String] # @return [String]
#
def inspect def inspect
segments = [] segments = []

View File

@ -1,9 +1,7 @@
module Oga module Oga
module XML module XML
##
# Class that contains information about an XML element such as the name, # Class that contains information about an XML element such as the name,
# attributes and child nodes. # attributes and child nodes.
#
class Element < Node class Element < Node
include Querying include Querying
include ExpandedName include ExpandedName
@ -20,14 +18,11 @@ module Oga
# @return [Hash] # @return [Hash]
attr_writer :namespaces attr_writer :namespaces
##
# The attribute prefix/namespace used for registering element namespaces. # The attribute prefix/namespace used for registering element namespaces.
# #
# @return [String] # @return [String]
#
XMLNS_PREFIX = 'xmlns'.freeze XMLNS_PREFIX = 'xmlns'.freeze
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :name The name of the element. # @option options [String] :name The name of the element.
@ -36,7 +31,6 @@ module Oga
# #
# @option options [Array<Oga::XML::Attribute>] :attributes The attributes # @option options [Array<Oga::XML::Attribute>] :attributes The attributes
# of the element as an Array. # of the element as an Array.
#
def initialize(options = {}) def initialize(options = {})
super super
@ -49,15 +43,12 @@ module Oga
register_namespaces_from_attributes register_namespaces_from_attributes
end end
##
# @param [String] name # @param [String] name
#
def namespace_name=(name) def namespace_name=(name)
@namespace_name = name @namespace_name = name
@namespace = nil @namespace = nil
end end
##
# Returns an attribute matching the given name (with or without the # Returns an attribute matching the given name (with or without the
# namespace). # namespace).
# #
@ -72,7 +63,6 @@ module Oga
# of the attribute. # of the attribute.
# #
# @return [Oga::XML::Attribute] # @return [Oga::XML::Attribute]
#
def attribute(name) def attribute(name)
name, ns = split_name(name) name, ns = split_name(name)
@ -85,32 +75,27 @@ module Oga
alias_method :attr, :attribute alias_method :attr, :attribute
##
# Returns the value of the given attribute. # Returns the value of the given attribute.
# #
# @example # @example
# element.get('class') # => "container" # element.get('class') # => "container"
# #
# @see [#attribute] # @see [#attribute]
#
def get(name) def get(name)
found = attribute(name) found = attribute(name)
found ? found.value : nil found ? found.value : nil
end end
##
# Adds a new attribute to the element. # Adds a new attribute to the element.
# #
# @param [Oga::XML::Attribute] attribute # @param [Oga::XML::Attribute] attribute
#
def add_attribute(attribute) def add_attribute(attribute)
attribute.element = self attribute.element = self
attributes << attribute attributes << attribute
end end
##
# Sets the value of an attribute to the given value. If the attribute does # Sets the value of an attribute to the given value. If the attribute does
# not exist it is created automatically. # not exist it is created automatically.
# #
@ -118,7 +103,6 @@ module Oga
# namespace. # namespace.
# #
# @param [String] value The new value of the attribute. # @param [String] value The new value of the attribute.
#
def set(name, value) def set(name, value)
found = attribute(name) found = attribute(name)
@ -141,25 +125,21 @@ module Oga
end end
end end
##
# Removes an attribute from the element. # Removes an attribute from the element.
# #
# @param [String] name The name (optionally including namespace prefix) # @param [String] name The name (optionally including namespace prefix)
# of the attribute to remove. # of the attribute to remove.
# #
# @return [Oga::XML::Attribute] # @return [Oga::XML::Attribute]
#
def unset(name) def unset(name)
found = attribute(name) found = attribute(name)
return attributes.delete(found) if found return attributes.delete(found) if found
end end
##
# Returns the namespace of the element. # Returns the namespace of the element.
# #
# @return [Oga::XML::Namespace] # @return [Oga::XML::Namespace]
#
def namespace def namespace
unless @namespace unless @namespace
available = available_namespaces available = available_namespaces
@ -169,40 +149,32 @@ module Oga
@namespace @namespace
end end
##
# Returns the namespaces registered on this element, or an empty Hash in # Returns the namespaces registered on this element, or an empty Hash in
# case of an HTML element. # case of an HTML element.
# #
# @return [Hash] # @return [Hash]
#
def namespaces def namespaces
html? ? {} : @namespaces html? ? {} : @namespaces
end end
##
# Returns true if the current element resides in the default XML # Returns true if the current element resides in the default XML
# namespace. # namespace.
# #
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def default_namespace? def default_namespace?
namespace == DEFAULT_NAMESPACE || namespace.nil? namespace == DEFAULT_NAMESPACE || namespace.nil?
end end
##
# Returns the text of all child nodes joined together. # Returns the text of all child nodes joined together.
# #
# @return [String] # @return [String]
#
def text def text
children.text children.text
end end
##
# Returns the text of the current element only. # Returns the text of the current element only.
# #
# @return [String] # @return [String]
#
def inner_text def inner_text
text = '' text = ''
@ -213,12 +185,10 @@ module Oga
text text
end end
##
# Returns any {Oga::XML::Text} nodes that are a direct child of this # Returns any {Oga::XML::Text} nodes that are a direct child of this
# element. # element.
# #
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def text_nodes def text_nodes
nodes = NodeSet.new nodes = NodeSet.new
@ -229,21 +199,17 @@ module Oga
nodes nodes
end end
##
# Sets the inner text of the current element to the given String. # Sets the inner text of the current element to the given String.
# #
# @param [String] text # @param [String] text
#
def inner_text=(text) def inner_text=(text)
text_node = XML::Text.new(:text => text) text_node = XML::Text.new(:text => text)
@children = NodeSet.new([text_node], self) @children = NodeSet.new([text_node], self)
end end
##
# Converts the element and its child elements to XML. # Converts the element and its child elements to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
if namespace_name if namespace_name
full_name = "#{namespace_name}:#{name}" full_name = "#{namespace_name}:#{name}"
@ -265,9 +231,7 @@ module Oga
end end
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
segments = [] segments = []
@ -284,7 +248,6 @@ module Oga
"Element(#{segments.join(' ')})" "Element(#{segments.join(' ')})"
end end
##
# Registers a new namespace for the current element and its child # Registers a new namespace for the current element and its child
# elements. # elements.
# #
@ -292,7 +255,6 @@ module Oga
# @param [String] uri # @param [String] uri
# @param [TrueClass|FalseClass] flush # @param [TrueClass|FalseClass] flush
# @see [Oga::XML::Namespace#initialize] # @see [Oga::XML::Namespace#initialize]
#
def register_namespace(name, uri, flush = true) def register_namespace(name, uri, flush = true)
if namespaces[name] if namespaces[name]
raise ArgumentError, "The namespace #{name.inspect} already exists" raise ArgumentError, "The namespace #{name.inspect} already exists"
@ -303,12 +265,10 @@ module Oga
flush_namespaces_cache if flush flush_namespaces_cache if flush
end end
##
# Returns a Hash containing all the namespaces available to the current # Returns a Hash containing all the namespaces available to the current
# element. # element.
# #
# @return [Hash] # @return [Hash]
#
def available_namespaces def available_namespaces
# HTML(5) completely ignores namespaces # HTML(5) completely ignores namespaces
unless @available_namespaces unless @available_namespaces
@ -333,11 +293,9 @@ module Oga
@available_namespaces @available_namespaces
end end
##
# Returns `true` if the element is a self-closing element. # Returns `true` if the element is a self-closing element.
# #
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def self_closing? def self_closing?
self_closing = children.empty? self_closing = children.empty?
root = root_node root = root_node
@ -350,10 +308,8 @@ module Oga
self_closing self_closing
end end
##
# Flushes the namespaces cache of the current element and all its child # Flushes the namespaces cache of the current element and all its child
# elements. # elements.
#
def flush_namespaces_cache def flush_namespaces_cache
@available_namespaces = nil @available_namespaces = nil
@namespace = nil @namespace = nil
@ -365,9 +321,7 @@ module Oga
private private
##
# Registers namespaces based on any "xmlns" attributes. # Registers namespaces based on any "xmlns" attributes.
#
def register_namespaces_from_attributes def register_namespaces_from_attributes
flush = false flush = false
@ -386,31 +340,25 @@ module Oga
flush_namespaces_cache if flush flush_namespaces_cache if flush
end end
##
# Links all attributes to the current element. # Links all attributes to the current element.
#
def link_attributes def link_attributes
attributes.each do |attr| attributes.each do |attr|
attr.element = self attr.element = self
end end
end end
##
# @param [String] name # @param [String] name
# @return [Array] # @return [Array]
#
def split_name(name) def split_name(name)
segments = name.to_s.split(':') segments = name.to_s.split(':')
[segments.pop, segments.pop] [segments.pop, segments.pop]
end end
##
# @param [Oga::XML::Attribute] attr # @param [Oga::XML::Attribute] attr
# @param [String] ns # @param [String] ns
# @param [String] name # @param [String] name
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def attribute_matches?(attr, ns, name) def attribute_matches?(attr, ns, name)
name_matches = attr.name == name name_matches = attr.name == name
ns_matches = false ns_matches = false

View File

@ -1,18 +1,14 @@
module Oga module Oga
module XML module XML
##
# Module for encoding/decoding XML and HTML entities. The mapping of HTML # Module for encoding/decoding XML and HTML entities. The mapping of HTML
# entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}. # entities can be found in {Oga::HTML::Entities::DECODE_MAPPING}.
#
module Entities module Entities
##
# Hash containing XML entities and the corresponding characters. # Hash containing XML entities and the corresponding characters.
# #
# The `&amp;` mapping must come last to ensure proper conversion of non # The `&amp;` mapping must come last to ensure proper conversion of non
# encoded to encoded forms (see {Oga::XML::Text#to_xml}). # encoded to encoded forms (see {Oga::XML::Text#to_xml}).
# #
# @return [Hash] # @return [Hash]
#
DECODE_MAPPING = { DECODE_MAPPING = {
'&lt;' => '<', '&lt;' => '<',
'&gt;' => '>', '&gt;' => '>',
@ -21,23 +17,19 @@ module Oga
'&amp;' => '&', '&amp;' => '&',
} }
##
# Hash containing characters and the corresponding XML entities. # Hash containing characters and the corresponding XML entities.
# #
# @return [Hash] # @return [Hash]
#
ENCODE_MAPPING = { ENCODE_MAPPING = {
'&' => '&amp;', '&' => '&amp;',
'>' => '&gt;', '>' => '&gt;',
'<' => '&lt;', '<' => '&lt;',
} }
##
# Hash containing characters and the corresponding XML entities to use # Hash containing characters and the corresponding XML entities to use
# when encoding XML/HTML attribute values. # when encoding XML/HTML attribute values.
# #
# @return [Hash] # @return [Hash]
#
ENCODE_ATTRIBUTE_MAPPING = { ENCODE_ATTRIBUTE_MAPPING = {
'&' => '&amp;', '&' => '&amp;',
'>' => '&gt;', '>' => '&gt;',
@ -46,50 +38,36 @@ module Oga
'"' => '&quot;' '"' => '&quot;'
} }
##
# @return [String] # @return [String]
#
AMPERSAND = '&'.freeze AMPERSAND = '&'.freeze
##
# Regexp for matching XML/HTML entities such as "&nbsp;". # Regexp for matching XML/HTML entities such as "&nbsp;".
# #
# @return [Regexp] # @return [Regexp]
#
REGULAR_ENTITY = /&[a-zA-Z0-9]+;/ REGULAR_ENTITY = /&[a-zA-Z0-9]+;/
##
# Regexp for matching XML/HTML numeric entities such as "&#38;". # Regexp for matching XML/HTML numeric entities such as "&#38;".
# #
# @return [Regexp] # @return [Regexp]
#
NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/ NUMERIC_CODE_POINT_ENTITY = /&#(\d+);/
##
# Regexp for matching XML/HTML hex entities such as "&#x3C;". # Regexp for matching XML/HTML hex entities such as "&#x3C;".
# #
# @return [Regexp] # @return [Regexp]
#
HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/ HEX_CODE_POINT_ENTITY = /&#x([a-fA-F0-9]+);/
##
# @return [Regexp] # @return [Regexp]
#
ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|')) ENCODE_REGEXP = Regexp.new(ENCODE_MAPPING.keys.join('|'))
##
# @return [Regexp] # @return [Regexp]
#
ENCODE_ATTRIBUTE_REGEXP = ENCODE_ATTRIBUTE_REGEXP =
Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|')) Regexp.new(ENCODE_ATTRIBUTE_MAPPING.keys.join('|'))
##
# Decodes XML entities. # Decodes XML entities.
# #
# @param [String] input # @param [String] input
# @param [Hash] mapping # @param [Hash] mapping
# @return [String] # @return [String]
#
def self.decode(input, mapping = DECODE_MAPPING) def self.decode(input, mapping = DECODE_MAPPING)
return input unless input.include?(AMPERSAND) return input unless input.include?(AMPERSAND)
@ -110,23 +88,19 @@ module Oga
input input
end end
##
# Encodes special characters as XML entities. # Encodes special characters as XML entities.
# #
# @param [String] input # @param [String] input
# @param [Hash] mapping # @param [Hash] mapping
# @return [String] # @return [String]
#
def self.encode(input, mapping = ENCODE_MAPPING) def self.encode(input, mapping = ENCODE_MAPPING)
input.gsub(ENCODE_REGEXP, mapping) input.gsub(ENCODE_REGEXP, mapping)
end end
##
# Encodes special characters in an XML attribute value. # Encodes special characters in an XML attribute value.
# #
# @param [String] input # @param [String] input
# @return [String] # @return [String]
#
def self.encode_attribute(input) def self.encode_attribute(input)
input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING) input.gsub(ENCODE_ATTRIBUTE_REGEXP, ENCODE_ATTRIBUTE_MAPPING)
end end

View File

@ -1,11 +1,9 @@
module Oga module Oga
module XML module XML
module ExpandedName module ExpandedName
##
# Returns the expanded name of the current Element or Attribute. # Returns the expanded name of the current Element or Attribute.
# #
# @return [String] # @return [String]
#
def expanded_name def expanded_name
namespace_name ? "#{namespace_name}:#{name}" : name namespace_name ? "#{namespace_name}:#{name}" : name
end end

View File

@ -1,12 +1,10 @@
module Oga module Oga
module XML module XML
##
# Names of the HTML void elements that should be handled when HTML lexing # Names of the HTML void elements that should be handled when HTML lexing
# is enabled. # is enabled.
# #
# @api private # @api private
# @return [Oga::Whitelist] # @return [Oga::Whitelist]
#
HTML_VOID_ELEMENTS = Whitelist.new(%w{ HTML_VOID_ELEMENTS = Whitelist.new(%w{
area base br col command embed hr img input keygen link meta param source area base br col command embed hr img input keygen link meta param source
track wbr track wbr

View File

@ -1,6 +1,5 @@
module Oga module Oga
module XML module XML
##
# Low level lexer that supports both XML and HTML (using an extra option). # Low level lexer that supports both XML and HTML (using an extra option).
# To lex HTML input set the `:html` option to `true` when creating an # To lex HTML input set the `:html` option to `true` when creating an
# instance of the lexer: # instance of the lexer:
@ -46,7 +45,6 @@ module Oga
# Strict mode only applies to XML documents. # Strict mode only applies to XML documents.
# #
# @private # @private
#
class Lexer class Lexer
# These are all constant/frozen to remove the need for String allocations # These are all constant/frozen to remove the need for String allocations
# every time they are referenced in the lexer. # every time they are referenced in the lexer.
@ -96,12 +94,9 @@ module Oga
HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key] HTML_CLOSE_SELF[key.upcase] = HTML_CLOSE_SELF[key]
end end
##
# Names of HTML tags of which the content should be lexed as-is. # Names of HTML tags of which the content should be lexed as-is.
#
LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE]) LITERAL_HTML_ELEMENTS = Whitelist.new([HTML_SCRIPT, HTML_STYLE])
##
# @param [String|IO] data The data to lex. This can either be a String or # @param [String|IO] data The data to lex. This can either be a String or
# an IO instance. # an IO instance.
# #
@ -113,7 +108,6 @@ module Oga
# #
# @option options [TrueClass|FalseClass] :strict Enables/disables strict # @option options [TrueClass|FalseClass] :strict Enables/disables strict
# parsing of XML documents, disabled by default. # parsing of XML documents, disabled by default.
#
def initialize(data, options = {}) def initialize(data, options = {})
@data = data @data = data
@html = options[:html] @html = options[:html]
@ -122,11 +116,9 @@ module Oga
reset reset
end end
##
# Resets the internal state of the lexer. Typically you don't need to # Resets the internal state of the lexer. Typically you don't need to
# call this method yourself as its called by #lex after lexing a given # call this method yourself as its called by #lex after lexing a given
# String. # String.
#
def reset def reset
@line = 1 @line = 1
@elements = [] @elements = []
@ -136,12 +128,10 @@ module Oga
reset_native reset_native
end end
##
# Yields the data to lex to the supplied block. # Yields the data to lex to the supplied block.
# #
# @return [String] # @return [String]
# @yieldparam [String] # @yieldparam [String]
#
def read_data def read_data
if @data.is_a?(String) if @data.is_a?(String)
yield @data yield @data
@ -157,7 +147,6 @@ module Oga
end end
end end
##
# Gathers all the tokens for the input and returns them as an Array. # Gathers all the tokens for the input and returns them as an Array.
# #
# This method resets the internal state of the lexer after consuming the # This method resets the internal state of the lexer after consuming the
@ -165,7 +154,6 @@ module Oga
# #
# @see #advance # @see #advance
# @return [Array] # @return [Array]
#
def lex def lex
tokens = [] tokens = []
@ -178,7 +166,6 @@ module Oga
tokens tokens
end end
##
# Advances through the input and generates the corresponding tokens. Each # Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block. # token is yielded to the supplied block.
# #
@ -196,7 +183,6 @@ module Oga
# @yieldparam [Symbol] type # @yieldparam [Symbol] type
# @yieldparam [String] value # @yieldparam [String] value
# @yieldparam [Fixnum] line # @yieldparam [Fixnum] line
#
def advance(&block) def advance(&block)
@block = block @block = block
@ -212,44 +198,33 @@ module Oga
@block = nil @block = nil
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html? def html?
@html == true @html == true
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def strict? def strict?
@strict @strict
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html_script? def html_script?
html? && current_element == HTML_SCRIPT html? && current_element == HTML_SCRIPT
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html_style? def html_style?
html? && current_element == HTML_STYLE html? && current_element == HTML_STYLE
end end
private private
##
# @param [Fixnum] amount The amount of lines to advance. # @param [Fixnum] amount The amount of lines to advance.
#
def advance_line(amount = 1) def advance_line(amount = 1)
@line += amount @line += amount
end end
##
# Calls the supplied block with the information of the current token. # Calls the supplied block with the information of the current token.
# #
# @param [Symbol] type The token type. # @param [Symbol] type The token type.
@ -258,192 +233,145 @@ module Oga
# @yieldparam [Symbol] type # @yieldparam [Symbol] type
# @yieldparam [String] value # @yieldparam [String] value
# @yieldparam [Fixnum] line # @yieldparam [Fixnum] line
#
def add_token(type, value = nil) def add_token(type, value = nil)
@block.call(type, value, @line) @block.call(type, value, @line)
end end
##
# Returns the name of the element we're currently in. # Returns the name of the element we're currently in.
# #
# @return [String] # @return [String]
#
def current_element def current_element
@elements.last @elements.last
end end
##
# Called when processing a single quote. # Called when processing a single quote.
#
def on_string_squote def on_string_squote
add_token(:T_STRING_SQUOTE) add_token(:T_STRING_SQUOTE)
end end
##
# Called when processing a double quote. # Called when processing a double quote.
#
def on_string_dquote def on_string_dquote
add_token(:T_STRING_DQUOTE) add_token(:T_STRING_DQUOTE)
end end
##
# Called when processing the body of a string. # Called when processing the body of a string.
# #
# @param [String] value The data between the quotes. # @param [String] value The data between the quotes.
#
def on_string_body(value) def on_string_body(value)
add_token(:T_STRING_BODY, value) add_token(:T_STRING_BODY, value)
end end
##
# Called when a doctype starts. # Called when a doctype starts.
#
def on_doctype_start def on_doctype_start
add_token(:T_DOCTYPE_START) add_token(:T_DOCTYPE_START)
end end
##
# Called on the identifier specifying the type of the doctype. # Called on the identifier specifying the type of the doctype.
# #
# @param [String] value # @param [String] value
#
def on_doctype_type(value) def on_doctype_type(value)
add_token(:T_DOCTYPE_TYPE, value) add_token(:T_DOCTYPE_TYPE, value)
end end
##
# Called on the identifier specifying the name of the doctype. # Called on the identifier specifying the name of the doctype.
# #
# @param [String] value # @param [String] value
#
def on_doctype_name(value) def on_doctype_name(value)
add_token(:T_DOCTYPE_NAME, value) add_token(:T_DOCTYPE_NAME, value)
end end
##
# Called on the end of a doctype. # Called on the end of a doctype.
#
def on_doctype_end def on_doctype_end
add_token(:T_DOCTYPE_END) add_token(:T_DOCTYPE_END)
end end
##
# Called on an inline doctype block. # Called on an inline doctype block.
# #
# @param [String] value # @param [String] value
#
def on_doctype_inline(value) def on_doctype_inline(value)
add_token(:T_DOCTYPE_INLINE, value) add_token(:T_DOCTYPE_INLINE, value)
end end
##
# Called on the open CDATA tag. # Called on the open CDATA tag.
#
def on_cdata_start def on_cdata_start
add_token(:T_CDATA_START) add_token(:T_CDATA_START)
end end
##
# Called on the closing CDATA tag. # Called on the closing CDATA tag.
#
def on_cdata_end def on_cdata_end
add_token(:T_CDATA_END) add_token(:T_CDATA_END)
end end
##
# Called for the body of a CDATA tag. # Called for the body of a CDATA tag.
# #
# @param [String] value # @param [String] value
#
def on_cdata_body(value) def on_cdata_body(value)
add_token(:T_CDATA_BODY, value) add_token(:T_CDATA_BODY, value)
end end
##
# Called on the open comment tag. # Called on the open comment tag.
#
def on_comment_start def on_comment_start
add_token(:T_COMMENT_START) add_token(:T_COMMENT_START)
end end
##
# Called on the closing comment tag. # Called on the closing comment tag.
#
def on_comment_end def on_comment_end
add_token(:T_COMMENT_END) add_token(:T_COMMENT_END)
end end
##
# Called on a comment. # Called on a comment.
# #
# @param [String] value # @param [String] value
#
def on_comment_body(value) def on_comment_body(value)
add_token(:T_COMMENT_BODY, value) add_token(:T_COMMENT_BODY, value)
end end
##
# Called on the start of an XML declaration tag. # Called on the start of an XML declaration tag.
#
def on_xml_decl_start def on_xml_decl_start
add_token(:T_XML_DECL_START) add_token(:T_XML_DECL_START)
end end
##
# Called on the end of an XML declaration tag. # Called on the end of an XML declaration tag.
#
def on_xml_decl_end def on_xml_decl_end
add_token(:T_XML_DECL_END) add_token(:T_XML_DECL_END)
end end
##
# Called on the start of a processing instruction. # Called on the start of a processing instruction.
#
def on_proc_ins_start def on_proc_ins_start
add_token(:T_PROC_INS_START) add_token(:T_PROC_INS_START)
end end
##
# Called on a processing instruction name. # Called on a processing instruction name.
# #
# @param [String] value # @param [String] value
#
def on_proc_ins_name(value) def on_proc_ins_name(value)
add_token(:T_PROC_INS_NAME, value) add_token(:T_PROC_INS_NAME, value)
end end
##
# Called on the body of a processing instruction. # Called on the body of a processing instruction.
# #
# @param [String] value # @param [String] value
#
def on_proc_ins_body(value) def on_proc_ins_body(value)
add_token(:T_PROC_INS_BODY, value) add_token(:T_PROC_INS_BODY, value)
end end
##
# Called on the end of a processing instruction. # Called on the end of a processing instruction.
#
def on_proc_ins_end def on_proc_ins_end
add_token(:T_PROC_INS_END) add_token(:T_PROC_INS_END)
end end
##
# Called on the name of an element. # Called on the name of an element.
# #
# @param [String] name The name of the element, including namespace. # @param [String] name The name of the element, including namespace.
#
def on_element_name(name) def on_element_name(name)
before_html_element_name(name) if html? before_html_element_name(name) if html?
add_element(name) add_element(name)
end end
##
# Handles inserting of any missing tags whenever a new HTML tag is opened. # Handles inserting of any missing tags whenever a new HTML tag is opened.
# #
# @param [String] name # @param [String] name
#
def before_html_element_name(name) def before_html_element_name(name)
close_current = HTML_CLOSE_SELF[current_element] close_current = HTML_CLOSE_SELF[current_element]
@ -463,27 +391,21 @@ module Oga
end end
end end
##
# @param [String] name # @param [String] name
#
def add_element(name) def add_element(name)
@elements << name @elements << name
add_token(:T_ELEM_NAME, name) add_token(:T_ELEM_NAME, name)
end end
##
# Called on the element namespace. # Called on the element namespace.
# #
# @param [String] namespace # @param [String] namespace
#
def on_element_ns(namespace) def on_element_ns(namespace)
add_token(:T_ELEM_NS, namespace) add_token(:T_ELEM_NS, namespace)
end end
##
# Called on the closing `>` of the open tag of an element. # Called on the closing `>` of the open tag of an element.
#
def on_element_open_end def on_element_open_end
return unless html? return unless html?
@ -496,12 +418,10 @@ module Oga
end end
end end
##
# Called on the closing tag of an element. # Called on the closing tag of an element.
# #
# @param [String] name The name of the element (minus namespace # @param [String] name The name of the element (minus namespace
# prefix). This is not set for self closing tags. # prefix). This is not set for self closing tags.
#
def on_element_end(name = nil) def on_element_end(name = nil)
return if @elements.empty? return if @elements.empty?
@ -520,31 +440,25 @@ module Oga
@elements.pop @elements.pop
end end
##
# Called on regular text values. # Called on regular text values.
# #
# @param [String] value # @param [String] value
#
def on_text(value) def on_text(value)
return if value.empty? return if value.empty?
add_token(:T_TEXT, value) add_token(:T_TEXT, value)
end end
##
# Called on attribute namespaces. # Called on attribute namespaces.
# #
# @param [String] value # @param [String] value
#
def on_attribute_ns(value) def on_attribute_ns(value)
add_token(:T_ATTR_NS, value) add_token(:T_ATTR_NS, value)
end end
##
# Called on tag attributes. # Called on tag attributes.
# #
# @param [String] value # @param [String] value
#
def on_attribute(value) def on_attribute(value)
add_token(:T_ATTR, value) add_token(:T_ATTR, value)
end end

View File

@ -1,9 +1,7 @@
module Oga module Oga
module XML module XML
##
# The Namespace class contains information about XML namespaces such as the # The Namespace class contains information about XML namespaces such as the
# name and URI. # name and URI.
#
class Namespace class Namespace
# @return [String] # @return [String]
attr_accessor :name attr_accessor :name
@ -11,35 +9,27 @@ module Oga
# @return [String] # @return [String]
attr_accessor :uri attr_accessor :uri
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :name # @option options [String] :name
# @option options [String] :uri # @option options [String] :uri
#
def initialize(options = {}) def initialize(options = {})
@name = options[:name] @name = options[:name]
@uri = options[:uri] @uri = options[:uri]
end end
##
# @return [String] # @return [String]
#
def to_s def to_s
name.to_s name.to_s
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
"Namespace(name: #{name.inspect} uri: #{uri.inspect})" "Namespace(name: #{name.inspect} uri: #{uri.inspect})"
end end
##
# @param [Oga::XML::Namespace] other # @param [Oga::XML::Namespace] other
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def ==(other) def ==(other)
other.is_a?(self.class) && name == other.name && uri == other.uri other.is_a?(self.class) && name == other.name && uri == other.uri
end end

View File

@ -1,17 +1,14 @@
module Oga module Oga
module XML module XML
##
# A generic XML node. Instances of this class can belong to a # A generic XML node. Instances of this class can belong to a
# {Oga::XML::NodeSet} and can be used to query surrounding and parent # {Oga::XML::NodeSet} and can be used to query surrounding and parent
# nodes. # nodes.
#
class Node class Node
include Traversal include Traversal
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
attr_reader :node_set attr_reader :node_set
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [Oga::XML::NodeSet] :node_set The node set that this # @option options [Oga::XML::NodeSet] :node_set The node set that this
@ -19,35 +16,28 @@ module Oga
# #
# @option options [Oga::XML::NodeSet|Array] :children The child nodes of # @option options [Oga::XML::NodeSet|Array] :children The child nodes of
# the current node. # the current node.
#
def initialize(options = {}) def initialize(options = {})
self.node_set = options[:node_set] self.node_set = options[:node_set]
self.children = options[:children] if options[:children] self.children = options[:children] if options[:children]
end end
##
# @param [Oga::XML::NodeSet] set # @param [Oga::XML::NodeSet] set
#
def node_set=(set) def node_set=(set)
@node_set = set @node_set = set
@root_node = nil @root_node = nil
@html_p = nil @html_p = nil
end end
##
# Returns the child nodes of the current node. # Returns the child nodes of the current node.
# #
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def children def children
@children ||= NodeSet.new([], self) @children ||= NodeSet.new([], self)
end end
##
# Sets the child nodes of the element. # Sets the child nodes of the element.
# #
# @param [Oga::XML::NodeSet|Array] nodes # @param [Oga::XML::NodeSet|Array] nodes
#
def children=(nodes) def children=(nodes)
if nodes.is_a?(NodeSet) if nodes.is_a?(NodeSet)
@children = nodes @children = nodes
@ -56,32 +46,26 @@ module Oga
end end
end end
##
# Returns the parent node of the current node. If there is no parent node # Returns the parent node of the current node. If there is no parent node
# `nil` is returned instead. # `nil` is returned instead.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def parent def parent
node_set ? node_set.owner : nil node_set ? node_set.owner : nil
end end
##
# Returns the preceding node, or nil if there is none. # Returns the preceding node, or nil if there is none.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def previous def previous
index = node_set.index(self) - 1 index = node_set.index(self) - 1
index >= 0 ? node_set[index] : nil index >= 0 ? node_set[index] : nil
end end
##
# Returns the following node, or nil if there is none. # Returns the following node, or nil if there is none.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def next def next
index = node_set.index(self) + 1 index = node_set.index(self) + 1
length = node_set.length length = node_set.length
@ -89,11 +73,9 @@ module Oga
index <= length ? node_set[index] : nil index <= length ? node_set[index] : nil
end end
##
# Returns the previous element node or nil if there is none. # Returns the previous element node or nil if there is none.
# #
# @return [Oga::XML::Element] # @return [Oga::XML::Element]
#
def previous_element def previous_element
node = self node = self
@ -104,11 +86,9 @@ module Oga
return return
end end
##
# Returns the next element node or nil if there is none. # Returns the next element node or nil if there is none.
# #
# @return [Oga::XML::Element] # @return [Oga::XML::Element]
#
def next_element def next_element
node = self node = self
@ -119,12 +99,10 @@ module Oga
return return
end end
##
# Returns the root document/node of the current node. The node is # Returns the root document/node of the current node. The node is
# retrieved by traversing upwards in the DOM tree from the current node. # retrieved by traversing upwards in the DOM tree from the current node.
# #
# @return [Oga::XML::Document|Oga::XML::Node] # @return [Oga::XML::Document|Oga::XML::Node]
#
def root_node def root_node
unless @root_node unless @root_node
node = self node = self
@ -143,16 +121,13 @@ module Oga
@root_node @root_node
end end
##
# Removes the current node from the owning node set. # Removes the current node from the owning node set.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def remove def remove
return node_set.delete(self) if node_set return node_set.delete(self) if node_set
end end
##
# Replaces the current node with another. # Replaces the current node with another.
# #
# @example Replacing with an element # @example Replacing with an element
@ -163,7 +138,6 @@ module Oga
# some_node.replace('this will replace the current node with a text node') # some_node.replace('this will replace the current node with a text node')
# #
# @param [String|Oga::XML::Node] other # @param [String|Oga::XML::Node] other
#
def replace(other) def replace(other)
if other.is_a?(String) if other.is_a?(String)
other = Text.new(:text => other) other = Text.new(:text => other)
@ -173,31 +147,25 @@ module Oga
remove remove
end end
##
# Inserts the given node before the current node. # Inserts the given node before the current node.
# #
# @param [Oga::XML::Node] other # @param [Oga::XML::Node] other
#
def before(other) def before(other)
index = node_set.index(self) index = node_set.index(self)
node_set.insert(index, other) node_set.insert(index, other)
end end
##
# Inserts the given node after the current node. # Inserts the given node after the current node.
# #
# @param [Oga::XML::Node] other # @param [Oga::XML::Node] other
#
def after(other) def after(other)
index = node_set.index(self) + 1 index = node_set.index(self) + 1
node_set.insert(index, other) node_set.insert(index, other)
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def html? def html?
if @html_p.nil? if @html_p.nil?
root = root_node root = root_node
@ -208,14 +176,11 @@ module Oga
@html_p @html_p
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def xml? def xml?
!html? !html?
end end
##
# Yields all ancestor elements of the current node. # Yields all ancestor elements of the current node.
# #
# @example # @example
@ -224,7 +189,6 @@ module Oga
# end # end
# #
# @yieldparam [Oga::XML::Node] # @yieldparam [Oga::XML::Node]
#
def each_ancestor def each_ancestor
node = parent node = parent

View File

@ -1,6 +1,5 @@
module Oga module Oga
module XML module XML
##
# The NodeSet class contains a set of unique {Oga::XML::Node} instances that # The NodeSet class contains a set of unique {Oga::XML::Node} instances that
# can be queried and modified. Optionally NodeSet instances can take # can be queried and modified. Optionally NodeSet instances can take
# ownership of a node (besides just containing it). This allows the nodes to # ownership of a node (besides just containing it). This allows the nodes to
@ -30,17 +29,14 @@ module Oga
# #
# If ownership was not handled then you'd have to manually set the # If ownership was not handled then you'd have to manually set the
# `element` variable's `node_set` attribute after pushing it into a set. # `element` variable's `node_set` attribute after pushing it into a set.
#
class NodeSet class NodeSet
include Enumerable include Enumerable
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
attr_accessor :owner attr_accessor :owner
##
# @param [Array] nodes The nodes to add to the set. # @param [Array] nodes The nodes to add to the set.
# @param [Oga::XML::Node] owner The owner of the set. # @param [Oga::XML::Node] owner The owner of the set.
#
def initialize(nodes = [], owner = nil) def initialize(nodes = [], owner = nil)
@nodes = nodes @nodes = nodes
@owner = owner @owner = owner
@ -53,38 +49,30 @@ module Oga
end end
end end
##
# Yields the supplied block for every node. # Yields the supplied block for every node.
# #
# @yieldparam [Oga::XML::Node] # @yieldparam [Oga::XML::Node]
#
def each def each
@nodes.each { |node| yield node } @nodes.each { |node| yield node }
end end
##
# Returns the last node in the set. # Returns the last node in the set.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def last def last
@nodes[-1] @nodes[-1]
end end
##
# Returns `true` if the set is empty. # Returns `true` if the set is empty.
# #
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def empty? def empty?
@nodes.empty? @nodes.empty?
end end
##
# Returns the amount of nodes in the set. # Returns the amount of nodes in the set.
# #
# @return [Fixnum] # @return [Fixnum]
#
def length def length
@nodes.length @nodes.length
end end
@ -92,21 +80,17 @@ module Oga
alias_method :count, :length alias_method :count, :length
alias_method :size, :length alias_method :size, :length
##
# Returns the index of the given node. # Returns the index of the given node.
# #
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
# @return [Fixnum] # @return [Fixnum]
#
def index(node) def index(node)
@nodes.index(node) @nodes.index(node)
end end
##
# Pushes the node at the end of the set. # Pushes the node at the end of the set.
# #
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
#
def push(node) def push(node)
return if exists?(node) return if exists?(node)
@ -119,11 +103,9 @@ module Oga
alias_method :<<, :push alias_method :<<, :push
##
# Pushes the node at the start of the set. # Pushes the node at the start of the set.
# #
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
#
def unshift(node) def unshift(node)
return if exists?(node) return if exists?(node)
@ -134,11 +116,9 @@ module Oga
take_ownership(node) if @owner take_ownership(node) if @owner
end end
##
# Shifts a node from the start of the set. # Shifts a node from the start of the set.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def shift def shift
node = @nodes.shift node = @nodes.shift
@ -151,11 +131,9 @@ module Oga
node node
end end
##
# Pops a node from the end of the set. # Pops a node from the end of the set.
# #
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def pop def pop
node = @nodes.pop node = @nodes.pop
@ -168,12 +146,10 @@ module Oga
node node
end end
##
# Inserts a node into the set at the given index. # Inserts a node into the set at the given index.
# #
# @param [Fixnum] index The index to insert the node at. # @param [Fixnum] index The index to insert the node at.
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
#
def insert(index, node) def insert(index, node)
return if exists?(node) return if exists?(node)
@ -184,73 +160,59 @@ module Oga
take_ownership(node) if @owner take_ownership(node) if @owner
end end
##
# Returns the node for the given index. # Returns the node for the given index.
# #
# @param [Fixnum] index # @param [Fixnum] index
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
#
def [](index) def [](index)
@nodes[index] @nodes[index]
end end
##
# Converts the current set to an Array. # Converts the current set to an Array.
# #
# @return [Array] # @return [Array]
#
def to_a def to_a
@nodes @nodes
end end
##
# Creates a new set based on the current and the specified set. The newly # Creates a new set based on the current and the specified set. The newly
# created set does not inherit ownership rules of the current set. # created set does not inherit ownership rules of the current set.
# #
# @param [Oga::XML::NodeSet] other # @param [Oga::XML::NodeSet] other
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def +(other) def +(other)
self.class.new(to_a | other.to_a) self.class.new(to_a | other.to_a)
end end
##
# Returns `true` if the current node set and the one given in `other` are # Returns `true` if the current node set and the one given in `other` are
# equal to each other. # equal to each other.
# #
# @param [Oga::XML::NodeSet] other # @param [Oga::XML::NodeSet] other
#
def ==(other) def ==(other)
other.is_a?(NodeSet) && other.equal_nodes?(@nodes) other.is_a?(NodeSet) && other.equal_nodes?(@nodes)
end end
##
# Returns `true` if the nodes given in `nodes` are equal to those # Returns `true` if the nodes given in `nodes` are equal to those
# specified in the current `@nodes` variable. This method allows two # specified in the current `@nodes` variable. This method allows two
# NodeSet instances to compare each other without the need of exposing # NodeSet instances to compare each other without the need of exposing
# `@nodes` to the public. # `@nodes` to the public.
# #
# @param [Array<Oga::XML::Node>] nodes # @param [Array<Oga::XML::Node>] nodes
#
def equal_nodes?(nodes) def equal_nodes?(nodes)
@nodes == nodes @nodes == nodes
end end
##
# Adds the nodes of the given node set to the current node set. # Adds the nodes of the given node set to the current node set.
# #
# @param [Oga::XML::NodeSet] other # @param [Oga::XML::NodeSet] other
#
def concat(other) def concat(other)
other.each { |node| push(node) } other.each { |node| push(node) }
end end
##
# Removes the current nodes from their owning set. The nodes are *not* # Removes the current nodes from their owning set. The nodes are *not*
# removed from the current set. # removed from the current set.
# #
# This method is intended to remove nodes from an XML document/node. # This method is intended to remove nodes from an XML document/node.
#
def remove def remove
sets = [] sets = []
@ -270,9 +232,7 @@ module Oga
end end
end end
##
# Removes a node from the current set only. # Removes a node from the current set only.
#
def delete(node) def delete(node)
removed = @nodes.delete(node) removed = @nodes.delete(node)
@ -285,12 +245,10 @@ module Oga
removed removed
end end
##
# Returns the values of the given attribute. # Returns the values of the given attribute.
# #
# @param [String|Symbol] name The name of the attribute. # @param [String|Symbol] name The name of the attribute.
# @return [Array] # @return [Array]
#
def attribute(name) def attribute(name)
values = [] values = []
@ -305,11 +263,9 @@ module Oga
alias_method :attr, :attribute alias_method :attr, :attribute
##
# Returns the text of all nodes in the set, ignoring comment nodes. # Returns the text of all nodes in the set, ignoring comment nodes.
# #
# @return [String] # @return [String]
#
def text def text
text = '' text = ''
@ -322,9 +278,7 @@ module Oga
text text
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
values = @nodes.map(&:inspect).join(', ') values = @nodes.map(&:inspect).join(', ')
@ -333,21 +287,17 @@ module Oga
private private
##
# Takes ownership of the given node. This only occurs when the current # Takes ownership of the given node. This only occurs when the current
# set has an owner. # set has an owner.
# #
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
#
def take_ownership(node) def take_ownership(node)
node.node_set = self node.node_set = self
end end
##
# Removes ownership of the node if it belongs to the current set. # Removes ownership of the node if it belongs to the current set.
# #
# @param [Oga::XML::Node] node # @param [Oga::XML::Node] node
#
def remove_ownership(node) def remove_ownership(node)
node.node_set = nil if node.node_set == self node.node_set = nil if node.node_set == self
end end

View File

@ -1,6 +1,5 @@
%header %header
{ {
##
# DOM parser for both XML and HTML. # DOM parser for both XML and HTML.
# #
# This parser does not produce a dedicated AST, instead it emits XML nodes # This parser does not produce a dedicated AST, instead it emits XML nodes
@ -205,11 +204,9 @@ string_body
%inner %inner
{ {
##
# Hash mapping token types and dedicated error labels. # Hash mapping token types and dedicated error labels.
# #
# @return [Hash] # @return [Hash]
#
TOKEN_ERROR_MAPPING = { TOKEN_ERROR_MAPPING = {
:T_STRING => 'string', :T_STRING => 'string',
:T_TEXT => 'text', :T_TEXT => 'text',
@ -234,11 +231,9 @@ string_body
-1 => 'end of input' -1 => 'end of input'
} }
##
# @param [String|IO] data The input to parse. # @param [String|IO] data The input to parse.
# @param [Hash] options # @param [Hash] options
# @see [Oga::XML::Lexer#initialize] # @see [Oga::XML::Lexer#initialize]
#
def initialize(data, options = {}) def initialize(data, options = {})
@data = data @data = data
@lexer = Lexer.new(data, options) @lexer = Lexer.new(data, options)
@ -246,20 +241,16 @@ string_body
reset reset
end end
##
# Resets the internal state of the parser. # Resets the internal state of the parser.
#
def reset def reset
@line = 1 @line = 1
@lexer.reset @lexer.reset
end end
##
# Yields the next token from the lexer. # Yields the next token from the lexer.
# #
# @yieldparam [Array] # @yieldparam [Array]
#
def each_token def each_token
@lexer.advance do |type, value, line| @lexer.advance do |type, value, line|
@line = line if line @line = line if line
@ -270,12 +261,10 @@ string_body
yield [-1, -1] yield [-1, -1]
end end
##
# @param [Fixnum] stack_type # @param [Fixnum] stack_type
# @param [Fixnum] stack_value # @param [Fixnum] stack_value
# @param [Symbol] token_type # @param [Symbol] token_type
# @param [String] token_value # @param [String] token_value
#
def parser_error(stack_type, stack_value, token_type, token_value) def parser_error(stack_type, stack_value, token_type, token_value)
case id_to_type(stack_type) case id_to_type(stack_type)
when :rule when :rule
@ -294,9 +283,7 @@ string_body
raise LL::ParserError, message raise LL::ParserError, message
end end
##
# @see [LL::Driver#parse] # @see [LL::Driver#parse]
#
def parse def parse
retval = super retval = super
@ -305,10 +292,8 @@ string_body
retval retval
end end
##
# @param [Array] children # @param [Array] children
# @return [Oga::XML::Document] # @return [Oga::XML::Document]
#
def on_document(children = []) def on_document(children = [])
document = Document.new(:type => @lexer.html? ? :html : :xml) document = Document.new(:type => @lexer.html? ? :html : :xml)
@ -327,42 +312,32 @@ string_body
document document
end end
##
# @param [Hash] options # @param [Hash] options
#
def on_doctype(options = {}) def on_doctype(options = {})
Doctype.new(options) Doctype.new(options)
end end
##
# @param [String] text # @param [String] text
# @return [Oga::XML::Cdata] # @return [Oga::XML::Cdata]
#
def on_cdata(text = nil) def on_cdata(text = nil)
Cdata.new(:text => text) Cdata.new(:text => text)
end end
##
# @param [String] text # @param [String] text
# @return [Oga::XML::Comment] # @return [Oga::XML::Comment]
#
def on_comment(text = nil) def on_comment(text = nil)
Comment.new(:text => text) Comment.new(:text => text)
end end
##
# @param [String] name # @param [String] name
# @param [String] text # @param [String] text
# @return [Oga::XML::ProcessingInstruction] # @return [Oga::XML::ProcessingInstruction]
#
def on_proc_ins(name, text = nil) def on_proc_ins(name, text = nil)
ProcessingInstruction.new(:name => name, :text => text) ProcessingInstruction.new(:name => name, :text => text)
end end
##
# @param [Array] attributes # @param [Array] attributes
# @return [Oga::XML::XmlDeclaration] # @return [Oga::XML::XmlDeclaration]
#
def on_xml_decl(attributes = []) def on_xml_decl(attributes = [])
options = {} options = {}
@ -373,20 +348,16 @@ string_body
XmlDeclaration.new(options) XmlDeclaration.new(options)
end end
##
# @param [String] text # @param [String] text
# @return [Oga::XML::Text] # @return [Oga::XML::Text]
#
def on_text(text) def on_text(text)
Text.new(:text => text) Text.new(:text => text)
end end
##
# @param [String] namespace # @param [String] namespace
# @param [String] name # @param [String] name
# @param [Hash] attributes # @param [Hash] attributes
# @return [Oga::XML::Element] # @return [Oga::XML::Element]
#
def on_element(namespace, name, attributes = {}) def on_element(namespace, name, attributes = {})
element = Element.new( element = Element.new(
:namespace_name => namespace, :namespace_name => namespace,
@ -397,31 +368,25 @@ string_body
element element
end end
##
# @param [Oga::XML::Element] element # @param [Oga::XML::Element] element
# @param [Array] children # @param [Array] children
# @return [Oga::XML::Element] # @return [Oga::XML::Element]
#
def on_element_children(element, children = []) def on_element_children(element, children = [])
element.children = children element.children = children
element element
end end
##
# @param [Oga::XML::Element] element # @param [Oga::XML::Element] element
# @return [Oga::XML::Element] # @return [Oga::XML::Element]
#
def after_element(element) def after_element(element)
element element
end end
##
# @param [String] name # @param [String] name
# @param [String] ns_name # @param [String] ns_name
# @param [String] value # @param [String] value
# @return [Oga::XML::Attribute] # @return [Oga::XML::Attribute]
#
def on_attribute(name, ns_name = nil, value = nil) def on_attribute(name, ns_name = nil, value = nil)
Attribute.new( Attribute.new(
:namespace_name => ns_name, :namespace_name => ns_name,
@ -430,9 +395,7 @@ string_body
) )
end end
##
# @param [Array] attrs # @param [Array] attrs
#
def on_attributes(attrs) def on_attributes(attrs)
attrs attrs
end end

View File

@ -1,34 +1,26 @@
module Oga module Oga
module XML module XML
##
# Class used for storing information about a single processing instruction. # Class used for storing information about a single processing instruction.
#
class ProcessingInstruction < CharacterNode class ProcessingInstruction < CharacterNode
# @return [String] # @return [String]
attr_accessor :name attr_accessor :name
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :name The name of the instruction. # @option options [String] :name The name of the instruction.
# @see [Oga::XML::CharacterNode#initialize] # @see [Oga::XML::CharacterNode#initialize]
#
def initialize(options = {}) def initialize(options = {})
super super
@name = options[:name] @name = options[:name]
end end
##
# @return [String] # @return [String]
#
def to_xml def to_xml
"<?#{name}#{text}?>" "<?#{name}#{text}?>"
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
"ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})" "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
end end

View File

@ -1,6 +1,5 @@
module Oga module Oga
module XML module XML
##
# The PullParser class can be used to parse an XML document incrementally # The PullParser class can be used to parse an XML document incrementally
# instead of parsing it as a whole. This results in lower memory usage and # instead of parsing it as a whole. This results in lower memory usage and
# potentially faster parsing times. The downside is that pull parsers are # potentially faster parsing times. The downside is that pull parsers are
@ -18,7 +17,6 @@ module Oga
# #
# This parser yields proper XML instances such as {Oga::XML::Element}. # This parser yields proper XML instances such as {Oga::XML::Element}.
# Doctypes and XML declarations are ignored by this parser. # Doctypes and XML declarations are ignored by this parser.
#
class PullParser < Parser class PullParser < Parser
# @return [Oga::XML::Node] # @return [Oga::XML::Node]
attr_reader :node attr_reader :node
@ -27,9 +25,7 @@ module Oga
# @return [Array] # @return [Array]
attr_reader :nesting attr_reader :nesting
##
# @return [Array] # @return [Array]
#
DISABLED_CALLBACKS = [ DISABLED_CALLBACKS = [
:on_document, :on_document,
:on_doctype, :on_doctype,
@ -37,9 +33,7 @@ module Oga
:on_element_children :on_element_children
] ]
##
# @return [Array] # @return [Array]
#
BLOCK_CALLBACKS = [ BLOCK_CALLBACKS = [
:on_cdata, :on_cdata,
:on_comment, :on_comment,
@ -47,11 +41,9 @@ module Oga
:on_proc_ins :on_proc_ins
] ]
##
# Returns the shorthands that can be used for various node classes. # Returns the shorthands that can be used for various node classes.
# #
# @return [Hash] # @return [Hash]
#
NODE_SHORTHANDS = { NODE_SHORTHANDS = {
:text => XML::Text, :text => XML::Text,
:node => XML::Node, :node => XML::Node,
@ -62,9 +54,7 @@ module Oga
:xml_declaration => XML::XmlDeclaration :xml_declaration => XML::XmlDeclaration
} }
##
# @see Oga::XML::Parser#reset # @see Oga::XML::Parser#reset
#
def reset def reset
super super
@ -73,11 +63,9 @@ module Oga
@node = nil @node = nil
end end
##
# Parses the input and yields every node to the supplied block. # Parses the input and yields every node to the supplied block.
# #
# @yieldparam [Oga::XML::Node] # @yieldparam [Oga::XML::Node]
#
def parse(&block) def parse(&block)
@block = block @block = block
@ -86,7 +74,6 @@ module Oga
return return
end end
##
# Calls the supplied block if the current node type and optionally the # Calls the supplied block if the current node type and optionally the
# nesting match. This method allows you to write this: # nesting match. This method allows you to write this:
# #
@ -120,7 +107,6 @@ module Oga
# returned by {Oga::XML::Node#node_type}. # returned by {Oga::XML::Node#node_type}.
# #
# @param [Array] nesting The element name nesting to act upon. # @param [Array] nesting The element name nesting to act upon.
#
def on(type, nesting = []) def on(type, nesting = [])
if node.is_a?(NODE_SHORTHANDS[type]) if node.is_a?(NODE_SHORTHANDS[type])
if nesting.empty? or nesting == self.nesting if nesting.empty? or nesting == self.nesting
@ -149,9 +135,7 @@ module Oga
EOF EOF
end end
##
# @see Oga::XML::Parser#on_element # @see Oga::XML::Parser#on_element
#
def on_element(*args) def on_element(*args)
@node = super @node = super
@ -162,9 +146,7 @@ module Oga
return return
end end
##
# @see Oga::XML::Parser#after_element # @see Oga::XML::Parser#after_element
#
def after_element(*args) def after_element(*args)
nesting.pop nesting.pop

View File

@ -1,11 +1,8 @@
module Oga module Oga
module XML module XML
##
# The Querying module provides methods that make it easy to run XPath/CSS # The Querying module provides methods that make it easy to run XPath/CSS
# queries on XML documents/elements. # queries on XML documents/elements.
#
module Querying module Querying
##
# Evaluates the given XPath expression. # Evaluates the given XPath expression.
# #
# Querying a document: # Querying a document:
@ -34,7 +31,6 @@ module Oga
# be String values. # be String values.
# #
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def xpath(expression, variables = {}) def xpath(expression, variables = {})
ast = XPath::Parser.parse_with_cache(expression) ast = XPath::Parser.parse_with_cache(expression)
block = XPath::Compiler.compile_with_cache(ast) block = XPath::Compiler.compile_with_cache(ast)
@ -42,7 +38,6 @@ module Oga
block.call(self, variables) block.call(self, variables)
end end
##
# Evaluates the XPath expression and returns the first matched node. # Evaluates the XPath expression and returns the first matched node.
# #
# Querying a document: # Querying a document:
@ -59,14 +54,12 @@ module Oga
# #
# @see [#xpath] # @see [#xpath]
# @return [Oga::XML::Node|Oga::XML::Attribute] # @return [Oga::XML::Node|Oga::XML::Attribute]
#
def at_xpath(*args) def at_xpath(*args)
result = xpath(*args) result = xpath(*args)
result.is_a?(XML::NodeSet) ? result.first : result result.is_a?(XML::NodeSet) ? result.first : result
end end
##
# Evaluates the given CSS expression. # Evaluates the given CSS expression.
# #
# Querying a document: # Querying a document:
@ -81,7 +74,6 @@ module Oga
# #
# @param [String] expression The CSS expression to run. # @param [String] expression The CSS expression to run.
# @return [Oga::XML::NodeSet] # @return [Oga::XML::NodeSet]
#
def css(expression) def css(expression)
ast = CSS::Parser.parse_with_cache(expression) ast = CSS::Parser.parse_with_cache(expression)
block = XPath::Compiler.compile_with_cache(ast) block = XPath::Compiler.compile_with_cache(ast)
@ -89,12 +81,10 @@ module Oga
block.call(self) block.call(self)
end end
##
# Evaluates the CSS expression and returns the first matched node. # Evaluates the CSS expression and returns the first matched node.
# #
# @see [#css] # @see [#css]
# @return [Oga::XML::Node|Oga::XML::Attribute] # @return [Oga::XML::Node|Oga::XML::Attribute]
#
def at_css(*args) def at_css(*args)
result = css(*args) result = css(*args)

View File

@ -1,6 +1,5 @@
module Oga module Oga
module XML module XML
##
# The SaxParser class provides the basic interface for writing custom SAX # The SaxParser class provides the basic interface for writing custom SAX
# parsers. All callback methods defined in {Oga::XML::Parser} are delegated # parsers. All callback methods defined in {Oga::XML::Parser} are delegated
# to a dedicated handler class. # to a dedicated handler class.
@ -66,12 +65,9 @@ module Oga
# attribute names (optionally prefixed by their namespace) and their values. # attribute names (optionally prefixed by their namespace) and their values.
# You can override `on_attribute` to control individual attributes and # You can override `on_attribute` to control individual attributes and
# `on_attributes` to control the final set. # `on_attributes` to control the final set.
#
class SaxParser < Parser class SaxParser < Parser
##
# @param [Object] handler The SAX handler to delegate callbacks to. # @param [Object] handler The SAX handler to delegate callbacks to.
# @see [Oga::XML::Parser#initialize] # @see [Oga::XML::Parser#initialize]
#
def initialize(handler, *args) def initialize(handler, *args)
@handler = handler @handler = handler
@ -89,38 +85,32 @@ module Oga
EOF EOF
end end
##
# Manually override `on_element` so we can ensure that `after_element` # Manually override `on_element` so we can ensure that `after_element`
# always receives the namespace and name. # always receives the namespace and name.
# #
# @see [Oga::XML::Parser#on_element] # @see [Oga::XML::Parser#on_element]
# @return [Array] # @return [Array]
#
def on_element(namespace, name, attrs = []) def on_element(namespace, name, attrs = [])
run_callback(:on_element, namespace, name, attrs) run_callback(:on_element, namespace, name, attrs)
[namespace, name] [namespace, name]
end end
##
# Manually override `after_element` so it can take a namespace and name. # Manually override `after_element` so it can take a namespace and name.
# This differs a bit from the regular `after_element` which only takes an # This differs a bit from the regular `after_element` which only takes an
# {Oga::XML::Element} instance. # {Oga::XML::Element} instance.
# #
# @param [Array] namespace_with_name # @param [Array] namespace_with_name
#
def after_element(namespace_with_name) def after_element(namespace_with_name)
run_callback(:after_element, *namespace_with_name) run_callback(:after_element, *namespace_with_name)
return return
end end
##
# Manually override this method since for this one we _do_ want the # Manually override this method since for this one we _do_ want the
# return value so it can be passed to `on_element`. # return value so it can be passed to `on_element`.
# #
# @see [Oga::XML::Parser#on_attribute] # @see [Oga::XML::Parser#on_attribute]
#
def on_attribute(name, ns = nil, value = nil) def on_attribute(name, ns = nil, value = nil)
if @handler.respond_to?(:on_attribute) if @handler.respond_to?(:on_attribute)
return run_callback(:on_attribute, name, ns, value) return run_callback(:on_attribute, name, ns, value)
@ -135,12 +125,10 @@ module Oga
{key => value} {key => value}
end end
##
# Merges the attributes together into a Hash. # Merges the attributes together into a Hash.
# #
# @param [Array] attrs # @param [Array] attrs
# @return [Hash] # @return [Hash]
#
def on_attributes(attrs) def on_attributes(attrs)
if @handler.respond_to?(:on_attributes) if @handler.respond_to?(:on_attributes)
return run_callback(:on_attributes, attrs) return run_callback(:on_attributes, attrs)
@ -156,9 +144,7 @@ module Oga
merged merged
end end
##
# @param [String] text # @param [String] text
#
def on_text(text) def on_text(text)
if @handler.respond_to?(:on_text) if @handler.respond_to?(:on_text)
unless inside_literal_html? unless inside_literal_html?
@ -173,17 +159,13 @@ module Oga
private private
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def inside_literal_html? def inside_literal_html?
@lexer.html_script? || @lexer.html_style? @lexer.html_script? || @lexer.html_style?
end end
##
# @param [Symbol] method # @param [Symbol] method
# @param [Array] args # @param [Array] args
#
def run_callback(method, *args) def run_callback(method, *args)
@handler.send(method, *args) if @handler.respond_to?(method) @handler.send(method, *args) if @handler.respond_to?(method)
end end

View File

@ -1,9 +1,7 @@
module Oga module Oga
module XML module XML
##
# Class containing information about a single text node. Text nodes don't # Class containing information about a single text node. Text nodes don't
# have any children, attributes and the like; just text. # have any children, attributes and the like; just text.
#
class Text < CharacterNode class Text < CharacterNode
def initialize(*args) def initialize(*args)
super super
@ -11,20 +9,16 @@ module Oga
@decoded = false @decoded = false
end end
##
# @param [String] value # @param [String] value
#
def text=(value) def text=(value)
@decoded = false @decoded = false
@text = value @text = value
end end
##
# Returns the text as a String. Upon the first call any XML/HTML entities # Returns the text as a String. Upon the first call any XML/HTML entities
# are decoded. # are decoded.
# #
# @return [String] # @return [String]
#
def text def text
if decode_entities? if decode_entities?
@text = EntityDecoder.try_decode(@text, html?) @text = EntityDecoder.try_decode(@text, html?)
@ -34,9 +28,7 @@ module Oga
@text @text
end end
##
# @see [Oga::XML::CharacterNode#to_xml] # @see [Oga::XML::CharacterNode#to_xml]
#
def to_xml def to_xml
return super if inside_literal_html? return super if inside_literal_html?
@ -45,16 +37,12 @@ module Oga
private private
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def decode_entities? def decode_entities?
!@decoded && !inside_literal_html? !@decoded && !inside_literal_html?
end end
##
# @return [TrueClass|FalseClass] # @return [TrueClass|FalseClass]
#
def inside_literal_html? def inside_literal_html?
node = parent node = parent

View File

@ -1,10 +1,7 @@
module Oga module Oga
module XML module XML
##
# Module that provides methods to traverse DOM trees. # Module that provides methods to traverse DOM trees.
#
module Traversal module Traversal
##
# Traverses through the node and yields every child node to the supplied # Traverses through the node and yields every child node to the supplied
# block. # block.
# #
@ -29,7 +26,6 @@ module Oga
# end # end
# #
# @yieldparam [Oga::XML::Node] The current node. # @yieldparam [Oga::XML::Node] The current node.
#
def each_node def each_node
visit = children.to_a.reverse visit = children.to_a.reverse

View File

@ -1,8 +1,6 @@
module Oga module Oga
module XML module XML
##
# Class containing information about an XML declaration tag. # Class containing information about an XML declaration tag.
#
class XmlDeclaration class XmlDeclaration
# @return [String] # @return [String]
attr_accessor :version attr_accessor :version
@ -14,24 +12,20 @@ module Oga
# @return [String] # @return [String]
attr_accessor :standalone attr_accessor :standalone
##
# @param [Hash] options # @param [Hash] options
# #
# @option options [String] :version # @option options [String] :version
# @option options [String] :encoding # @option options [String] :encoding
# @option options [String] :standalone # @option options [String] :standalone
#
def initialize(options = {}) def initialize(options = {})
@version = options[:version] || '1.0' @version = options[:version] || '1.0'
@encoding = options[:encoding] || 'UTF-8' @encoding = options[:encoding] || 'UTF-8'
@standalone = options[:standalone] @standalone = options[:standalone]
end end
##
# Converts the declaration tag to XML. # Converts the declaration tag to XML.
# #
# @return [String] # @return [String]
#
def to_xml def to_xml
pairs = [] pairs = []
@ -44,9 +38,7 @@ module Oga
"<?xml #{pairs.join(' ')} ?>" "<?xml #{pairs.join(' ')} ?>"
end end
##
# @return [String] # @return [String]
#
def inspect def inspect
segments = [] segments = []

View File

@ -1,6 +1,5 @@
module Oga module Oga
module XPath module XPath
##
# Compiling of XPath ASTs into Ruby code. # Compiling of XPath ASTs into Ruby code.
# #
# The Compiler class can be used to turn an XPath AST into Ruby source code # The Compiler class can be used to turn an XPath AST into Ruby source code
@ -9,7 +8,6 @@ module Oga
# recompiling the same expression over and over again. # recompiling the same expression over and over again.
# #
# @private # @private
#
class Compiler class Compiler
# @return [Oga::LRU] # @return [Oga::LRU]
CACHE = LRU.new CACHE = LRU.new
@ -36,11 +34,9 @@ module Oga
:on_or => [:to_boolean, :or] :on_or => [:to_boolean, :or]
} }
##
# Compiles and caches an AST. # Compiles and caches an AST.
# #
# @see [#compile] # @see [#compile]
#
def self.compile_with_cache(ast) def self.compile_with_cache(ast)
CACHE.get_or_set(ast) { new.compile(ast) } CACHE.get_or_set(ast) { new.compile(ast) }
end end
@ -57,12 +53,10 @@ module Oga
@predicate_indexes = [] @predicate_indexes = []
end end
##
# Compiles an XPath AST into a Ruby Proc. # Compiles an XPath AST into a Ruby Proc.
# #
# @param [AST::Node] ast # @param [AST::Node] ast
# @return [Proc] # @return [Proc]
#
def compile(ast) def compile(ast)
document = literal(:node) document = literal(:node)
matched = matched_literal matched = matched_literal
@ -97,13 +91,11 @@ module Oga
reset reset
end end
##
# Processes a single XPath AST node. # Processes a single XPath AST node.
# #
# @param [AST::Node] ast # @param [AST::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def process(ast, input, &block) def process(ast, input, &block)
send("on_#{ast.type}", ast, input, &block) send("on_#{ast.type}", ast, input, &block)
end end
@ -119,7 +111,6 @@ module Oga
end end
end end
##
# Dispatches the processing of axes to dedicated methods. This works # Dispatches the processing of axes to dedicated methods. This works
# similarly to {#process} except the handler names are "on_axis_X" with "X" # similarly to {#process} except the handler names are "on_axis_X" with "X"
# being the axis name. # being the axis name.
@ -127,7 +118,6 @@ module Oga
# @param [AST::Node] ast # @param [AST::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_axis(ast, input, &block) def on_axis(ast, input, &block)
name, test, following = *ast name, test, following = *ast
@ -425,14 +415,12 @@ module Oga
ast ast
end end
##
# Processes a predicate that requires a temporary NodeSet. # Processes a predicate that requires a temporary NodeSet.
# #
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @param [AST::Node] test # @param [AST::Node] test
# @param [AST::Node] predicate # @param [AST::Node] predicate
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_predicate_temporary(input, test, predicate) def on_predicate_temporary(input, test, predicate)
temp_set = unique_literal(:temp_set) temp_set = unique_literal(:temp_set)
pred_node = unique_literal(:pred_node) pred_node = unique_literal(:pred_node)
@ -472,14 +460,12 @@ module Oga
ast ast
end end
##
# Processes a predicate that doesn't require a temporary NodeSet. # Processes a predicate that doesn't require a temporary NodeSet.
# #
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @param [AST::Node] test # @param [AST::Node] test
# @param [AST::Node] predicate # @param [AST::Node] predicate
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_predicate_direct(input, test, predicate) def on_predicate_direct(input, test, predicate)
pred_var = unique_literal(:pred_var) pred_var = unique_literal(:pred_var)
index_var = predicate_index index_var = predicate_index
@ -514,14 +500,12 @@ module Oga
end end
end end
##
# Processes a predicate that uses a literal index. # Processes a predicate that uses a literal index.
# #
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @param [AST::Node] test # @param [AST::Node] test
# @param [AST::Node] predicate # @param [AST::Node] predicate
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_predicate_index(input, test, predicate) def on_predicate_index(input, test, predicate)
index_var = predicate_index index_var = predicate_index
index_step = literal(1) index_step = literal(1)
@ -549,11 +533,9 @@ module Oga
name_match ? condition.and(name_match) : condition name_match ? condition.and(name_match) : condition
end end
##
# Processes the `=` operator. # Processes the `=` operator.
# #
# @see [#operator] # @see [#operator]
#
def on_eq(ast, input, &block) def on_eq(ast, input, &block)
conv = literal(Conversion) conv = literal(Conversion)
@ -567,11 +549,9 @@ module Oga
end end
end end
##
# Processes the `!=` operator. # Processes the `!=` operator.
# #
# @see [#operator] # @see [#operator]
#
def on_neq(ast, input, &block) def on_neq(ast, input, &block)
conv = literal(Conversion) conv = literal(Conversion)
@ -599,11 +579,9 @@ module Oga
end end
end end
##
# Processes the `|` operator. # Processes the `|` operator.
# #
# @see [#operator] # @see [#operator]
#
def on_pipe(ast, input, &block) def on_pipe(ast, input, &block)
left, right = *ast left, right = *ast
@ -649,13 +627,11 @@ module Oga
.or(send_message(:raise, string("Undefined XPath variable: #{name}"))) .or(send_message(:raise, string("Undefined XPath variable: #{name}")))
end end
##
# Delegates function calls to specific handlers. # Delegates function calls to specific handlers.
# #
# @param [AST::Node] ast # @param [AST::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_call(ast, input, &block) def on_call(ast, input, &block)
name, *args = *ast name, *args = *ast
@ -808,7 +784,6 @@ module Oga
end end
end end
##
# Processes the `id()` function call. # Processes the `id()` function call.
# #
# The XPath specification states that this function's behaviour should be # The XPath specification states that this function's behaviour should be
@ -825,7 +800,6 @@ module Oga
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @param [AST::Node] arg # @param [AST::Node] arg
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_call_id(input, arg) def on_call_id(input, arg)
orig_input = original_input_literal orig_input = original_input_literal
node = unique_literal(:node) node = unique_literal(:node)
@ -1270,13 +1244,11 @@ module Oga
index.to_f index.to_f
end end
##
# Delegates type tests to specific handlers. # Delegates type tests to specific handlers.
# #
# @param [AST::Node] ast # @param [AST::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def on_type_test(ast, input, &block) def on_type_test(ast, input, &block)
name, following = *ast name, following = *ast
@ -1414,13 +1386,11 @@ module Oga
condition condition
end end
##
# Returns an AST matching the first node of a node set. # Returns an AST matching the first node of a node set.
# #
# @param [Oga::Ruby::Node] ast # @param [Oga::Ruby::Node] ast
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def match_first_node(ast, input) def match_first_node(ast, input)
catch_message(:value) do catch_message(:value) do
process(ast, input) do |node| process(ast, input) do |node|
@ -1429,11 +1399,9 @@ module Oga
end end
end end
##
# Tries to match the first node in a set, otherwise processes it as usual. # Tries to match the first node in a set, otherwise processes it as usual.
# #
# @see [#match_first_node] # @see [#match_first_node]
#
def try_match_first_node(ast, input, optimize_first = true) def try_match_first_node(ast, input, optimize_first = true)
if return_nodeset?(ast) and optimize_first if return_nodeset?(ast) and optimize_first
match_first_node(ast, input) match_first_node(ast, input)
@ -1460,7 +1428,6 @@ module Oga
arg_var.assign(arg_ast).followed_by { yield arg_var } arg_var.assign(arg_ast).followed_by { yield arg_var }
end end
##
# Generates the code for an operator. # Generates the code for an operator.
# #
# The generated code is optimized so that expressions such as `a/b = c` # The generated code is optimized so that expressions such as `a/b = c`
@ -1479,7 +1446,6 @@ module Oga
# @param [Oga::Ruby::Node] input # @param [Oga::Ruby::Node] input
# @param [TrueClass|FalseClass] optimize_first # @param [TrueClass|FalseClass] optimize_first
# @return [Oga::Ruby::Node] # @return [Oga::Ruby::Node]
#
def operator(ast, input, optimize_first = true) def operator(ast, input, optimize_first = true)
left, right = *ast left, right = *ast

View File

@ -1,16 +1,12 @@
module Oga module Oga
module XPath module XPath
##
# Module for converting XPath objects such as NodeSets. # Module for converting XPath objects such as NodeSets.
# #
# @private # @private
#
module Conversion module Conversion
##
# Converts both arguments to a type that can be compared using ==. # Converts both arguments to a type that can be compared using ==.
# #
# @return [Array] # @return [Array]
#
def self.to_compatible_types(left, right) def self.to_compatible_types(left, right)
if left.is_a?(XML::NodeSet) or left.respond_to?(:text) if left.is_a?(XML::NodeSet) or left.respond_to?(:text)
left = to_string(left) left = to_string(left)

View File

@ -2,7 +2,6 @@
module Oga module Oga
module XPath module XPath
##
# Lexer for turning XPath expressions into a set of tokens. Tokens are # Lexer for turning XPath expressions into a set of tokens. Tokens are
# returned as arrays with every array having two values: # returned as arrays with every array having two values:
# #
@ -33,18 +32,15 @@ module Oga
# shared state. # shared state.
# #
# @api private # @api private
#
class Lexer class Lexer
%% write data; %% write data;
# % fix highlight # % fix highlight
##
# Maps certain XPath axes written in their short form to their long form # Maps certain XPath axes written in their short form to their long form
# equivalents. # equivalents.
# #
# @return [Hash] # @return [Hash]
#
AXIS_MAPPING = { AXIS_MAPPING = {
'@' => 'attribute', '@' => 'attribute',
'//' => 'descendant-or-self', '//' => 'descendant-or-self',
@ -52,33 +48,25 @@ module Oga
'.' => 'self' '.' => 'self'
} }
##
# Axes that require a separate `node()` call to be emitted. # Axes that require a separate `node()` call to be emitted.
# #
# @return [Array] # @return [Array]
#
AXIS_EMIT_NODE = %w{descendant-or-self parent self} AXIS_EMIT_NODE = %w{descendant-or-self parent self}
##
# Axes that require an extra T_SLASH token to be emitted. # Axes that require an extra T_SLASH token to be emitted.
# #
# @return [Array] # @return [Array]
#
AXIS_EMIT_EXTRA_SLASH = %w{descendant-or-self} AXIS_EMIT_EXTRA_SLASH = %w{descendant-or-self}
##
# @param [String] data The data to lex. # @param [String] data The data to lex.
#
def initialize(data) def initialize(data)
@data = data @data = data
end end
##
# Gathers all the tokens for the input and returns them as an Array. # Gathers all the tokens for the input and returns them as an Array.
# #
# @see [#advance] # @see [#advance]
# @return [Array] # @return [Array]
#
def lex def lex
tokens = [] tokens = []
@ -89,7 +77,6 @@ module Oga
return tokens return tokens
end end
##
# Advances through the input and generates the corresponding tokens. Each # Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block. # token is yielded to the supplied block.
# #
@ -103,7 +90,6 @@ module Oga
# the lexer loop has finished. # the lexer loop has finished.
# #
# @see [#add_token] # @see [#add_token]
#
def advance(&block) def advance(&block)
@block = block @block = block
@ -137,7 +123,6 @@ module Oga
private private
##
# Emits a token of which the value is based on the supplied start/stop # Emits a token of which the value is based on the supplied start/stop
# position. # position.
# #
@ -147,25 +132,21 @@ module Oga
# #
# @see [#text] # @see [#text]
# @see [#add_token] # @see [#add_token]
#
def emit(type, start, stop) def emit(type, start, stop)
value = slice_input(start, stop) value = slice_input(start, stop)
add_token(type, value) add_token(type, value)
end end
##
# Returns the text between the specified start and stop position. # Returns the text between the specified start and stop position.
# #
# @param [Fixnum] start # @param [Fixnum] start
# @param [Fixnum] stop # @param [Fixnum] stop
# @return [String] # @return [String]
#
def slice_input(start, stop) def slice_input(start, stop)
return @data.byteslice(start, stop - start) return @data.byteslice(start, stop - start)
end end
##
# Yields a new token to the supplied block. # Yields a new token to the supplied block.
# #
# @param [Symbol] type The token type. # @param [Symbol] type The token type.
@ -173,7 +154,6 @@ module Oga
# #
# @yieldparam [Symbol] type # @yieldparam [Symbol] type
# @yieldparam [String|NilClass] value # @yieldparam [String|NilClass] value
#
def add_token(type, value = nil) def add_token(type, value = nil)
@block.call(type, value) @block.call(type, value)
end end
@ -228,7 +208,6 @@ module Oga
# #
# Strings can be single or double quoted. They are mainly used for # Strings can be single or double quoted. They are mainly used for
# attribute values. # attribute values.
#
dquote = '"'; dquote = '"';
squote = "'"; squote = "'";
@ -244,7 +223,6 @@ module Oga
# Full Axes # Full Axes
# #
# XPath axes in their full syntax. # XPath axes in their full syntax.
#
axis_full = ('ancestor' axis_full = ('ancestor'
| 'ancestor-or-self' | 'ancestor-or-self'
| 'attribute' | 'attribute'
@ -268,7 +246,6 @@ module Oga
# XPath axes in their abbreviated form. When lexing these are mapped to # XPath axes in their abbreviated form. When lexing these are mapped to
# their full forms so that the parser doesn't have to take care of # their full forms so that the parser doesn't have to take care of
# this. # this.
#
axis_short = '@' | '//' | '..' | '.'; axis_short = '@' | '//' | '..' | '.';
action emit_axis_short { action emit_axis_short {
@ -358,7 +335,6 @@ module Oga
# cannot assign variables in an expression; you can only refer to them. # cannot assign variables in an expression; you can only refer to them.
# This means that libraries themselves have to expose an interface for # This means that libraries themselves have to expose an interface for
# setting variables. # setting variables.
#
var = '$' identifier; var = '$' identifier;
action emit_variable { action emit_variable {

View File

@ -1,6 +1,5 @@
%header %header
{ {
##
# AST parser for XPath expressions. The AST is built using `AST::Node` # AST parser for XPath expressions. The AST is built using `AST::Node`
# instances. # instances.
# #
@ -248,42 +247,32 @@ variable
%inner %inner
{ {
##
# @return [Oga::LRU] # @return [Oga::LRU]
#
CACHE = LRU.new CACHE = LRU.new
##
# @param [String] data # @param [String] data
# @return [AST::Node] # @return [AST::Node]
#
def self.parse_with_cache(data) def self.parse_with_cache(data)
CACHE.get_or_set(data) { new(data).parse } CACHE.get_or_set(data) { new(data).parse }
end end
##
# @param [String] data The input to parse. # @param [String] data The input to parse.
#
def initialize(data) def initialize(data)
@lexer = Lexer.new(data) @lexer = Lexer.new(data)
end end
##
# Creates a new XPath node. # Creates a new XPath node.
# #
# @param [Symbol] type # @param [Symbol] type
# @param [Array] children # @param [Array] children
# @return [AST::Node] # @return [AST::Node]
#
def s(type, *children) def s(type, *children)
AST::Node.new(type, children) AST::Node.new(type, children)
end end
##
# Yields the next token from the lexer. # Yields the next token from the lexer.
# #
# @yieldparam [Array] # @yieldparam [Array]
#
def each_token def each_token
@lexer.advance do |type, value, line| @lexer.advance do |type, value, line|
@line = line if line @line = line if line
@ -294,9 +283,7 @@ variable
yield [-1, -1] yield [-1, -1]
end end
##
# @param [Array] val # @param [Array] val
#
def combine_operators(val) def combine_operators(val)
ret = val[0] ret = val[0]
@ -307,9 +294,7 @@ variable
ret ret
end end
##
# @param [Array] val # @param [Array] val
#
def combine_optional_operator(val) def combine_optional_operator(val)
ret = val[0] ret = val[0]