diff --git a/.gitignore b/.gitignore index c2aca8b..7ce2579 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,5 @@ coverage pkg Gemfile.lock -lib/oga/lexer.rb -lib/oga/parser.rb +lib/oga/xml/lexer.rb +lib/oga/xml/parser.rb diff --git a/lib/oga/xml/lexer.rb b/lib/oga/xml/lexer.rb deleted file mode 100644 index d3944e1..0000000 --- a/lib/oga/xml/lexer.rb +++ /dev/null @@ -1,1108 +0,0 @@ - -# line 1 "lib/oga/xml/lexer.rl" - -# line 3 "lib/oga/xml/lexer.rl" -module Oga - module XML - ## - # Low level lexer that supports both XML and HTML (using an extra option). To - # lex HTML input set the `:html` option to `true` when creating an instance - # of the lexer: - # - # lexer = Oga::Lexer.new(:html => true) - # - # @!attribute [r] html - # @return [TrueClass|FalseClass] - # - class Lexer - -# line 20 "lib/oga/xml/lexer.rb" -class << self - attr_accessor :_lexer_trans_keys - private :_lexer_trans_keys, :_lexer_trans_keys= -end -self._lexer_trans_keys = [ - 0, 0, 45, 100, 45, 45, - 79, 111, 67, 99, 84, - 116, 89, 121, 80, 112, - 69, 101, 9, 32, 9, 104, - 84, 116, 77, 109, 76, - 108, 67, 67, 68, 68, - 65, 65, 84, 84, 65, 65, - 91, 91, 45, 122, 45, - 122, 120, 120, 109, 109, - 108, 108, 85, 85, 66, 66, - 76, 76, 73, 73, 67, - 67, 89, 89, 83, 83, - 84, 84, 69, 69, 77, 77, - 62, 62, 62, 62, 10, - 10, 47, 62, 62, 62, - 33, 122, 45, 122, 34, 34, - 39, 39, 9, 83, 93, - 93, 93, 93, 45, 45, - 45, 45, 63, 63, 62, 62, - 9, 122, 45, 122, 0 -] - -class << self - attr_accessor :_lexer_key_spans - private :_lexer_key_spans, :_lexer_key_spans= -end -self._lexer_key_spans = [ - 0, 56, 1, 33, 33, 33, 33, 33, - 33, 24, 96, 33, 33, 33, 1, 1, - 1, 1, 1, 1, 78, 78, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 16, 1, - 90, 78, 1, 1, 75, 1, 1, 1, - 1, 1, 1, 114, 78 -] - -class << self - attr_accessor :_lexer_index_offsets - private :_lexer_index_offsets, :_lexer_index_offsets= -end -self._lexer_index_offsets = [ - 0, 0, 57, 59, 93, 127, 161, 195, - 229, 263, 288, 385, 419, 453, 487, 489, - 491, 493, 495, 497, 499, 578, 657, 659, - 661, 663, 665, 667, 669, 671, 673, 675, - 677, 679, 681, 683, 685, 687, 689, 706, - 708, 799, 878, 880, 882, 958, 960, 962, - 964, 966, 968, 970, 1085 -] - -class << self - attr_accessor :_lexer_indicies - private :_lexer_indicies, :_lexer_indicies= -end -self._lexer_indicies = [ - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 2, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 3, 0, - 0, 0, 0, 0, 0, 0, 0, 2, - 0, 4, 0, 5, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 5, 0, 6, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 6, 0, 7, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 7, - 0, 8, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 8, 0, 9, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 9, 0, 10, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 10, 0, 11, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 11, 0, - 11, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 11, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 12, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 12, - 0, 13, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 13, 0, 14, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 14, 0, 15, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 15, 0, 16, - 0, 17, 0, 18, 0, 19, 0, 20, - 0, 21, 0, 22, 0, 0, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 0, 0, 0, 0, 0, 0, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 0, 0, 0, 0, 22, 0, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 0, 22, 0, 0, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 0, 0, 0, 23, 0, 0, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 0, 0, 0, 0, 22, 0, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, - 0, 24, 0, 25, 0, 26, 0, 27, - 28, 29, 28, 30, 28, 31, 28, 32, - 28, 33, 28, 34, 28, 35, 28, 36, - 28, 32, 28, 38, 37, 40, 39, 41, - 28, 43, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 44, 42, - 45, 42, 47, 46, 48, 46, 46, 46, - 46, 46, 46, 46, 46, 46, 46, 46, - 49, 46, 50, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 46, 46, - 46, 46, 51, 46, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 46, 46, - 46, 46, 49, 46, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 46, 49, - 52, 52, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 52, 52, 52, - 52, 52, 52, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 52, 52, 52, - 52, 49, 52, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 49, 49, 49, - 49, 49, 49, 49, 49, 52, 54, 53, - 56, 55, 57, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 57, 28, 58, 28, 28, 28, 28, - 59, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 60, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 61, 28, 28, 62, 28, 64, 63, - 66, 65, 68, 67, 70, 69, 72, 71, - 74, 73, 75, 41, 28, 28, 76, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, - 28, 75, 28, 77, 28, 28, 28, 28, - 78, 28, 28, 28, 28, 28, 79, 28, - 80, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 28, 28, 75, 80, - 28, 28, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 28, 28, 28, 28, - 79, 28, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 28, 79, 81, 81, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 81, 81, 81, 81, 81, - 81, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 81, 81, 81, 81, 79, - 81, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 79, 79, 79, 79, 79, - 79, 79, 79, 81, 0 -] - -class << self - attr_accessor :_lexer_trans_targs - private :_lexer_trans_targs, :_lexer_trans_targs= -end -self._lexer_trans_targs = [ - 38, 2, 3, 14, 38, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 38, - 15, 16, 17, 18, 19, 38, 21, 38, - 23, 24, 38, 26, 0, 27, 28, 29, - 44, 31, 32, 33, 34, 45, 45, 47, - 47, 51, 38, 39, 40, 38, 38, 38, - 1, 41, 20, 22, 38, 42, 42, 43, - 43, 44, 44, 44, 44, 25, 30, 45, - 46, 45, 35, 47, 48, 47, 36, 49, - 50, 49, 49, 51, 37, 51, 51, 52, - 51, 51 -] - -class << self - attr_accessor :_lexer_trans_actions - private :_lexer_trans_actions, :_lexer_trans_actions= -end -self._lexer_trans_actions = [ - 1, 0, 0, 0, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 3, - 0, 0, 0, 0, 0, 4, 0, 5, - 0, 0, 6, 0, 0, 0, 0, 0, - 7, 0, 0, 0, 0, 8, 9, 10, - 11, 12, 15, 0, 16, 17, 18, 19, - 0, 0, 0, 0, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 0, 0, 29, - 16, 30, 0, 31, 16, 32, 0, 33, - 0, 34, 35, 36, 0, 37, 38, 0, - 39, 40 -] - -class << self - attr_accessor :_lexer_to_state_actions - private :_lexer_to_state_actions, :_lexer_to_state_actions= -end -self._lexer_to_state_actions = [ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 13, 0, - 0, 0, 13, 13, 13, 13, 0, 13, - 0, 13, 0, 13, 0 -] - -class << self - attr_accessor :_lexer_from_state_actions - private :_lexer_from_state_actions, :_lexer_from_state_actions= -end -self._lexer_from_state_actions = [ - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 14, 0, - 0, 0, 14, 14, 14, 14, 0, 14, - 0, 14, 0, 14, 0 -] - -class << self - attr_accessor :_lexer_eof_trans - private :_lexer_eof_trans, :_lexer_eof_trans= -end -self._lexer_eof_trans = [ - 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 38, 40, 0, 0, 47, - 47, 53, 0, 0, 0, 0, 66, 0, - 70, 0, 74, 0, 82 -] - -class << self - attr_accessor :lexer_start -end -self.lexer_start = 38; -class << self - attr_accessor :lexer_first_final -end -self.lexer_first_final = 38; -class << self - attr_accessor :lexer_error -end -self.lexer_error = 0; - -class << self - attr_accessor :lexer_en_string_dquote -end -self.lexer_en_string_dquote = 42; -class << self - attr_accessor :lexer_en_string_squote -end -self.lexer_en_string_squote = 43; -class << self - attr_accessor :lexer_en_doctype -end -self.lexer_en_doctype = 44; -class << self - attr_accessor :lexer_en_cdata -end -self.lexer_en_cdata = 45; -class << self - attr_accessor :lexer_en_comment -end -self.lexer_en_comment = 47; -class << self - attr_accessor :lexer_en_xml_decl -end -self.lexer_en_xml_decl = 49; -class << self - attr_accessor :lexer_en_element_head -end -self.lexer_en_element_head = 51; -class << self - attr_accessor :lexer_en_main -end -self.lexer_en_main = 38; - - -# line 18 "lib/oga/xml/lexer.rl" - attr_reader :html - - ## - # Names of the HTML void elements that should be handled when HTML lexing - # is enabled. - # - # @return [Array] - # - HTML_VOID_ELEMENTS = [ - 'area', - 'base', - 'br', - 'col', - 'command', - 'embed', - 'hr', - 'img', - 'input', - 'keygen', - 'link', - 'meta', - 'param', - 'source', - 'track', - 'wbr' - ] - - # Lazy way of forwarding instance method calls used internally by Ragel to - # their corresponding class methods. - private_methods.grep(/^_lexer_/).each do |name| - define_method(name) do - return self.class.send(name) - end - - private(name) - end - - ## - # @param [Hash] options - # - # @option options [Symbol] :html When set to `true` the lexer will treat - # the input as HTML instead of SGML/XML. This makes it possible to lex - # HTML void elements such as ``. - # - def initialize(options = {}) - options.each do |key, value| - instance_variable_set("@#{key}", value) if respond_to?(key) - end - - reset - end - - ## - # Resets the internal state of the lexer. Typically you don't need to call - # this method yourself as its called by #lex after lexing a given String. - # - def reset - @line = 1 - @data = nil - @ts = nil - @te = nil - @tokens = [] - @stack = [] - @top = 0 - @elements = [] - - @buffer_start_position = nil - end - - ## - # Lexes the supplied String and returns an Array of tokens. Each token is - # an Array in the following format: - # - # [TYPE, VALUE] - # - # The type is a symbol, the value is either nil or a String. - # - # @param [String] data The string to lex. - # @return [Array] - # - def lex(data) - @data = data.unpack('U*') - lexer_start = self.class.lexer_start - eof = data.length - - -# line 441 "lib/oga/xml/lexer.rb" -begin - p ||= 0 - pe ||= @data.length - @cs = lexer_start - @top = 0 - @ts = nil - @te = nil - @act = 0 -end - -# line 104 "lib/oga/xml/lexer.rl" - -# line 454 "lib/oga/xml/lexer.rb" -begin - testEof = false - _slen, _trans, _keys, _inds, _acts, _nacts = nil - _goto_level = 0 - _resume = 10 - _eof_trans = 15 - _again = 20 - _test_eof = 30 - _out = 40 - while true - if _goto_level <= 0 - if p == pe - _goto_level = _test_eof - next - end - if @cs == 0 - _goto_level = _out - next - end - end - if _goto_level <= _resume - case _lexer_from_state_actions[ @cs] - when 14 then -# line 1 "NONE" - begin - @ts = p - end -# line 482 "lib/oga/xml/lexer.rb" - end - _keys = @cs << 1 - _inds = _lexer_index_offsets[ @cs] - _slen = _lexer_key_spans[ @cs] - _trans = if ( _slen > 0 && - _lexer_trans_keys[_keys] <= ( (@data[p] || 0)) && - ( (@data[p] || 0)) <= _lexer_trans_keys[_keys + 1] - ) then - _lexer_indicies[ _inds + ( (@data[p] || 0)) - _lexer_trans_keys[_keys] ] - else - _lexer_indicies[ _inds + _slen ] - end - end - if _goto_level <= _eof_trans - @cs = _lexer_trans_targs[_trans] - if _lexer_trans_actions[_trans] != 0 - case _lexer_trans_actions[_trans] - when 16 then -# line 1 "NONE" - begin - @te = p+1 - end - when 22 then -# line 254 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer(@ts, :T_STRING) - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 21 then -# line 259 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 24 then -# line 264 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer(@ts, :T_STRING) - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 23 then -# line 269 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 7 then -# line 293 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin emit(:T_DOCTYPE_TYPE) end - end - when 26 then -# line 240 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 42 - _goto_level = _again - next - end - - end - end - when 27 then -# line 246 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 43 - _goto_level = _again - next - end - - end - end - when 25 then -# line 301 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 28 then -# line 303 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - add_token(:T_DOCTYPE_END) - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 9 then -# line 334 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_CDATA_END) - - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 29 then -# line 341 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 30 then -# line 341 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; end - when 8 then -# line 341 "lib/oga/xml/lexer.rl" - begin - begin p = (( @te))-1; end - end - when 11 then -# line 370 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_COMMENT_END) - - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 31 then -# line 377 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 32 then -# line 377 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; end - when 10 then -# line 377 "lib/oga/xml/lexer.rl" - begin - begin p = (( @te))-1; end - end - when 35 then -# line 398 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_XML_DECL_END) - - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 33 then -# line 405 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 34 then -# line 405 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; end - when 36 then -# line 446 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - end - when 12 then -# line 448 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin advance_line end - end - when 37 then -# line 240 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 42 - _goto_level = _again - next - end - - end - end - when 38 then -# line 246 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 43 - _goto_level = _again - next - end - - end - end - when 39 then -# line 458 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - p = p - 1; - begin - @top -= 1 - @cs = @stack[ @top] - _goto_level = _again - next - end - - end - end - when 40 then -# line 451 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; begin emit(:T_ATTR) end - end - when 3 then -# line 284 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_DOCTYPE_START) - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 44 - _goto_level = _again - next - end - - end - end - when 4 then -# line 322 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_CDATA_START) - - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 45 - _goto_level = _again - next - end - - end - end - when 2 then -# line 358 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_COMMENT_START) - - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 47 - _goto_level = _again - next - end - - end - end - when 6 then -# line 387 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_XML_DECL_START) - - start_buffer - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 49 - _goto_level = _again - next - end - - end - end - when 17 then -# line 473 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - if html? and HTML_VOID_ELEMENTS.include?(current_element) - add_token(:T_ELEM_END, nil) - @elements.pop - end - end - end - when 5 then -# line 481 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - emit_buffer - add_token(:T_ELEM_END, nil) - - @elements.pop - end - end - when 19 then -# line 489 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - add_token(:T_ELEM_END, nil) - - @elements.pop - end - end - when 15 then -# line 497 "lib/oga/xml/lexer.rl" - begin - @te = p+1 - begin - # First character, start buffering (unless we already are buffering). - start_buffer(@ts) unless buffering? - - # EOF, emit the text buffer. - if @te == eof - emit_buffer(@te) - end - end - end - when 20 then -# line 415 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; begin - emit_buffer - add_token(:T_ELEM_START) - - # Add the element name. If the name includes a namespace we'll break - # the name up into two separate tokens. - name = text(@ts + 1) - - if name.include?(':') - ns, name = name.split(':') - - add_token(:T_ELEM_NS, ns) - end - - @elements << name - - add_token(:T_ELEM_NAME, name) - - begin - @stack[ @top] = @cs - @top+= 1 - @cs = 51 - _goto_level = _again - next - end - - end - end - when 18 then -# line 497 "lib/oga/xml/lexer.rl" - begin - @te = p -p = p - 1; begin - # First character, start buffering (unless we already are buffering). - start_buffer(@ts) unless buffering? - - # EOF, emit the text buffer. - if @te == eof - emit_buffer(@te) - end - end - end - when 1 then -# line 497 "lib/oga/xml/lexer.rl" - begin - begin p = (( @te))-1; end - begin - # First character, start buffering (unless we already are buffering). - start_buffer(@ts) unless buffering? - - # EOF, emit the text buffer. - if @te == eof - emit_buffer(@te) - end - end - end -# line 945 "lib/oga/xml/lexer.rb" - end - end - end - if _goto_level <= _again - case _lexer_to_state_actions[ @cs] - when 13 then -# line 1 "NONE" - begin - @ts = nil; end -# line 955 "lib/oga/xml/lexer.rb" - end - - if @cs == 0 - _goto_level = _out - next - end - p += 1 - if p != pe - _goto_level = _resume - next - end - end - if _goto_level <= _test_eof - if p == eof - if _lexer_eof_trans[ @cs] > 0 - _trans = _lexer_eof_trans[ @cs] - 1; - _goto_level = _eof_trans - next; - end - end - - end - if _goto_level <= _out - break - end -end - end - -# line 105 "lib/oga/xml/lexer.rl" - - tokens = @tokens - - reset - - return tokens - end - - ## - # @return [TrueClass|FalseClass] - # - def html? - return !!html - end - - private - - ## - # @param [Fixnum] amount The amount of lines to advance. - # - def advance_line(amount = 1) - @line += amount - end - - ## - # Emits a token who's value is based on the supplied start/stop position. - # - # @param [Symbol] type The token type. - # @param [Fixnum] start - # @param [Fixnum] stop - # - # @see #text - # @see #add_token - # - def emit(type, start = @ts, stop = @te) - value = text(start, stop) - - add_token(type, value) - end - - ## - # Returns the text of the current buffer based on the supplied start and - # stop position. - # - # By default `@ts` and `@te` are used as the start/stop position. - # - # @param [Fixnum] start - # @param [Fixnum] stop - # @return [String] - # - def text(start = @ts, stop = @te) - return @data[start...stop].pack('U*') - end - - ## - # Adds a token with the given type and value to the list. - # - # @param [Symbol] type The token type. - # @param [String] value The token value. - # - def add_token(type, value = nil) - token = [type, value, @line] - - @tokens << token - end - - ## - # Enables buffering starting at the given position. - # - # @param [Fixnum] position The start position of the buffer, set to `@te` - # by default. - # - def start_buffer(position = @te) - @buffer_start_position = position - end - - ## - # Returns `true` if we're currently buffering. - # - # @return [TrueClass|FalseClass] - # - def buffering? - return !!@buffer_start_position - end - - ## - # Emits the current buffer if we have any. The current line number is - # advanced based on the amount of newlines in the buffer. - # - # @param [Fixnum] position The end position of the buffer, set to `@ts` by - # default. - # - # @param [Symbol] type The type of node to emit. - # - def emit_buffer(position = @ts, type = :T_TEXT) - return unless @buffer_start_position - - content = text(@buffer_start_position, position) - - unless content.empty? - add_token(type, content) - - lines = content.count("\n") - - advance_line(lines) if lines > 0 - end - - @buffer_start_position = nil - end - - ## - # Returns the name of the element we're currently in. - # - # @return [String] - # - def current_element - return @elements.last - end - - -# line 507 "lib/oga/xml/lexer.rl" - - end # Lexer - end # XML -end # Oga diff --git a/lib/oga/xml/parser.rb b/lib/oga/xml/parser.rb deleted file mode 100644 index 784d878..0000000 --- a/lib/oga/xml/parser.rb +++ /dev/null @@ -1,402 +0,0 @@ -# -# DO NOT MODIFY!!!! -# This file is automatically generated by Racc 1.4.11 -# from Racc grammer file "". -# - -require 'racc/parser.rb' -module Oga - module XML - class Parser < Racc::Parser - - ## - # @param [Hash] options - # - # @option options [TrueClass|FalseClass] :html Enables HTML parsing mode. - # @see Oga::Lexer#initialize - # - def initialize(options = {}) - @lexer = Lexer.new(options) - end - - ## - # Resets the internal state of the parser. - # - def reset - @lines = [] - @line = 1 - end - - ## - # Emits a new AST token. - # - # @param [Symbol] type - # @param [Array] children - # - def s(type, *children) - return AST::Node.new( - type, - children.flatten, - :line => @line - ) - end - - ## - # Returns the next token from the lexer. - # - # @return [Array] - # - def next_token - type, value, line = @tokens.shift - - @line = line if line - - return type ? [type, value] : [false, false] - end - - ## - # @param [Fixnum] type The type of token the error occured on. - # @param [String] value The value of the token. - # @param [Array] stack The current stack of parsed nodes. - # @raise [Racc::ParseError] - # - def on_error(type, value, stack) - name = token_to_str(type) - index = @line - 1 - lines = '' - - # Show up to 5 lines before and after the offending line (if they exist). - (-5..5).each do |offset| - line = @lines[index + offset] - number = @line + offset - - if line and number > 0 - if offset == 0 - prefix = '=> ' - else - prefix = ' ' - end - - lines << "#{prefix}#{number}: #{line.strip}\n" - end - end - - raise Racc::ParseError, <<-EOF -Unexpected #{name} with value #{value.inspect} on line #{@line}: - -#{lines} - EOF - end - - ## - # Parses the supplied string and returns the AST. - # - # @example - # parser = Oga::Parser.new - # ast = parser.parse('bar') - # - # @param [String] string - # @return [Oga::AST::Node] - # - def parse(string) - @lines = string.lines - @tokens = @lexer.lex(string) - ast = do_parse - - reset - - return ast - end - -# vim: set ft=racc: -##### State transition tables begin ### - -racc_action_table = [ - 16, 40, 16, 10, 24, 37, 11, 22, 12, 28, - 14, 23, 21, 45, 31, 15, 16, 10, 44, 28, - 11, 43, 12, 36, 14, 35, 16, 10, 34, 15, - 11, 41, 12, 42, 14, 33, 16, 10, 17, 15, - 11, 46, 12, nil, 14, 29, 30, 19, 20, 15 ] - -racc_action_check = [ - 15, 28, 38, 38, 12, 24, 38, 11, 38, 13, - 38, 12, 11, 38, 15, 38, 2, 2, 35, 26, - 2, 35, 2, 22, 2, 20, 25, 25, 20, 2, - 25, 30, 25, 32, 25, 17, 0, 0, 1, 25, - 0, 44, 0, nil, 0, 14, 14, 10, 10, 0 ] - -racc_action_pointer = [ - 33, 38, 13, nil, nil, nil, nil, nil, nil, nil, - 42, 4, 1, -6, 33, -3, nil, 35, nil, nil, - 23, nil, 15, nil, -5, 23, 4, nil, -1, nil, - 19, nil, 16, nil, nil, 16, nil, nil, -1, nil, - nil, nil, nil, nil, 36, nil, nil ] - -racc_action_default = [ - -2, -32, -1, -4, -6, -7, -8, -9, -10, -11, - -32, -32, -32, -24, -32, -32, -31, -32, -3, -12, - -32, -16, -32, -18, -32, -5, -23, -26, -27, -21, - -32, -29, -32, 47, -13, -32, -17, -19, -32, -25, - -28, -22, -30, -14, -32, -20, -15 ] - -racc_goto_table = [ - 18, 2, 27, 32, 25, 26, 1, nil, nil, nil, - nil, nil, nil, nil, nil, 39, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 38, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 18 ] - -racc_goto_check = [ - 3, 2, 13, 8, 11, 12, 1, nil, nil, nil, - nil, nil, nil, nil, nil, 13, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 2, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 3 ] - -racc_goto_pointer = [ - nil, 6, 1, -2, nil, nil, nil, nil, -12, nil, - nil, -9, -8, -11 ] - -racc_goto_default = [ - nil, nil, nil, 3, 4, 5, 6, 7, 8, 9, - 13, nil, nil, nil ] - -racc_reduce_table = [ - 0, 0, :racc_error, - 1, 19, :_reduce_1, - 0, 19, :_reduce_2, - 2, 20, :_reduce_3, - 1, 20, :_reduce_4, - 0, 20, :_reduce_5, - 1, 21, :_reduce_none, - 1, 21, :_reduce_none, - 1, 21, :_reduce_none, - 1, 21, :_reduce_none, - 1, 21, :_reduce_none, - 1, 21, :_reduce_none, - 2, 22, :_reduce_12, - 3, 22, :_reduce_13, - 4, 22, :_reduce_14, - 5, 22, :_reduce_15, - 2, 23, :_reduce_16, - 3, 23, :_reduce_17, - 2, 24, :_reduce_18, - 3, 24, :_reduce_19, - 4, 25, :_reduce_20, - 2, 28, :_reduce_21, - 3, 28, :_reduce_22, - 1, 29, :_reduce_23, - 0, 29, :_reduce_24, - 2, 30, :_reduce_25, - 1, 30, :_reduce_26, - 1, 31, :_reduce_27, - 2, 31, :_reduce_28, - 2, 27, :_reduce_29, - 3, 27, :_reduce_30, - 1, 26, :_reduce_31 ] - -racc_reduce_n = 32 - -racc_shift_n = 47 - -racc_token_table = { - false => 0, - :error => 1, - :T_STRING => 2, - :T_TEXT => 3, - :T_DOCTYPE_START => 4, - :T_DOCTYPE_END => 5, - :T_DOCTYPE_TYPE => 6, - :T_CDATA_START => 7, - :T_CDATA_END => 8, - :T_COMMENT_START => 9, - :T_COMMENT_END => 10, - :T_ELEM_START => 11, - :T_ELEM_NAME => 12, - :T_ELEM_NS => 13, - :T_ELEM_END => 14, - :T_ATTR => 15, - :T_XML_DECL_START => 16, - :T_XML_DECL_END => 17 } - -racc_nt_base = 18 - -racc_use_result_var = false - -Racc_arg = [ - racc_action_table, - racc_action_check, - racc_action_default, - racc_action_pointer, - racc_goto_table, - racc_goto_check, - racc_goto_default, - racc_goto_pointer, - racc_nt_base, - racc_reduce_table, - racc_token_table, - racc_shift_n, - racc_reduce_n, - racc_use_result_var ] - -Racc_token_to_s_table = [ - "$end", - "error", - "T_STRING", - "T_TEXT", - "T_DOCTYPE_START", - "T_DOCTYPE_END", - "T_DOCTYPE_TYPE", - "T_CDATA_START", - "T_CDATA_END", - "T_COMMENT_START", - "T_COMMENT_END", - "T_ELEM_START", - "T_ELEM_NAME", - "T_ELEM_NS", - "T_ELEM_END", - "T_ATTR", - "T_XML_DECL_START", - "T_XML_DECL_END", - "$start", - "document", - "expressions", - "expression", - "doctype", - "cdata", - "comment", - "element", - "text", - "xmldecl", - "element_open", - "attributes", - "attributes_", - "attribute" ] - -Racc_debug_parser = false - -##### State transition tables end ##### - -# reduce 0 omitted - -def _reduce_1(val, _values) - s(:document, val[0]) -end - -def _reduce_2(val, _values) - s(:document) -end - -def _reduce_3(val, _values) - val.compact -end - -def _reduce_4(val, _values) - val[0] -end - -def _reduce_5(val, _values) - nil -end - -# reduce 6 omitted - -# reduce 7 omitted - -# reduce 8 omitted - -# reduce 9 omitted - -# reduce 10 omitted - -# reduce 11 omitted - -def _reduce_12(val, _values) - s(:doctype) -end - -def _reduce_13(val, _values) - s(:doctype, val[1]) - -end - -def _reduce_14(val, _values) - s(:doctype, val[1], val[2]) - -end - -def _reduce_15(val, _values) - s(:doctype, val[1], val[2], val[3]) - -end - -def _reduce_16(val, _values) - s(:cdata) -end - -def _reduce_17(val, _values) - s(:cdata, val[1]) -end - -def _reduce_18(val, _values) - s(:comment) -end - -def _reduce_19(val, _values) - s(:comment, val[1]) -end - -def _reduce_20(val, _values) - s(:element, val[0], val[1], val[2]) - -end - -def _reduce_21(val, _values) - [nil, val[1]] -end - -def _reduce_22(val, _values) - [val[1], val[2]] -end - -def _reduce_23(val, _values) - s(:attributes, val[0]) -end - -def _reduce_24(val, _values) - nil -end - -def _reduce_25(val, _values) - val -end - -def _reduce_26(val, _values) - val -end - -def _reduce_27(val, _values) - s(:attribute, val[0]) -end - -def _reduce_28(val, _values) - s(:attribute, val[0], val[1]) -end - -def _reduce_29(val, _values) - s(:xml_decl) -end - -def _reduce_30(val, _values) - s(:xml_decl, val[1]) -end - -def _reduce_31(val, _values) - s(:text, val[0]) -end - -def _reduce_none(val, _values) - val[0] -end - - end # class Parser - end # module XML - end # module Oga