Started porting the XML parser to ruby-ll

This is far from done.
This commit is contained in:
Yorick Peterse 2015-02-13 10:53:34 +01:00
parent 2f67399784
commit 71aefb53cc
2 changed files with 137 additions and 0 deletions

136
lib/oga/xml/parser.rll Normal file
View File

@ -0,0 +1,136 @@
# Name of the parser class this grammar is compiled into.
%name Oga::XML::Parser;
# Token types the rules may reference. Only the string and doctype
# terminals are used by the rules visible in this file so far; the others
# are declared but not yet referenced.
%terminals T_TEXT T_STRING_SQUOTE T_STRING_DQUOTE T_STRING_BODY;
%terminals T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE T_DOCTYPE_NAME;
%terminals T_DOCTYPE_INLINE T_CDATA T_COMMENT;
%terminals T_ELEM_START T_ELEM_NAME T_ELEM_NS T_ELEM_END T_ATTR T_ATTR_NS;
%terminals T_XML_DECL_START T_XML_DECL_END;
%terminals T_PROC_INS_START T_PROC_INS_NAME T_PROC_INS_END;
# Entry point of the grammar: a document is a (possibly empty) sequence of
# expressions.
#
# `expressions` already has an empty alternative, so a separate epsilon
# branch is not needed here. Having both would give two ways to derive empty
# input, which makes the grammar ambiguous.
document
= expressions
;
# Zero or more expressions, expressed with right recursion: one expression
# followed by the remaining expressions, or nothing at all. No action is
# attached to either alternative.
expressions
= expression expressions
| _
;
# A single top-level construct. Doctypes are the only construct handled so
# far.
expression
= doctype
;
# Doctypes
#
# This is meant to parse the following:
#
# <!DOCTYPE html>
# <!DOCTYPE html PUBLIC>
# <!DOCTYPE html PUBLIC "foo">
# <!DOCTYPE html PUBLIC "foo" "bar">
# <!DOCTYPE html [ ... ]>
#
# The rule evaluates to whatever doctype_follow produced (val[2]).
#
# NOTE(review): the doctype name (val[1]) is discarded and on_doctype is not
# called from here. Presumably unfinished work; confirm before relying on
# the value of this rule.
doctype
= T_DOCTYPE_START T_DOCTYPE_NAME doctype_follow
{
val[2]
}
;
# Everything that may follow the doctype name.
#
# Returns: [T_DOCTYPE_TYPE, string, string, doctype_inline]
#
# Trailing slots can be missing: a bare T_DOCTYPE_END yields an empty Array,
# and the T_DOCTYPE_TYPE alternative yields the type followed by whatever
# doctype_types returned. The inline alternative sets the first three slots
# to nil.
doctype_follow
= T_DOCTYPE_END { [] }
| T_DOCTYPE_TYPE doctype_types { [val[0], *val[1]] }
| doctype_inline T_DOCTYPE_END { [nil, nil, nil, val[0]] }
;
# One T_DOCTYPE_INLINE token followed by whatever doctype_inline_follow
# matched. The two values are joined with `+`.
doctype_inline
= T_DOCTYPE_INLINE doctype_inline_follow { val[0] + val[1] }
;
# Continuation of doctype_inline: either more inline content (its value is
# passed through) or nothing, in which case an empty String is returned so
# the caller can always concatenate.
doctype_inline_follow
= doctype_inline { val[0] }
| _ { '' }
;
# The identifiers that may follow a doctype type (e.g. PUBLIC), up to and
# including the closing T_DOCTYPE_END.
#
# Returns: [string, string], where either element may be nil.
#
# The second alternative covers a type without any identifiers, such as
# `<!DOCTYPE html PUBLIC>`, where the closing token directly follows the
# type. It returns two nils so the result has the same shape as the first
# alternative.
doctype_types
= string doctype_types_follow { [val[0], val[1]] }
| T_DOCTYPE_END { [nil, nil] }
;
# What may follow the first string of a doctype: a second string and the
# closing token (returns that string), or the closing token right away
# (returns nil).
doctype_types_follow
= string T_DOCTYPE_END { val[0] }
| T_DOCTYPE_END { nil }
;
# Strings
#
# A string opened by either a double or a single quote token. The value of
# the rule is the value of the matching *_follow rule (val[1]); the opening
# quote itself is dropped.
string
= T_STRING_DQUOTE string_dquote_follow { val[1] }
| T_STRING_SQUOTE string_squote_follow { val[1] }
;
# Remainder of a double quoted string after the opening quote: either the
# closing quote right away (returns an empty String) or a body followed by
# the closing quote (returns the body).
string_dquote_follow
= T_STRING_DQUOTE { '' }
| string_body T_STRING_DQUOTE { val[0] }
;
# Remainder of a single quoted string after the opening quote: either the
# closing quote right away (returns an empty String) or a body followed by
# the closing quote (returns the body).
string_squote_follow
= T_STRING_SQUOTE { '' }
| string_body T_STRING_SQUOTE { val[0] }
;
# A T_STRING_BODY token followed by whatever string_body_follow matched. The
# two values are joined with `+`.
string_body
= T_STRING_BODY string_body_follow { val[0] + val[1] }
;
# Continuation of a string body: zero or more additional T_STRING_BODY
# tokens, joined with `+`. Returns an empty String when there are none.
#
# The recursion goes back into this rule rather than into string_body.
# Recursing into string_body would demand yet another T_STRING_BODY after the
# one consumed here, so bodies made of an even number of tokens would fail
# to parse.
string_body_follow
= T_STRING_BODY string_body_follow { val[0] + val[1] }
| _ { '' }
;
%inner
{
##
# Sets up a parser for the given input. The input is stored as-is and is
# also handed to a new lexer together with the options, after which the
# parser state is reset.
#
# @param [String|IO] data The input to parse.
# @param [Hash] options Passed to the lexer unchanged.
# @see [Oga::XML::Lexer#initialize]
#
def initialize(data, options = {})
  @data  = data
  @lexer = Lexer.new(data, options)

  reset
end
##
# Puts the parser back in its initial state: the current line number goes
# back to 1 and the lexer is reset as well.
#
def reset
  @line = 1

  @lexer.reset
end
##
# Feeds the tokens produced by the lexer to the supplied block, one
# `[type, value]` pair at a time. Once the lexer has no more tokens a final
# `[-1, -1]` pair is yielded.
#
# @yieldparam [Array]
#
def each_token
  @lexer.advance do |type, value, line|
    # Tokens without a line number leave the current line untouched.
    @line = line if line

    token = [type, value]

    yield token
  end

  end_marker = [-1, -1]

  yield end_marker
end
##
# Builds a doctype node from the given options.
#
# @param [Hash] options Passed to `Doctype.new` unchanged.
# @return [Doctype]
#
def on_doctype(options = {})
  Doctype.new(options)
end
}

View File

@ -34,6 +34,7 @@ Gem::Specification.new do |s|
s.add_dependency 'racc', ['~> 1.4', '>= 1.4.12'] s.add_dependency 'racc', ['~> 1.4', '>= 1.4.12']
s.add_dependency 'ast' s.add_dependency 'ast'
s.add_dependency 'ruby-ll'
s.add_development_dependency 'rake' s.add_development_dependency 'rake'
s.add_development_dependency 'rspec', ['~> 3.0'] s.add_development_dependency 'rspec', ['~> 3.0']