ruby-ll: full support for parsing doctypes.
This commit is contained in:
parent
71aefb53cc
commit
15a3ab9ba5
|
@ -1,3 +1,25 @@
|
||||||
|
%header
|
||||||
|
{
|
||||||
|
##
|
||||||
|
# DOM parser for both XML and HTML.
|
||||||
|
#
|
||||||
|
# This parser does not produce a dedicated AST, instead it emits XML nodes
|
||||||
|
# directly. Basic usage of this parser is as follows:
|
||||||
|
#
|
||||||
|
# parser = Oga::XML::Parser.new('<foo></foo>')
|
||||||
|
# document = parser.parse
|
||||||
|
#
|
||||||
|
# To enable HTML parsing you'd use the following instead:
|
||||||
|
#
|
||||||
|
# parser = Oga::XML::Parser.new('<foo></foo>', :html => true)
|
||||||
|
# document = parser.parse
|
||||||
|
#
|
||||||
|
# In both cases you can use either a String or an IO as the parser input. IO
|
||||||
|
# instances will result in lower memory overhead, especially when parsing large
|
||||||
|
# files.
|
||||||
|
#
|
||||||
|
}
|
||||||
|
|
||||||
%name Oga::XML::Parser;
|
%name Oga::XML::Parser;
|
||||||
|
|
||||||
%terminals T_TEXT T_STRING_SQUOTE T_STRING_DQUOTE T_STRING_BODY;
|
%terminals T_TEXT T_STRING_SQUOTE T_STRING_DQUOTE T_STRING_BODY;
|
||||||
|
@ -8,12 +30,12 @@
|
||||||
%terminals T_PROC_INS_START T_PROC_INS_NAME T_PROC_INS_END;
|
%terminals T_PROC_INS_START T_PROC_INS_NAME T_PROC_INS_END;
|
||||||
|
|
||||||
document
|
document
|
||||||
= expressions
|
= expressions { on_document(val[0]) }
|
||||||
| _
|
| _ { on_document }
|
||||||
;
|
;
|
||||||
|
|
||||||
expressions
|
expressions
|
||||||
= expression expressions
|
= expression expressions { val[0] + val[1] }
|
||||||
| _
|
| _
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -31,10 +53,20 @@ expression
|
||||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||||
# <!DOCTYPE html PUBLIC "foo" "bar">
|
# <!DOCTYPE html PUBLIC "foo" "bar">
|
||||||
# <!DOCTYPE html [ ... ]>
|
# <!DOCTYPE html [ ... ]>
|
||||||
|
|
||||||
doctype
|
doctype
|
||||||
= T_DOCTYPE_START T_DOCTYPE_NAME doctype_follow
|
= T_DOCTYPE_START T_DOCTYPE_NAME doctype_follow
|
||||||
{
|
{
|
||||||
val[2]
|
name = val[1]
|
||||||
|
follow = val[2]
|
||||||
|
|
||||||
|
on_doctype(
|
||||||
|
:name => name,
|
||||||
|
:type => follow[0],
|
||||||
|
:public_id => follow[1],
|
||||||
|
:system_id => follow[2],
|
||||||
|
:inline_rules => follow[3]
|
||||||
|
)
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -56,6 +88,7 @@ doctype_inline_follow
|
||||||
|
|
||||||
doctype_types
|
doctype_types
|
||||||
= string doctype_types_follow { [val[0], val[1]] }
|
= string doctype_types_follow { [val[0], val[1]] }
|
||||||
|
| T_DOCTYPE_END { nil }
|
||||||
;
|
;
|
||||||
|
|
||||||
doctype_types_follow
|
doctype_types_follow
|
||||||
|
@ -64,6 +97,8 @@ doctype_types_follow
|
||||||
;
|
;
|
||||||
|
|
||||||
# Strings
|
# Strings
|
||||||
|
#
|
||||||
|
# This parses both single- and double-quoted strings, including empty ones.
|
||||||
|
|
||||||
string
|
string
|
||||||
= T_STRING_DQUOTE string_dquote_follow { val[1] }
|
= T_STRING_DQUOTE string_dquote_follow { val[1] }
|
||||||
|
@ -85,7 +120,7 @@ string_body
|
||||||
;
|
;
|
||||||
|
|
||||||
string_body_follow
|
string_body_follow
|
||||||
= T_STRING_BODY string_body { val[0] + val[1] }
|
= T_STRING_BODY string_body_follow { val[0] + val[1] }
|
||||||
| _ { '' }
|
| _ { '' }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -133,4 +168,26 @@ string_body_follow
|
||||||
def on_doctype(options = {})
|
def on_doctype(options = {})
|
||||||
return Doctype.new(options)
|
return Doctype.new(options)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
##
|
||||||
|
# @param [Array] children
|
||||||
|
# @return [Oga::XML::Document]
|
||||||
|
#
|
||||||
|
def on_document(children = [])
|
||||||
|
document = Document.new(:type => @lexer.html ? :html : :xml)
|
||||||
|
|
||||||
|
children.each do |child|
|
||||||
|
if child.is_a?(Doctype)
|
||||||
|
document.doctype = child
|
||||||
|
|
||||||
|
elsif child.is_a?(XmlDeclaration)
|
||||||
|
document.xml_declaration = child
|
||||||
|
|
||||||
|
else
|
||||||
|
document.children << child
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return document
|
||||||
|
end
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue