Support for inline doctype rules plus newlines.
This adds support for lexing/parsing XML documents that use an IO as input _and_ contain doctype rules with newlines in them. This fixes #63.
This commit is contained in:
parent
f88df486ba
commit
cbb2815146
|
@ -183,6 +183,22 @@
|
|||
fnext doctype;
|
||||
}
|
||||
|
||||
# Machine for processing inline rules of a doctype.
|
||||
doctype_inline := |*
|
||||
^']'* $count_newlines => {
|
||||
callback("on_doctype_inline", data, encoding, ts, te);
|
||||
|
||||
if ( lines > 0 )
|
||||
{
|
||||
advance_line(lines);
|
||||
|
||||
lines = 0;
|
||||
}
|
||||
};
|
||||
|
||||
']' => { fnext doctype; };
|
||||
*|;
|
||||
|
||||
# Machine for processing doctypes. Doctype values such as the public
|
||||
# and system IDs are treated as T_STRING tokens.
|
||||
doctype := |*
|
||||
|
@ -190,11 +206,8 @@
|
|||
callback("on_doctype_type", data, encoding, ts, te);
|
||||
};
|
||||
|
||||
# Consumes everything between the [ and ]. Due to the use of :> the ]
|
||||
# is not consumed by any+.
|
||||
'[' any+ :> ']' => {
|
||||
callback("on_doctype_inline", data, encoding, ts + 1, te - 1);
|
||||
};
|
||||
# Starts a set of inline doctype rules.
|
||||
'[' => { fnext doctype_inline; };
|
||||
|
||||
# Lex the public/system IDs as regular strings.
|
||||
squote => start_string_squote;
|
||||
|
|
|
@ -86,12 +86,17 @@ rule
|
|||
}
|
||||
|
||||
# <!DOCTYPE html [ ... ]>
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME T_DOCTYPE_INLINE T_DOCTYPE_END
|
||||
| T_DOCTYPE_START T_DOCTYPE_NAME doctype_inline T_DOCTYPE_END
|
||||
{
|
||||
on_doctype(:name => val[1], :inline_rules => val[2])
|
||||
}
|
||||
;
|
||||
|
||||
doctype_inline
|
||||
: T_DOCTYPE_INLINE { val[0] }
|
||||
| doctype_inline T_DOCTYPE_INLINE { val[0] + val[1] }
|
||||
;
|
||||
|
||||
# CDATA tags
|
||||
|
||||
cdata
|
||||
|
|
|
@ -49,6 +49,46 @@ describe Oga::XML::Lexer do
|
|||
]
|
||||
end
|
||||
|
||||
example 'lex an empty inline doctype' do
|
||||
lex('<!DOCTYPE html []>').should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_END, nil, 1]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an inline doctype containing a newline' do
|
||||
lex("<!DOCTYPE html [foo\n]>").should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_INLINE, "foo\n", 1],
|
||||
[:T_DOCTYPE_END, nil, 2]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an inline doctype containing a trailing newline using an IO' do
|
||||
input = StringIO.new("<!DOCTYPE html [foo\n]>")
|
||||
|
||||
lex(input).should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_INLINE, "foo\n", 1],
|
||||
[:T_DOCTYPE_END, nil, 2]
|
||||
]
|
||||
end
|
||||
|
||||
example 'lex an inline doctype containing a leading newline using an IO' do
|
||||
input = StringIO.new("<!DOCTYPE html [\nfoo]>")
|
||||
|
||||
lex(input).should == [
|
||||
[:T_DOCTYPE_START, nil, 1],
|
||||
[:T_DOCTYPE_NAME, 'html', 1],
|
||||
[:T_DOCTYPE_INLINE, "\n", 1],
|
||||
[:T_DOCTYPE_INLINE, "foo", 2],
|
||||
[:T_DOCTYPE_END, nil, 2]
|
||||
]
|
||||
end
|
||||
|
||||
# Technically not valid, put in place to make sure that the Ragel rules are
|
||||
# not too greedy.
|
||||
example 'lex an inline doctype followed by a system ID' do
|
||||
|
|
|
@ -94,4 +94,14 @@ describe Oga::XML::Parser do
|
|||
@document.doctype.inline_rules.should == '<!ELEMENT foo>'
|
||||
end
|
||||
end
|
||||
|
||||
context 'doctypes with inline rules and newlines using a StringIO' do
|
||||
before :all do
|
||||
@document = parse(StringIO.new("<!DOCTYPE html [\nfoo]>"))
|
||||
end
|
||||
|
||||
example 'set the inline doctype rules' do
|
||||
@document.doctype.inline_rules.should == "\nfoo"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue