diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index f6a80a2..1aa09f5 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -289,7 +289,7 @@ # Machine for processing doctypes. Doctype values such as the public # and system IDs are treated as T_STRING tokens. doctype := |* - 'PUBLIC' | 'SYSTEM' => { + 'PUBLIC'i | 'SYSTEM'i => { callback(id_on_doctype_type, data, encoding, ts, te); }; diff --git a/spec/oga/xml/lexer/doctype_spec.rb b/spec/oga/xml/lexer/doctype_spec.rb index 7fbe8d1..14735ea 100644 --- a/spec/oga/xml/lexer/doctype_spec.rb +++ b/spec/oga/xml/lexer/doctype_spec.rb @@ -108,7 +108,7 @@ describe Oga::XML::Lexer do # Technically not valid, put in place to make sure that the Ragel rules are # not too greedy. - it 'lexes an inline doftype followed by a system ID' do + it 'lexes an inline doctype followed by a system ID' do expect(lex('] "foo">')).to eq([ [:T_DOCTYPE_START, nil, 1], [:T_DOCTYPE_NAME, 'html', 1], @@ -119,5 +119,31 @@ describe Oga::XML::Lexer do [:T_DOCTYPE_END, nil, 1] ]) end + + it 'does not care about the casing when using a public doctype' do + expect(lex('] "foo">')).to eq([ + [:T_DOCTYPE_START, nil, 1], + [:T_DOCTYPE_NAME, 'HtMl', 1], + [:T_DOCTYPE_TYPE, 'PuBlIc', 1], + [:T_DOCTYPE_INLINE, '', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_STRING_BODY, 'foo', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_DOCTYPE_END, nil, 1] + ]) + end + + it 'does not care about the casing when using a system doctype' do + expect(lex('] "foo">')).to eq([ + [:T_DOCTYPE_START, nil, 1], + [:T_DOCTYPE_NAME, 'HtMl', 1], + [:T_DOCTYPE_TYPE, 'SyStEm', 1], + [:T_DOCTYPE_INLINE, '', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_STRING_BODY, 'foo', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_DOCTYPE_END, nil, 1] + ]) + end end end