From a76286b973ed6d6241a0280eb3d1d117428e9964 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Mon, 8 Jun 2015 06:34:49 +0200 Subject: [PATCH] Support for spaces around attribute equal signs This also takes care of making sure line numbers are incremented properly. Fixes #112 --- ext/ragel/base_lexer.rl | 35 ++++++++++++++++++++------ spec/oga/html/lexer/attributes_spec.rb | 33 ++++++++++++++++++++++++ spec/oga/xml/lexer/elements_spec.rb | 33 ++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 8 deletions(-) diff --git a/ext/ragel/base_lexer.rl b/ext/ragel/base_lexer.rl index 8c5eb6f..d1e3270 100644 --- a/ext/ragel/base_lexer.rl +++ b/ext/ragel/base_lexer.rl @@ -426,6 +426,32 @@ squote | dquote | '`' | '=' | '<' | '>' | whitespace_or_newline )+; + # Machine used after matching the "=" of an attribute and just before moving + # into the actual attribute value. + attribute_pre := |* + whitespace_or_newline $count_newlines; + + any => { + fhold; + + if ( lines > 0 ) + { + advance_line(lines); + + lines = 0; + } + + if ( html_p ) + { + fnext html_attribute_value; + } + else + { + fnext xml_attribute_value; + } + }; + *|; + # Machine used for processing HTML attribute values. html_attribute_value := |* squote | dquote => { @@ -482,14 +508,7 @@ # Attribute values. '=' => { - if ( html_p ) - { - fcall html_attribute_value; - } - else - { - fcall xml_attribute_value; - } + fcall attribute_pre; }; # We're done with the open tag of the element. diff --git a/spec/oga/html/lexer/attributes_spec.rb b/spec/oga/html/lexer/attributes_spec.rb index 00d9020..e5a5739 100644 --- a/spec/oga/html/lexer/attributes_spec.rb +++ b/spec/oga/html/lexer/attributes_spec.rb @@ -57,5 +57,38 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ] end + + it 'lexes an element with spaces around the attribute equal sign' do + lex_html('
<p foo = "bar"></p>
').should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_STRING_BODY, 'bar', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes an element with a newline following the equals sign' do + lex_html(%Q{
<p foo =\n"bar"></p>
}).should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 2], + [:T_STRING_BODY, 'bar', 2], + [:T_STRING_DQUOTE, nil, 2], + [:T_ELEM_END, nil, 2] + ] + end + + it 'lexes an element with a newline following the equals sign using an IO as input' do + lex_stringio(%Q{
<p foo =\n"bar"></p>
}, :html => true).should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 2], + [:T_STRING_BODY, 'bar', 2], + [:T_STRING_DQUOTE, nil, 2], + [:T_ELEM_END, nil, 2] + ] + end end end diff --git a/spec/oga/xml/lexer/elements_spec.rb b/spec/oga/xml/lexer/elements_spec.rb index b4b808d..c0c512f 100644 --- a/spec/oga/xml/lexer/elements_spec.rb +++ b/spec/oga/xml/lexer/elements_spec.rb @@ -192,6 +192,39 @@ describe Oga::XML::Lexer do [:T_ELEM_END, nil, 1] ] end + + it 'lexes an element with spaces around the attribute equal sign' do + lex('
<p foo = "bar"></p>
').should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_STRING_BODY, 'bar', 1], + [:T_STRING_DQUOTE, nil, 1], + [:T_ELEM_END, nil, 1] + ] + end + + it 'lexes an element with a newline following the equals sign' do + lex(%Q{
<p foo =\n"bar"></p>
}).should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 2], + [:T_STRING_BODY, 'bar', 2], + [:T_STRING_DQUOTE, nil, 2], + [:T_ELEM_END, nil, 2] + ] + end + + it 'lexes an element with a newline following the equals sign using an IO as input' do + lex_stringio(%Q{
<p foo =\n"bar"></p>
}).should == [ + [:T_ELEM_NAME, 'p', 1], + [:T_ATTR, 'foo', 1], + [:T_STRING_DQUOTE, nil, 2], + [:T_STRING_BODY, 'bar', 2], + [:T_STRING_DQUOTE, nil, 2], + [:T_ELEM_END, nil, 2] + ] + end end describe 'nested elements' do