From 8cfa81aed9425800638c7c552f239c24dbab1990 Mon Sep 17 00:00:00 2001 From: Yorick Peterse Date: Wed, 12 Mar 2014 23:02:54 +0100 Subject: [PATCH] Basic support for parsing elements. This includes support for elements with namespaces and attributes. Nested elements are not yet supported. --- lib/oga/parser.y | 63 +++++++++++++++++++++++++++ spec/oga/parser/elements_spec.rb | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 spec/oga/parser/elements_spec.rb diff --git a/lib/oga/parser.y b/lib/oga/parser.y index e0ccd0e..8d77ca1 100644 --- a/lib/oga/parser.y +++ b/lib/oga/parser.y @@ -5,6 +5,7 @@ token T_STRING T_TEXT token T_DOCTYPE_START T_DOCTYPE_END T_DOCTYPE_TYPE token T_CDATA_START T_CDATA_END token T_COMMENT_START T_COMMENT_END +token T_ELEM_OPEN T_ELEM_NAME T_ELEM_NS T_ELEM_CLOSE T_ATTR options no_result_var @@ -23,6 +24,8 @@ rule : doctype | cdata | comment + | element + | text ; # Doctypes @@ -70,6 +73,66 @@ rule | T_COMMENT_START T_TEXT T_COMMENT_END { s(:comment, val[1]) } ; + # Elements + + element + # <p></p>
+ : element_open T_ELEM_CLOSE { s(:element, val[0]) } + + # <p foo="bar"></p>
+ | element_open attributes T_ELEM_CLOSE + { + s(:element, val[0], val[1]) + } + + # <p>foo</p>
+ | element_open text T_ELEM_CLOSE + { + s(:element, val[0], nil, val[1]) + } + + # <p class="foo">Bar</p>
+ | element_open attributes text T_ELEM_CLOSE + { + s(:element, val[0], val[1], val[2]) + } + ; + + element_open + # <p>
+ : T_ELEM_OPEN T_ELEM_NAME { [nil, val[1]] } + + # <foo:p> + | T_ELEM_OPEN T_ELEM_NS T_ELEM_NAME { [val[1], val[2]] } + ; + + # Attributes + + attributes + : attributes_ { s(:attributes, val[0]) } + ; + + attributes_ + : attributes_ attribute { val } + | attribute { val } + ; + + attribute + # foo + : T_ATTR { s(:attribute, val[0]) } + + # foo="bar" + | T_ATTR T_STRING { s(:attribute, val[0], val[1]) } + ; + + # Plain text + + text + : T_TEXT { s(:text, val[0]) } + ; + + # Whitespace + whitespaces : whitespaces whitespace | whitespace diff --git a/spec/oga/parser/elements_spec.rb b/spec/oga/parser/elements_spec.rb new file mode 100644 index 0000000..fb5b25f --- /dev/null +++ b/spec/oga/parser/elements_spec.rb @@ -0,0 +1,73 @@ +require 'spec_helper' + +describe Oga::Parser do + context 'elements' do + example 'parse an empty element' do + parse_html('<p></p>').should == s(:document, s(:element, nil, 'p')) + end +
+ example 'parse an element with text' do + parse_html('<p>foo</p>').should == s( + :document, + s(:element, nil, 'p', nil, s(:text, 'foo')) + ) + end +
+ example 'parse an element with a single attribute' do + parse_html('<p foo></p>').should == s( + :document, + s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo'))) + ) + end +
+ example 'parse an element with a single attribute with a value' do + parse_html('<p foo="bar"></p>').should == s( + :document, + s(:element, nil, 'p', s(:attributes, s(:attribute, 'foo', 'bar'))) + ) + end +
+ example 'parse an element with multiple attributes' do + parse_html('<p foo="bar" baz="bad"></p>').should == s( + :document, + s( + :element, + nil, + 'p', + s( + :attributes, + s(:attribute, 'foo', 'bar'), + s(:attribute, 'baz', 'bad') + ) + ) + ) + end +
+ example 'parse an element with text and attributes' do + parse_html('<p class="foo">Bar</p>').should == s( + :document, + s( + :element, + nil, + 'p', + s(:attributes, s(:attribute, 'class', 'foo')), + s(:text, 'Bar') + ) + ) + end +
+ example 'parse an element with a namespace' do + parse_html('<foo:p></p>').should == s( + :document, + s(:element, 'foo', 'p') + ) + end +
+ example 'parse an element with a namespace and an attribute' do + parse_html('<foo:p class="bar"></p>').should == s( + :document, + s(:element, 'foo', 'p', s(:attributes, s(:attribute, 'class', 'bar'))) + ) + end + end +end