Lexing of CDATA tags.
This commit is contained in:
		
							parent
							
								
									0a336e76d3
								
							
						
					
					
						commit
						c4e0406ed9
					
				|  | @ -73,20 +73,23 @@ module Oga | |||
| 
 | ||||
|       any_escaped = /\\./; | ||||
| 
 | ||||
|       smaller = '<'; | ||||
|       greater = '>'; | ||||
|       slash   = '/'; | ||||
|       bang    = '!'; | ||||
|       equals  = '='; | ||||
|       colon   = ':'; | ||||
|       dash    = '-'; | ||||
|       smaller  = '<'; | ||||
|       greater  = '>'; | ||||
|       slash    = '/'; | ||||
|       bang     = '!'; | ||||
|       equals   = '='; | ||||
|       colon    = ':'; | ||||
|       dash     = '-'; | ||||
|       lbracket = '['; | ||||
|       rbracket = ']'; | ||||
| 
 | ||||
|       s_quote  = "'"; | ||||
|       d_quote  = '"'; | ||||
| 
 | ||||
|       # FIXME: there really should be a better way of doing this. | ||||
|       text = (any - s_quote - d_quote - equals - bang - slash - | ||||
|         greater - smaller - whitespace - newline - colon - dash)+; | ||||
|         greater - smaller - whitespace - newline - colon - dash - | ||||
|         lbracket - rbracket)+; | ||||
| 
 | ||||
|       # Unicode characters, taken from whitequark's wonderful parser library. | ||||
|       # (I honestly need to buy that dude a beer or 100). Basically this | ||||
|  | @ -103,6 +106,8 @@ module Oga | |||
|         d_quote    => { t(:T_DQUOTE) }; | ||||
|         s_quote    => { t(:T_SQUOTE) }; | ||||
|         dash       => { t(:T_DASH) }; | ||||
|         rbracket   => { t(:T_RBRACKET) }; | ||||
|         lbracket   => { t(:T_LBRACKET) }; | ||||
|         colon      => { t(:T_COLON) }; | ||||
|         bang       => { t(:T_BANG) }; | ||||
|         equals     => { t(:T_EQUALS) }; | ||||
|  |  | |||
|  | @ -116,4 +116,20 @@ describe Oga::Lexer do | |||
|       ] | ||||
|     end | ||||
|   end | ||||
| 
 | ||||
|   context 'cdata tags' do | ||||
|     example 'lex a cdata tag' do | ||||
|       lex('<![CDATA[foo]]>').should == [ | ||||
|         [:T_SMALLER, '<', 1, 1], | ||||
|         [:T_BANG, '!', 1, 2], | ||||
|         [:T_LBRACKET, '[', 1, 3], | ||||
|         [:T_TEXT, 'CDATA', 1, 4], | ||||
|         [:T_LBRACKET, '[', 1, 9], | ||||
|         [:T_TEXT, 'foo', 1, 10], | ||||
|         [:T_RBRACKET, ']', 1, 13], | ||||
|         [:T_RBRACKET, ']', 1, 14], | ||||
|         [:T_GREATER, '>', 1, 15], | ||||
|       ] | ||||
|     end | ||||
|   end | ||||
| end | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue