Basic boilerplate for lexing CSS selectors.
This commit is contained in:
parent
1400a859ce
commit
aa60115c0a
|
@ -6,6 +6,7 @@ Gemfile.lock
|
|||
lib/oga/xml/parser.rb
|
||||
lib/oga/xpath/lexer.rb
|
||||
lib/oga/xpath/parser.rb
|
||||
lib/oga/css/lexer.rb
|
||||
|
||||
lib/liboga.*
|
||||
|
||||
|
|
|
@ -48,3 +48,5 @@ require_relative 'oga/xpath/node'
|
|||
require_relative 'oga/xpath/lexer'
|
||||
require_relative 'oga/xpath/parser'
|
||||
require_relative 'oga/xpath/evaluator'
|
||||
|
||||
require_relative 'oga/css/lexer'
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
%%machine css_lexer; # %
|
||||
|
||||
module Oga
|
||||
module CSS
|
||||
##
|
||||
# Lexer for turning CSS expressions into a sequence of tokens. Tokens are
|
||||
# returned as arrays with every array having two values:
|
||||
#
|
||||
# 1. The token type as a Symbol
|
||||
# 2. The token value, or nil if there is no value.
|
||||
#
|
||||
# ## Thread Safety
|
||||
#
|
||||
# Similar to the XPath lexer this lexer keeps track of an internal state. As
|
||||
# a result it's not safe to share the same instance of this lexer between
|
||||
# multiple threads. However, no global state is used so you can use separate
|
||||
# instances in threads just fine.
|
||||
#
|
||||
class Lexer
|
||||
%% write data;
|
||||
|
||||
# % fix highlight
|
||||
|
||||
##
|
||||
# @param [String] data The data to lex.
|
||||
#
|
||||
def initialize(data)
|
||||
@data = data
|
||||
end
|
||||
|
||||
##
|
||||
# Gathers all the tokens for the input and returns them as an Array.
|
||||
#
|
||||
# @see [#advance]
|
||||
# @return [Array]
|
||||
#
|
||||
def lex
|
||||
tokens = []
|
||||
|
||||
advance do |type, value|
|
||||
tokens << [type, value]
|
||||
end
|
||||
|
||||
return tokens
|
||||
end
|
||||
|
||||
##
|
||||
# Advances through the input and generates the corresponding tokens. Each
|
||||
# token is yielded to the supplied block.
|
||||
#
|
||||
# This method stores the supplied block in `@block` and resets it after
|
||||
# the lexer loop has finished.
|
||||
#
|
||||
# @see [#add_token]
|
||||
#
|
||||
def advance(&block)
|
||||
@block = block
|
||||
|
||||
data = @data # saves ivar lookups while lexing.
|
||||
ts = nil
|
||||
te = nil
|
||||
stack = []
|
||||
top = 0
|
||||
cs = self.class.css_lexer_start
|
||||
act = 0
|
||||
eof = @data.bytesize
|
||||
p = 0
|
||||
pe = eof
|
||||
|
||||
_css_lexer_eof_trans = self.class.send(:_css_lexer_eof_trans)
|
||||
_css_lexer_from_state_actions = self.class.send(:_css_lexer_from_state_actions)
|
||||
_css_lexer_index_offsets = self.class.send(:_css_lexer_index_offsets)
|
||||
_css_lexer_indicies = self.class.send(:_css_lexer_indicies)
|
||||
_css_lexer_key_spans = self.class.send(:_css_lexer_key_spans)
|
||||
_css_lexer_to_state_actions = self.class.send(:_css_lexer_to_state_actions)
|
||||
_css_lexer_trans_actions = self.class.send(:_css_lexer_trans_actions)
|
||||
_css_lexer_trans_keys = self.class.send(:_css_lexer_trans_keys)
|
||||
_css_lexer_trans_targs = self.class.send(:_css_lexer_trans_targs)
|
||||
|
||||
%% write exec;
|
||||
|
||||
# % fix highlight
|
||||
ensure
|
||||
@block = nil
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
##
|
||||
# Emits a token of which the value is based on the supplied start/stop
|
||||
# position.
|
||||
#
|
||||
# @param [Symbol] type The token type.
|
||||
# @param [Fixnum] start
|
||||
# @param [Fixnum] stop
|
||||
#
|
||||
# @see [#text]
|
||||
# @see [#add_token]
|
||||
#
|
||||
def emit(type, start, stop)
|
||||
value = slice_input(start, stop)
|
||||
|
||||
add_token(type, value)
|
||||
end
|
||||
|
||||
##
|
||||
# Returns the text between the specified start and stop position.
|
||||
#
|
||||
# @param [Fixnum] start
|
||||
# @param [Fixnum] stop
|
||||
# @return [String]
|
||||
#
|
||||
def slice_input(start, stop)
|
||||
return @data.byteslice(start, stop - start)
|
||||
end
|
||||
|
||||
##
|
||||
# Yields a new token to the supplied block.
|
||||
#
|
||||
# @param [Symbol] type The token type.
|
||||
# @param [String] value The token value.
|
||||
#
|
||||
# @yieldparam [Symbol] type
|
||||
# @yieldparam [String|NilClass] value
|
||||
#
|
||||
def add_token(type, value = nil)
|
||||
@block.call(type, value)
|
||||
end
|
||||
|
||||
%%{
|
||||
getkey (data.getbyte(p) || 0);
|
||||
|
||||
whitespace = [\t ]+;
|
||||
|
||||
action emit_space {
|
||||
add_token(:T_SPACE)
|
||||
}
|
||||
|
||||
# Identifiers
|
||||
#
|
||||
# Identifiers are used for element and attribute names. Identifiers have
|
||||
# to start with a letter.
|
||||
|
||||
identifier = [a-zA-Z*]+ [a-zA-Z\-_0-9]*;
|
||||
|
||||
action emit_identifier {
|
||||
emit(:T_IDENT, ts, te)
|
||||
}
|
||||
|
||||
main := |*
|
||||
identifier => emit_identifier;
|
||||
whitespace => emit_space;
|
||||
|
||||
any;
|
||||
*|;
|
||||
}%%
|
||||
end # Lexer
|
||||
end # CSS
|
||||
end # Oga
|
|
@ -0,0 +1,17 @@
|
|||
require 'spec_helper'
|
||||
|
||||
describe Oga::CSS::Lexer do
|
||||
context 'paths' do
|
||||
example 'lex a simple path' do
|
||||
lex_css('h3').should == [[:T_IDENT, 'h3']]
|
||||
end
|
||||
|
||||
example 'lex a path with two members' do
|
||||
lex_css('div h3').should == [
|
||||
[:T_IDENT, 'div'],
|
||||
[:T_SPACE, nil],
|
||||
[:T_IDENT, 'h3']
|
||||
]
|
||||
end
|
||||
end
|
||||
end
|
|
@ -33,6 +33,16 @@ module Oga
|
|||
return Oga::XPath::Lexer.new(input).lex
|
||||
end
|
||||
|
||||
##
|
||||
# Lexes a CSS expression.
|
||||
#
|
||||
# @param [String] input
|
||||
# @return [Array]
|
||||
#
|
||||
def lex_css(input)
|
||||
return Oga::CSS::Lexer.new(input).lex
|
||||
end
|
||||
|
||||
##
|
||||
# Parses an XPath expression.
|
||||
#
|
||||
|
|
|
@ -28,5 +28,6 @@ desc 'Generates the lexers'
|
|||
multitask :lexer => [
|
||||
'ext/c/lexer.c',
|
||||
'ext/java/org/liboga/xml/Lexer.java',
|
||||
'lib/oga/xpath/lexer.rb'
|
||||
'lib/oga/xpath/lexer.rb',
|
||||
'lib/oga/css/lexer.rb'
|
||||
]
|
||||
|
|
Loading…
Reference in New Issue