Basic boilerplate for lexing CSS selectors.

This commit is contained in:
Yorick Peterse 2014-09-16 16:32:57 +02:00
parent 1400a859ce
commit aa60115c0a
6 changed files with 191 additions and 1 deletions

1
.gitignore vendored
View File

@ -6,6 +6,7 @@ Gemfile.lock
lib/oga/xml/parser.rb lib/oga/xml/parser.rb
lib/oga/xpath/lexer.rb lib/oga/xpath/lexer.rb
lib/oga/xpath/parser.rb lib/oga/xpath/parser.rb
lib/oga/css/lexer.rb
lib/liboga.* lib/liboga.*

View File

@ -48,3 +48,5 @@ require_relative 'oga/xpath/node'
require_relative 'oga/xpath/lexer' require_relative 'oga/xpath/lexer'
require_relative 'oga/xpath/parser' require_relative 'oga/xpath/parser'
require_relative 'oga/xpath/evaluator' require_relative 'oga/xpath/evaluator'
require_relative 'oga/css/lexer'

159
lib/oga/css/lexer.rl Normal file
View File

@ -0,0 +1,159 @@
%%machine css_lexer; # %
module Oga
module CSS
##
# Lexer for turning CSS expressions into a sequence of tokens. Tokens are
# returned as arrays with every array having two values:
#
# 1. The token type as a Symbol
# 2. The token value, or nil if there is no value.
#
# ## Thread Safety
#
# Similar to the XPath lexer this lexer keeps track of an internal state. As
# a result it's not safe to share the same instance of this lexer between
# multiple threads. However, no global state is used so you can use separate
# instances in threads just fine.
#
class Lexer
%% write data;
# % fix highlight
##
# @param [String] data The data to lex.
#
def initialize(data)
@data = data
end
##
# Gathers all the tokens for the input and returns them as an Array.
#
# @see [#advance]
# @return [Array]
#
def lex
tokens = []
advance do |type, value|
tokens << [type, value]
end
return tokens
end
##
# Advances through the input and generates the corresponding tokens. Each
# token is yielded to the supplied block.
#
# This method stores the supplied block in `@block` and resets it after
# the lexer loop has finished.
#
# @see [#add_token]
#
def advance(&block)
@block = block
data = @data # saves ivar lookups while lexing.
ts = nil
te = nil
stack = []
top = 0
cs = self.class.css_lexer_start
act = 0
eof = @data.bytesize
p = 0
pe = eof
_css_lexer_eof_trans = self.class.send(:_css_lexer_eof_trans)
_css_lexer_from_state_actions = self.class.send(:_css_lexer_from_state_actions)
_css_lexer_index_offsets = self.class.send(:_css_lexer_index_offsets)
_css_lexer_indicies = self.class.send(:_css_lexer_indicies)
_css_lexer_key_spans = self.class.send(:_css_lexer_key_spans)
_css_lexer_to_state_actions = self.class.send(:_css_lexer_to_state_actions)
_css_lexer_trans_actions = self.class.send(:_css_lexer_trans_actions)
_css_lexer_trans_keys = self.class.send(:_css_lexer_trans_keys)
_css_lexer_trans_targs = self.class.send(:_css_lexer_trans_targs)
%% write exec;
# % fix highlight
ensure
@block = nil
end
private
##
# Emits a token of which the value is based on the supplied start/stop
# position.
#
# @param [Symbol] type The token type.
# @param [Fixnum] start
# @param [Fixnum] stop
#
# @see [#text]
# @see [#add_token]
#
def emit(type, start, stop)
value = slice_input(start, stop)
add_token(type, value)
end
##
# Returns the text between the specified start and stop position.
#
# @param [Fixnum] start
# @param [Fixnum] stop
# @return [String]
#
def slice_input(start, stop)
return @data.byteslice(start, stop - start)
end
##
# Yields a new token to the supplied block.
#
# @param [Symbol] type The token type.
# @param [String] value The token value.
#
# @yieldparam [Symbol] type
# @yieldparam [String|NilClass] value
#
def add_token(type, value = nil)
@block.call(type, value)
end
%%{
getkey (data.getbyte(p) || 0);
whitespace = [\t ]+;
action emit_space {
add_token(:T_SPACE)
}
# Identifiers
#
# Identifiers are used for element and attribute names. Identifiers have
# to start with a letter.
identifier = [a-zA-Z*]+ [a-zA-Z\-_0-9]*;
action emit_identifier {
emit(:T_IDENT, ts, te)
}
main := |*
identifier => emit_identifier;
whitespace => emit_space;
any;
*|;
}%%
end # Lexer
end # CSS
end # Oga

View File

@ -0,0 +1,17 @@
require 'spec_helper'
describe Oga::CSS::Lexer do
context 'paths' do
example 'lex a simple path' do
lex_css('h3').should == [[:T_IDENT, 'h3']]
end
example 'lex a path with two members' do
lex_css('div h3').should == [
[:T_IDENT, 'div'],
[:T_SPACE, nil],
[:T_IDENT, 'h3']
]
end
end
end

View File

@ -33,6 +33,16 @@ module Oga
return Oga::XPath::Lexer.new(input).lex return Oga::XPath::Lexer.new(input).lex
end end
##
# Lexes a CSS expression.
#
# @param [String] input
# @return [Array]
#
def lex_css(input)
return Oga::CSS::Lexer.new(input).lex
end
## ##
# Parses an XPath expression. # Parses an XPath expression.
# #

View File

@ -28,5 +28,6 @@ desc 'Generates the lexers'
multitask :lexer => [ multitask :lexer => [
'ext/c/lexer.c', 'ext/c/lexer.c',
'ext/java/org/liboga/xml/Lexer.java', 'ext/java/org/liboga/xml/Lexer.java',
'lib/oga/xpath/lexer.rb' 'lib/oga/xpath/lexer.rb',
'lib/oga/css/lexer.rb'
] ]