# Scans the remaining input and reports every token to +encoder+.
#
# The method is a small state machine:
# * +:initial+ handles ordinary code (whitespace, comments, identifiers,
#   operators, numbers, annotations and the opening delimiter of literals);
# * +:string+, +:multiline_string+ and +:regexp+ handle the body of a literal
#   up to its closing delimiter.
# A <tt>${</tt> inside a literal pushes the literal's context onto
# +inline_block_stack+ and switches back to +:initial+ until the matching
# <tt>}</tt> is found.
#
# NOTE(review): the token set looks like Groovy - confirm against the
# enclosing scanner class.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: accepted for interface compatibility; not read by this method.
#
# Returns +encoder+. Every group that was opened is closed again, even when
# the input ends inside a literal or inside a <tt>${...}</tt> block.
def scan_tokens encoder, options
  state = :initial
  # One [state, string_delimiter, paren_depth] entry per literal that was
  # left in order to scan a ${...} block.
  inline_block_stack = []
  inline_block_paren_depth = nil
  string_delimiter = nil
  import_clause = class_name_follows = last_token = after_def = false
  value_expected = true

  until eos?
    # Reset per token so the last_token bookkeeping at the bottom of the loop
    # only sees the kind of the token scanned in this iteration.
    kind = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        if match.include?("\n")
          # A line break ends an import clause / def header; keep a pending
          # :regexp expectation if there is one.
          import_clause = after_def = false
          value_expected ||= true
        end
        next

      # /m lets ".*?" run across line breaks inside /* ... */ comments.
      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        after_def = false
        kind = :comment
        encoder.text_token match, kind

      elsif bol? && match = scan(/ \#!.* /x)
        kind = :doctype
        encoder.text_token match, kind

      elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
        after_def = value_expected = false
        encoder.text_token match, :include

      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if last_token == '.'
          # Anything after a dot is a member name, even if it is a keyword.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        elsif after_def && check(/\s*[({]/)
          kind = :method
          after_def = false
        elsif kind == :ident && last_token != '?' && check(/:/)
          # "name:" is a key unless the colon belongs to a "? :" ternary.
          kind = :key
        else
          class_name_follows = true if match == 'class' || (import_clause && match == 'as')
          import_clause = match == 'import'
          after_def = true if match == 'def'
        end
        encoder.text_token match, kind

      elsif match = scan(/;/)
        import_clause = after_def = false
        value_expected = true
        encoder.text_token match, :operator

      elsif match = scan(/\{/)
        class_name_follows = after_def = false
        value_expected = true
        encoder.text_token match, :operator
        # Count nested braces so the "}" closing a ${...} block can be found.
        inline_block_paren_depth += 1 unless inline_block_stack.empty?

      elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
          && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
        value_expected = true
        value_expected = :regexp if match == '~'
        after_def = false
        encoder.text_token match, :operator

      elsif match = scan(/ [)\]}] /x)
        value_expected = after_def = false
        if !inline_block_stack.empty? && match == '}'
          inline_block_paren_depth -= 1
          if inline_block_paren_depth == 0
            # Closing brace of the ${...} block: resume the enclosing literal.
            encoder.text_token match, :inline_delimiter
            encoder.end_group :inline
            state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
            next
          end
        end
        encoder.text_token match, :operator

      elsif check(/[\d.]/)
        after_def = value_expected = false
        if match = scan(/0[xX][0-9A-Fa-f]+/)
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lLgG]?/)
          encoder.text_token match, :integer
        end

      elsif match = scan(/'''|"""/)
        after_def = value_expected = false
        state = :multiline_string
        # Triple-quoted literals are reported as ordinary :string groups.
        encoder.begin_group :string
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif match = scan(/["']/)
        after_def = value_expected = false
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter

      elsif value_expected && match = scan(/\//)
        # A slash where a value is expected opens a regexp literal.
        after_def = value_expected = false
        encoder.begin_group :regexp
        state = :regexp
        string_delimiter = '/'
        encoder.text_token match, :delimiter

      elsif match = scan(/ @ #{IDENT} /ox)
        after_def = value_expected = false
        encoder.text_token match, :annotation

      elsif match = scan(/\//)
        # Any other slash is the division operator.
        after_def = false
        value_expected = true
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :string, :regexp, :multiline_string
      # The group was opened as :string for both quoting styles, so it has to
      # be closed as :string as well; only regexps have their own group kind.
      group_kind = state == :regexp ? :regexp : :string
      closing_delimiter = state == :multiline_string ? /'''|"""/ : /["'\/]/

      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content

      elsif match = scan(closing_delimiter)
        encoder.text_token match, :delimiter
        if state == :regexp
          modifiers = scan(/[ix]+/)
          encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
        end
        encoder.end_group group_kind
        string_delimiter = nil
        after_def = value_expected = false
        state = :initial
        next

      elsif (state == :string || state == :multiline_string) &&
          (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # In single-quoted literals only \\ and \' are highlighted as escapes.
        if string_delimiter.start_with?("'") && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end

      elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        encoder.text_token match, :char

      elsif match = scan(/ \$ #{IDENT} /mox)
        # $name interpolation: a one-token inline group.
        encoder.begin_group :inline
        encoder.text_token '$', :inline_delimiter
        match = match[1..-1]
        encoder.text_token match, IDENT_KIND[match]
        encoder.end_group :inline
        next

      elsif match = scan(/ \$ \{ /x)
        # ${...} interpolation: remember the literal and scan code until "}".
        encoder.begin_group :inline
        encoder.text_token match, :inline_delimiter
        inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
        inline_block_paren_depth = 1
        state = :initial
        next

      elsif match = scan(/ \$ /mx)
        encoder.text_token match, :content

      elsif match = scan(/ \\. /mx)
        encoder.text_token match, :content

      elsif match = scan(/ \\ | \n /x)
        # Stray backslash or line break: the literal is broken, give up on it.
        encoder.end_group group_kind
        encoder.text_token match, :error
        after_def = value_expected = false
        state = :initial

      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

      end

    else
      raise_inspect 'Unknown state', encoder

    end

    # Comments and the doctype line are invisible to the look-behind checks
    # on last_token (whitespace never gets here; it leaves via "next").
    last_token = match unless [:comment, :doctype].include? kind
  end

  # Input ended inside a literal: close its group.
  if [:multiline_string, :string, :regexp].include? state
    encoder.end_group(state == :regexp ? :regexp : :string)
  end

  # Input ended inside one or more ${...} blocks: close each inline group and
  # the literal that contains it, innermost first.
  until inline_block_stack.empty?
    outer_state = inline_block_stack.pop.first
    encoder.end_group :inline
    encoder.end_group(outer_state == :regexp ? :regexp : :string)
  end

  encoder
end