# Tokenizes the rest of the scanner's input and reports every token to
# +encoder+.
#
# The loop is a two-state machine:
#
# * +:initial+ - ordinary source text: whitespace, comments, identifiers,
#   operators, numeric literals, annotations and string openers.
# * +:string+  - inside a literal opened by <tt>"</tt> or <tt>'</tt>; the
#   literal is wrapped in a begin_group/end_group pair on the encoder.
#
# encoder - object receiving +text_token(text, kind)+, +begin_group(kind)+
#           and +end_group(kind)+ calls.
# options - accepted for interface compatibility; not read by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options
  state = :initial
  string_delimiter = nil
  # false, or the token kind (:include / :namespace) to use for the dotted
  # name that follows an `import` / `package` keyword.
  package_name_expected = false
  # true right after `class` / `interface`, until the name or a `{` is seen.
  class_name_follows = false
  # true when the previous non-space, non-comment token was a single `.`.
  last_token_dot = false

  until eos?
    case state
    when :initial
      if match = scan(/ \s+ | \\\n /x)
        encoder.text_token match, :space
        # `next` skips the last_token_dot update below, so whitespace does
        # not break a `.` / identifier pairing.
        next
      elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?m: .*? \*/ | .* ) !x)
        # The (?m: ... ) group lets a block comment span several lines; an
        # unterminated `/*` swallows the remaining input.
        encoder.text_token match, :comment
        next
      elsif package_name_expected && (match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox))
        encoder.text_token match, package_name_expected
      elsif match = scan(/ #{IDENT} | \[\] /ox)
        kind = IDENT_KIND[match]
        if last_token_dot
          # Anything directly after a dot is reported as a plain identifier.
          kind = :ident
        elsif class_name_follows
          kind = :class
          class_name_follows = false
        else
          case match
          when 'import'
            package_name_expected = :include
          when 'package'
            package_name_expected = :namespace
          when 'class', 'interface'
            class_name_follows = true
          end
        end
        encoder.text_token match, kind
      elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
        encoder.text_token match, :operator
      elsif match = scan(/;/)
        # A semicolon ends an import/package statement.
        package_name_expected = false
        encoder.text_token match, :operator
      elsif match = scan(/\{/)
        class_name_follows = false
        encoder.text_token match, :operator
      elsif check(/[\d.]/)
        if match = scan(/0[xX][0-9A-Fa-f]+/)
          encoder.text_token match, :hex
        elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
          encoder.text_token match, :octal
        elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
          encoder.text_token match, :float
        elsif match = scan(/\d+[lL]?/)
          encoder.text_token match, :integer
        end
      elsif match = scan(/["']/)
        state = :string
        encoder.begin_group state
        string_delimiter = match
        encoder.text_token match, :delimiter
      elsif match = scan(/ @ #{IDENT} /ox)
        encoder.text_token match, :annotation
      else
        # Unknown character: consume exactly one so the loop always advances.
        encoder.text_token getch, :error
      end

    when :string
      if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
        encoder.text_token match, :content
      elsif match = scan(/["'\/]/)
        encoder.text_token match, :delimiter
        encoder.end_group state
        state = :initial
        string_delimiter = nil
      elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /ox)
        # Inside '...' only \\ and \' are reported as :char; every other
        # recognized escape is passed through as ordinary content.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          encoder.text_token match, :content
        else
          encoder.text_token match, :char
        end
      elsif match = scan(/\\./)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Lone backslash or end of line inside an unterminated string: close
        # the group. `$` is zero-width, so match may be "" - do not emit an
        # empty error token in that case.
        encoder.end_group state
        state = :initial
        encoder.text_token match, :error unless match.empty?
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
      end

    else
      raise_inspect 'Unknown state', encoder
    end

    last_token_dot = match == '.'
  end

  # Input ended inside a string literal: close the still-open group.
  encoder.end_group state if state == :string

  encoder
end