# Tokenizes the remaining input with a hand-written state machine and reports
# every token to +encoder+.
#
# encoder - receives text_token / begin_group / end_group calls and is
#           returned at the end of the method.
# options - Hash. options[:state] overrides the saved @state as the starting
#           state. A truthy options[:keep_state] writes the final state and
#           the current plain-string pattern back to @state and
#           @plain_string_content.
#
# States handled: :initial, :attribute, :attribute_equal, :attribute_value,
# :attribute_value_string and :in_special_tag. Any other state is reported
# through raise_inspect.
#
# eos?, scan, scan_until, getch, matched and self[n] are presumably the
# StringScanner API of the enclosing scanner class - confirm. ENTITY, TAG_END,
# ATTR_NAME, IN_ATTRIBUTE, PLAIN_STRING_CONTENT, scan_java_script and
# raise_inspect are defined outside this method.
#
# NOTE(review): every regex literal below appears in a `%r.../` form (for
# example `%r\s+/`) that does not look like valid Ruby. They may have been
# garbled from plain `/.../` literals, possibly losing flags - confirm
# against the original file before relying on them.
def scan_tokens encoder, options
state = options[:state] || @state
plain_string_content = @plain_string_content
# in_tag: capture group 1 of the last open-tag match ("script" or nil).
# in_attribute: result of the IN_ATTRIBUTE lookup for the last attribute name.
# NOTE(review): neither is saved by :keep_state below, so a call that starts
# in :in_special_tag has in_tag == nil and reaches the
# 'unknown special tag' raise - confirm whether resuming there is expected.
in_tag = in_attribute = nil
# When resuming inside a quoted attribute value, reopen the :string group
# (mirrors the end_group at the bottom of the method).
encoder.begin_group :string if state == :attribute_value_string
until eos?
# Whitespace is emitted as :space in every state except :in_special_tag.
if state != :in_special_tag && match = scan(%r\s+/)
encoder.text_token match, :space
else
case state
# Outside of any tag: comments, doctype, processing instructions, tags,
# plain text and entities.
when :initial
if match = scan(%r<!--(?:.*?-->|.*)/)
encoder.text_token match, :comment
elsif match = scan(%r<!DOCTYPE(?:.*?>|.*)/)
encoder.text_token match, :doctype
elsif match = scan(%r<\?xml(?:.*?\?>|.*)/)
encoder.text_token match, :preprocessor
elsif match = scan(%r<\?(?:.*?\?>|.*)/)
encoder.text_token match, :comment
# Closing tag: forget which tag we were in.
elsif match = scan(%r<\/[-\w.:]*>?/)
in_tag = nil
encoder.text_token match, :tag
# Opening tag: group 1 is set only for "script", group 2 only when ">"
# follows the tag name directly.
elsif match = scan(%r<(?:(script)|[-\w.:]+)(>)?/)
encoder.text_token match, :tag
in_tag = self[1]
if self[2]
# Tag already closed: only a script tag changes state; any other tag
# leaves the state at :initial.
state = :in_special_tag if in_tag
else
# Tag still open: attributes follow.
state = :attribute
end
elsif match = scan(%r[^<>&]+/)
encoder.text_token match, :plain
elsif match = scan(%r#{ENTITY}/x)
encoder.text_token match, :entity
# A stray "<", ">" or "&" that none of the patterns above accepted.
elsif match = scan(%r[<>&]/)
in_tag = nil
encoder.text_token match, :error
else
raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
end
# Inside an open tag, expecting an attribute name or the end of the tag.
when :attribute
if match = scan(%r#{TAG_END}/)
encoder.text_token match, :tag
in_attribute = nil
# in_tag is only ever set to "script" (or nil), so a truthy value means
# the tag body needs special handling.
if in_tag
state = :in_special_tag
else
state = :initial
end
elsif match = scan(%r#{ATTR_NAME}/)
in_attribute = IN_ATTRIBUTE[match]
encoder.text_token match, :attribute_name
state = :attribute_equal
else
# Unexpected character: emit it as a one-character :error token and drop
# the special-tag marker.
in_tag = nil
encoder.text_token getch, :error
end
# After an attribute name: an optional "=".
when :attribute_equal
if match = scan(%r=/)
encoder.text_token match, :operator
state = :attribute_value
else
# No "=": nothing is consumed; go back to :attribute and restart the loop.
state = :attribute
next
end
# After "=": an unquoted value, a quoted value, or the end of the tag.
when :attribute_value
# Unquoted value (matched with the same pattern as attribute names).
if match = scan(%r#{ATTR_NAME}/)
encoder.text_token match, :attribute_value
state = :attribute
elsif match = scan(%r["']/)
if in_attribute == :script
# Quoted value of an attribute that IN_ATTRIBUTE maps to :script: the
# whole value is emitted as an :inline group via scan_java_script.
encoder.begin_group :inline
encoder.text_token match, :inline_delimiter
# An optional "javascript:" prefix is emitted as a :comment token.
if scan(%rjavascript:[ \t]*/)
encoder.text_token matched, :comment
end
# Take everything up to (not including) the matching quote, or up to the
# end of the input when the quote is never closed.
code = scan_until(match == '"' ? %r(?="|\z)/ : %r(?='|\z)/)
scan_java_script encoder, code
# The lookahead above stops right before the closing quote if there is
# one; consume it here.
match = scan(%r["']/)
encoder.text_token match, :inline_delimiter if match
encoder.end_group :inline
state = :attribute
in_attribute = nil
else
# Ordinary quoted value: open a :string group and select the content
# pattern for this quote character.
encoder.begin_group :string
state = :attribute_value_string
plain_string_content = PLAIN_STRING_CONTENT[match]
encoder.text_token match, :delimiter
end
# NOTE(review): unlike the :attribute state, this branch goes to :initial
# without checking in_tag and does not reset in_attribute - confirm
# whether that is intended.
elsif match = scan(%r#{TAG_END}/)
encoder.text_token match, :tag
state = :initial
else
encoder.text_token getch, :error
end
# Inside a quoted attribute value (a :string group is open).
# NOTE(review): there is no else branch here; presumably the patterns below
# cover every possible character, otherwise the loop would not advance -
# verify against PLAIN_STRING_CONTENT.
when :attribute_value_string
if match = scan(plain_string_content)
encoder.text_token match, :content
# A quote character closes the string.
elsif match = scan(%r['"]/)
encoder.text_token match, :delimiter
encoder.end_group :string
state = :attribute
elsif match = scan(%r#{ENTITY}/x)
encoder.text_token match, :entity
# A bare "&" that is not part of an entity is treated as content.
elsif match = scan(%r&/)
encoder.text_token match, :content
# A newline or ">" aborts the string: the group is closed first, then the
# offending character is emitted as :error outside of the group.
elsif match = scan(%r[\n>]/)
encoder.end_group :string
state = :initial
encoder.text_token match, :error
end
# Body of a special tag; in_tag selects the handling.
when :in_special_tag
case in_tag
when 'script'
# Optional blanks followed by a newline directly after the tag.
encoder.text_token match, :space if match = scan(%r[ \t]*\n/)
# Script body wrapped in "<!--": the opener is emitted as :comment, the
# text after it is the code, and closing is "-->" when present (nil when
# the wrapper is never closed).
if scan(%r(\s*<!--)(?:(.*?)(-->)|(.*))/)
code = self[2] || self[4]
closing = self[3]
encoder.text_token self[1], :comment
else
# Otherwise the code runs up to "</script>" (optionally preceded by a
# newline and whitespace) or, failing that, to the end of the input.
code = scan_until(%r(?=(?:\n\s*)?<\/script>)|\z/)
closing = false
end
unless code.empty?
encoder.begin_group :inline
scan_java_script encoder, code
encoder.end_group :inline
end
encoder.text_token closing, :comment if closing
state = :initial
else
raise 'unknown special tag: %p' % [in_tag]
end
else
raise_inspect 'Unknown state: %p' % [state], encoder
end
end
end
# Persist the scanner state for a later call when the caller asked for it.
if options[:keep_state]
@state = state
@plain_string_content = plain_string_content
end
# Close a :string group that is still open at the end of the input.
encoder.end_group :string if state == :attribute_value_string
encoder
end