class CodeRay::Scanners::HTML

HTML Scanner

Alias: xhtml

See also: Scanners::XML

Constants

EVENT_ATTRIBUTES
IN_ATTRIBUTE

Public Instance Methods

reset()
# File lib/coderay/scanners/html.rb, line 61
# Resets the scanner through the superclass implementation, then puts the
# HTML state machine back at its starting point: state :initial and no
# remembered string-content pattern.
def reset
  super
  @state, @plain_string_content = :initial, nil
  nil # keep the return value of the final `@plain_string_content = nil`
end

Protected Instance Methods

scan_java_script(encoder, code)
# File lib/coderay/scanners/html.rb, line 74
# Tokenizes +code+ as JavaScript and sends the resulting tokens to +encoder+.
#
# The JavaScript scanner is created on first use and then reused. Nothing
# happens (and nil is returned) when +code+ is nil or empty. This method does
# not open or close a token group around the emitted tokens.
def scan_java_script encoder, code
  return if !code || code.empty?

  @java_script_scanner ||= Scanners::JavaScript.new('', :keep_tokens => true)
  @java_script_scanner.tokenize(code, :tokens => encoder)
end
scan_tokens(encoder, options)
# File lib/coderay/scanners/html.rb, line 83
# Tokenizes the remaining input as HTML, sending every token to +encoder+.
#
# The scan is driven by a small state machine:
#   :initial                - between tags (text, comments, doctype, PIs, entities)
#   :attribute              - inside a start tag, expecting an attribute name or the tag end
#   :attribute_equal        - after an attribute name, expecting "="
#   :attribute_value        - after "=", expecting a bare or quoted value
#   :attribute_value_string - inside a quoted attribute value
#   :in_special_tag         - inside the body of a <script> element
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: +:state+ overrides the stored start state; a truthy +:keep_state+
#           stores the final state (and string-content pattern) on the instance.
#
# Returns +encoder+.
#
# Regexps that interpolate constants use the /o flag so they are built once
# instead of on every pass through the loop (assumes ENTITY, TAG_END and
# ATTR_NAME are never reassigned). Patterns that use "." to span a construct
# use /m so the construct may contain newlines.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  in_tag = in_attribute = nil

  # When starting inside a quoted attribute value, reopen its :string group.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a :space token in every state except inside a special
    # tag, where it belongs to the embedded code.
    if state != :in_special_tag && match = scan(/\s+/)
      encoder.text_token match, :space

    else

      case state

      when :initial
        # Each "(?:.*?END|.*)" pattern falls back to consuming the rest of
        # the input when the construct is never closed.
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/)
          in_tag = nil
          encoder.text_token match, :tag
        # NOTE(review): "(script)" has no trailing boundary, so "<scripts"
        # is matched as "<script" followed by "s" - confirm whether intended.
        elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/)
          encoder.text_token match, :tag
          in_tag = self[1]            # "script" or nil
          if self[2]                  # the tag was closed right away with ">"
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # Attribute without a value: re-examine the same position as :attribute.
          state = :attribute
          next
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # The value is scanned as embedded JavaScript in an :inline group.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            # Take everything up to the matching quote (or the end of input).
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # A newline or ">" inside the string: close the group and flag an error.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script body wrapped in an HTML comment: the markers become
            # :comment tokens and the text between them is the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  end

  # Close the :string group if the input ended inside a quoted attribute value.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end
setup()
# File lib/coderay/scanners/html.rb, line 69
# Initializes the state-machine variables: the scanner starts in state
# :initial with no remembered string-content pattern.
def setup
  @state, @plain_string_content = :initial, nil
  nil # keep the return value of the final `@plain_string_content = nil`
end