class CodeRay::Scanners::PHP

Scanner for PHP.

Original by Stefan Walk.

Constants

KINDS_NOT_LOC

Protected Instance Methods

reset_instance() click to toggle source
# File lib/coderay/scanners/php.rb, line 23
# Resets this scanner for reuse: runs the superclass reset first, then
# resets the embedded HTML scanner held in @html_scanner.
#
# Returns whatever @html_scanner.reset returns.
def reset_instance
  super
  @html_scanner.reset()
end
scan_tokens(encoder, options) click to toggle source
# File lib/coderay/scanners/php.rb, line 234
# Scans the whole input and emits tokens into +encoder+.
#
# The scanner is a state machine driven by a stack (+states+):
#
#   :initial            HTML outside of PHP tags; delegated to @html_scanner
#   :php                PHP code
#   :sqstring           single-quoted string or nowdoc-style heredoc body
#   :dqstring           double-quoted / backtick string or heredoc body
#   :class_expected     directly after the +class+ keyword
#   :function_expected  directly after the +function+ keyword
#   [state, delimiter]  a string state suspended by a "{$...}" interpolation
#
# encoder - receives text_token / begin_group / end_group calls.
# options - not read by this method.
#
# Returns +encoder+. Calls raise_inspect if an unknown state is found on
# the stack.
def scan_tokens encoder, options
  
  if check(RE::PHP_START) ||  # starts with <?
   (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/m)) || # starts with tag and contains <?
   check(/.{0,1000}#{RE::HTML_INDICATOR}/m) ||
   check(/.{1,100}#{RE::PHP_START}/m)  # PHP start after max 100 chars
    # is HTML with embedded PHP, so start with HTML
    states = [:initial]
  else
    # is just PHP, so start with PHP surrounded by HTML
    states = [:initial, :php]
  end
  
  # label_expected: an identifier followed by a single ':' is a label here.
  # case_expected:  a 'case'/'default' keyword was seen; the next ':' re-arms
  #                 label_expected.
  label_expected = true
  case_expected = false
  
  # heredoc_delimiter: nil, or the Regexp matching the closing heredoc line.
  # delimiter:         the quote character that opened the current string.
  # modifier:          a pending 'b' prefix of a binary string literal.
  heredoc_delimiter = nil
  delimiter = nil
  modifier = nil
  
  until eos?
    
    case states.last
    
    when :initial  # HTML
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # everything up to the next PHP start tag (or the rest) is HTML
        match = scan_until(/(?=#{RE::PHP_START})/) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end
    
    when :php
      if match = scan(/\s+/)
        encoder.text_token match, :space
      
      # /* ... */ (possibly unterminated), or a // or # comment that stops
      # before the PHP end tag or the end of the line
      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment
      
      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          # identifier followed by a single ':' (not '::')
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)  # binary string literal
            # hold the prefix back; it is emitted inside the string group
            modifier = match
            next
          end
        end
        encoder.text_token match, kind
      
      # case-insensitive so that an uppercase exponent (1E5) is a float too
      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float
      
      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex
      
      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer
      
      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push match == "'" ? :sqstring : :dqstring
      
      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]
      
      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php
      
      elsif match = scan(/\}/)
        if states.size == 1
          encoder.text_token match, :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # closing a "{$...}" interpolation: restore the suspended
            # string state and its delimiter
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            label_expected = true
          end
        end
      
      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception
      
      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        states = [:initial]
      
      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/)
        encoder.begin_group :string
        # warn 'heredoc in heredoc?' if heredoc_delimiter
        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        # a single-quoted identifier is scanned with the :sqstring rules
        states.push self[3] ? :sqstring : :dqstring
        # the closing identifier, optionally followed by ';', up to end of line
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
      
      elsif match = scan(/#{RE::OPERATOR}/)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator
      
      else
        encoder.text_token getch, :error
      
      end
    
    when :sqstring
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # the newline belongs to the content, the identifier closes the string
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        states.pop
      end
    
    when :dqstring
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # the newline belongs to the content, the identifier closes the string
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        # hexadecimal or octal character escape
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      elsif match = scan(/#{RE::VARIABLE}/)
        if check(/\[#{RE::IDENTIFIER}\]/)
          # $var[key]
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          # any other bracket form after the variable is flagged as an error
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/)
          # $var->property
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/), :ident
          encoder.end_group :inline
        elsif check(/->/)
          # '->' without a following identifier is flagged as an error
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # "{$...}": suspend this string state together with its delimiter
          # and scan PHP code until the matching '}'
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        encoder.text_token match, :content
      else
        states.pop
      end
    
    when :class_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end
    
    when :function_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end
    
    else
      raise_inspect 'Unknown state!', encoder, states
    end
    
  end
  
  encoder
end
setup() click to toggle source
# File lib/coderay/scanners/php.rb, line 19
# Creates the embedded HTML scanner and stores it in @html_scanner.
#
# The scanner is requested from CodeRay.scanner for :html and is handed
# this scanner's @tokens together with the :keep_tokens and :keep_state
# options.
def setup
  @html_scanner = CodeRay.scanner(
    :html,
    :tokens      => @tokens,
    :keep_tokens => true,
    :keep_state  => true
  )
end