class CodeRay::Scanners::Python

Scanner for Python. Supports Python 3.

Based on pygments’ PythonLexer, see dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.

Protected Instance Methods

scan_tokens(encoder, options)
# File lib/coderay/scanners/python.rb, line 103
# Tokenizes the scanner's input as Python source and reports every token to
# +encoder+.
#
# The method is a hand-written state machine. The states handled here are:
#
# :initial::          ordinary code (operators, names, numbers, string starts)
# :string::           inside a string or docstring literal
# :def_expected::     the next name is emitted as :method
# :class_expected::   the next name is emitted as :class
# :include_expected:: module names / +as+ / +import+ of an import statement
#
# The non-initial states are entered through DEF_NEW_STATE, which is looked
# up for every keyword (presumably keyed by +def+, +class+, +import+ and
# +from+ -- the table is defined outside this method, confirm there).
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: accepted for interface compatibility; not read in this method.
#
# Returns +encoder+. Calls +raise_inspect+ if the state machine reaches a
# position it cannot handle.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  # A string at the very start of the input is reported as a docstring.
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  # The /u patterns below are only used when the input is UTF-8.
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Stack of :from / :import / :as markers for the current import statement.
  from_import_state = []

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        # Closing delimiter: finish the string group.
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may contain raw newlines.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Dangling backslash or end of line inside a single-line string:
        # close the group and flag the position as an error.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      if match == "\n"
        # A bare newline ends an import statement.
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /x)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        # Group 1 is the (possibly empty) prefix, group 2 the opening quote.
        # Python accepts the prefix letters in either case, hence /i.
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # An r/R prefix makes the string raw: ordinary escapes are not
          # highlighted as :char while scanning its content.
          string_raw = modifiers.downcase.include?('r')
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # Anything directly after a dot is an attribute, never a keyword.
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is being called like a function.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name being assigned to is treated as a plain name.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # A j/J suffix is only consumed when there was no l/L suffix.
        if self[1].nil? && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        # Unknown character: emit it as an error and keep going.
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # The alias name following "as".
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Some other keyword: put it back and rescan it as ordinary code.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        # Anything else ends the import statement.
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    # Remembered so the next name can be treated as an attribute access.
    last_token_dot = match == '.'

  end

  # Input ended inside a string: close the group that is still open.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end