Scanner for Python. Supports Python 3.
Based on pygments’ PythonLexer, see dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
# File lib/coderay/scanners/python.rb, line 103
#
# Tokenizes the Python source held by this scanner and reports every token
# to +encoder+.
#
# The loop is a small state machine:
#
# * +:initial+          - ordinary code
# * +:string+           - inside a string or docstring literal
# * +:def_expected+     - the next name is reported as +:method+
# * +:class_expected+   - the next name is reported as +:class+
# * +:include_expected+ - names are reported as +:include+ (after the
#   keywords that DEF_NEW_STATE maps to this state)
#
# Whitespace and comments are recognised in every state except +:string+.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: not used by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options
  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  # A triple-quoted string at the very start of the input is a docstring.
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Stack of :from / :import / :as markers seen in the current import
  # statement; it decides how the next descriptor is classified.
  from_import_state = []

  until eos?

    if state == :string
      if (match = scan(STRING_DELIMITER_REGEXP[string_delimiter]))
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && (match = scan(/\n/))
        # Only triple-quoted strings may span several lines.
        encoder.text_token match, :content
      elsif (match = scan(STRING_CONTENT_REGEXP[string_delimiter]))
        encoder.text_token match, :content
      elsif !string_raw && (match = scan(/ \\ #{ESCAPE} /ox))
        encoder.text_token match, :char
      elsif (match = scan(/ \\ #{UNICODE_ESCAPE} /ox))
        encoder.text_token match, :char
      elsif (match = scan(/ \\ . /x))
        encoder.text_token match, :content
      elsif (match = scan(/ \\ | $ /x))
        # Stray backslash or end of line inside a single-line string:
        # close the group and flag the error.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif (match = scan(/ [ \t]+ | \\?\n /x))
      encoder.text_token match, :space
      if match == "\n"
        # An import statement ends with the line.
        state = :initial if state == :include_expected
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif (match = scan(/ \# [^\n]* /x))
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if (match = scan(/#{OPERATOR}/o))
        encoder.text_token match, :operator

      # Python accepts string prefixes in either case (r/R, u/U, b/B), so
      # the prefix is matched case-insensitively.
      elsif (match = scan(/(u?r?|b)?("""|"|'''|')/i))
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1].to_s
        unless modifiers.empty?
          string_raw = modifiers.downcase.include?('r')
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif (match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o))
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        # A name following a dot is an attribute, never a keyword/builtin.
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is used like a function, so treat it as a
          # plain identifier.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name that is being assigned to.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif (match = scan(/@[a-zA-Z0-9_.]+[lL]?/))
        encoder.text_token match, :decorator

      elsif (match = scan(/0[xX][0-9A-Fa-f]+[lL]?/))
        encoder.text_token match, :hex

      elsif (match = scan(/0[bB][01]+[lL]?/))
        encoder.text_token match, :binary

      elsif (match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/))
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif (match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/))
        encoder.text_token match, :octal

      elsif (match = scan(/\d+([lL])?/))
        # A long suffix (l/L) and an imaginary suffix (j/J) exclude each other.
        if self[1].nil? && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if (match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o))
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if (match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o))
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if (match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o))
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import; rescan it in :initial state.
          unscan
          match = nil
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif (match = scan(/,/))
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    last_token_dot = match == '.'

  end

  # Close a string group left open by an unterminated literal at end of input.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end