class CodeRay::Scanners::Ruby

This scanner is really complex, since Ruby is a complex language!

It tries to highlight 100% of all common code and 90% of strange code.

It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.

Public Instance Methods

interpreted_string_state() click to toggle source
# File lib/coderay/scanners/ruby.rb, line 19
# Builds a fresh StringState of type :string whose delimiter is a double
# quote. The second argument (+true+) is the same flag that scan_tokens
# reads back as +state.interpreted+ when deciding how to treat escapes.
#
# Returns the new StringState.
def interpreted_string_state()
  StringState.new(:string, true, '"')
end

Protected Instance Methods

scan_tokens(encoder, options) click to toggle source
# File lib/coderay/scanners/ruby.rb, line 29
# Tokenizes the scanner's input and reports the tokens to +encoder+.
#
# The main loop runs until eos? and dispatches on +state+:
# * a Symbol (:initial, :def_expected, :dot_expected, :module_expected,
#   :undef_expected, :undef_comma_expected, :alias_expected) means ordinary
#   code is being scanned;
# * anything else is treated as a StringState, and string-like content is
#   consumed up to +state.pattern+.
#
# encoder:: receives text_token / begin_group / end_group calls.
# options:: +:state+ replaces the saved @state as the starting state;
#           +:keep_state+ stores the final state and pending heredocs in
#           @state when the scan ends.
#
# Returns +encoder+.
#
# NOTE(review): in this listing many regexp literals read `%r.../` where
# `/.../` would be expected, and a few character literals show up as '',
# "", :: and '\'. That looks like damage from the documentation rendering
# rather than real source -- confirm every pattern against
# lib/coderay/scanners/ruby.rb before relying on it.
def scan_tokens encoder, options
  # The saved state is a "state, heredocs" pair (see the assignment to @state
  # near the end); when it is a bare Symbol, heredocs simply comes out nil.
  state, heredocs = options[:state] || @state
  # Work on a copy; presumably so the shift calls below leave the stored array alone.
  heredocs = heredocs.dup if heredocs.is_a?(Array)
  
  # Resuming inside a string-like literal: reopen its group on the encoder.
  if state && state.instance_of?(StringState)
    encoder.begin_group state.type
  end
  
  # State to restore after the next :initial-state step; set by the
  # '#$' / '#@' shortcut in the StringState branch below.
  last_state = nil
  
  # method_call_expected holds the matched '.' or '::' (self[1] of the operator
  # pattern) until the following name has been scanned.
  method_call_expected = false
  # value_expected gates the branches that are only tried where a value may
  # start: '/' as regexp opener, heredoc openers, fancy strings, character
  # literals and signed numbers.
  value_expected = true
  
  # One [state, curly depth, heredocs] entry is pushed per enclosing '#{'.
  inline_block_stack = nil
  inline_block_curly_depth = 0
  
  # Pending heredocs carried over from a previous run: continue with the first.
  if heredocs
    state = heredocs.shift
    encoder.begin_group state.type
    heredocs = nil if heredocs.empty?
  end
  
  # def_object_stack = nil
  # def_object_paren_depth = 0
  
  patterns = Patterns  # avoid constant lookup
  
  # True when the input string reports UTF-8 as its encoding; may also be
  # switched on later by the fallback branch of the :initial state.
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  
  until eos?
    
    # Symbol state: scanning code (as opposed to string content).
    if state.instance_of? ::Symbol
      
      if match = scan(%r[ \t\f\v]+/)
        encoder.text_token match, :space
        
      elsif match = scan(%r\n/)
        if heredocs
          # A newline starts the body of the first pending heredoc.
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          state = :initial if state == :undef_comma_expected
          encoder.text_token match, :space
          value_expected = true
        end
        
      # Comments. At the beginning of a line the pattern also accepts '#!' and
      # RUBYDOC_OR_DATA; a captured '!' (self[1]) makes the token a :doctype.
      elsif match = scan(bol? ? %r \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /x : %r\#.*/)
        encoder.text_token match, self[1] ? :doctype : :comment
        
      # Backslash followed by a newline.
      elsif match = scan(%r\\\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          # Re-scan only the backslash, then let the heredoc state see the newline.
          encoder.text_token scan(%r\\/), :space
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          encoder.text_token match, :space
        end
        
      elsif state == :initial
        
        # IDENTS #
        if !method_call_expected &&
           match = scan(unicode ? %r#{patterns::METHOD_NAME}/o :
                                  %r#{patterns::METHOD_NAME}/)
          value_expected = false
          kind = patterns::IDENT_KIND[match]
          if kind == :ident
            # Capitalized name that neither ends in !/? nor is followed by '(' => constant.
            if match[%r\A[A-Z]/] && !(match[%r[!?]$/] || match?(%r\(/))
              kind = :constant
            end
          elsif kind == :keyword
            # Some keywords switch to a dedicated follow-up state.
            state = patterns::KEYWORD_NEW_STATE[match]
            value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
          end
          value_expected = true if !value_expected && check(%r#{patterns::VALUE_FOLLOWS}/)
          encoder.text_token match, kind
          
        # Name directly after '.' or '::'.
        elsif method_call_expected &&
           match = scan(unicode ? %r#{patterns::METHOD_AFTER_DOT}/o :
                                  %r#{patterns::METHOD_AFTER_DOT}/)
          if method_call_expected == '::' && match[%r\A[A-Z]/] && !match?(%r\(/)
            encoder.text_token match, :constant
          else
            encoder.text_token match, :ident
          end
          method_call_expected = false
          value_expected = check(%r#{patterns::VALUE_FOLLOWS}/)
          
        # OPERATORS #
        elsif !method_call_expected && match = scan(%r (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /)
          # Group 1 is '.' / '::'; the empty group 2 marks operators after which a value may follow.
          method_call_expected = self[1]
          value_expected = !method_call_expected && self[2]
          if inline_block_stack
            # Inside '#{ ... }': count braces to find the one that closes the inline block.
            case match
            when '{'
              inline_block_curly_depth += 1
            when '}'
              inline_block_curly_depth -= 1
              if inline_block_curly_depth == 0  # closing brace of inline block reached
                # Restore what was saved when the '#{' was opened.
                state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                inline_block_stack = nil if inline_block_stack.empty?
                heredocs = nil if heredocs && heredocs.empty?
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                next
              end
            end
          end
          encoder.text_token match, :operator
          
        elsif match = scan(unicode ? %r#{patterns::SYMBOL}/o :
                                     %r#{patterns::SYMBOL}/)
          # Second character of the match is a quote => quoted symbol, handled
          # as a StringState of type :symbol; otherwise the match is a plain symbol.
          case delim = match[1]
          when '', ""
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == "", match
          else
            encoder.text_token match, :symbol
            value_expected = false
          end
          
        # Quoted strings. The pattern matches either a complete simple string
        # or, failing that, only the opening quote.
        elsif match = scan(%r ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /x)
          encoder.begin_group :string
          if match.size == 1
            # Only the opening quote matched: continue in a StringState.
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :string, match == '"', match  # important for streaming
          else
            # Whole string matched in one go: emit delimiter, content, delimiter.
            encoder.text_token match[0,1], :delimiter
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :string
            value_expected = false
          end
          
        elsif match = scan(unicode ? %r#{patterns::INSTANCE_VARIABLE}/o :
                                     %r#{patterns::INSTANCE_VARIABLE}/)
          value_expected = false
          encoder.text_token match, :instance_variable
          
        # A '/' only opens a regexp where a value is expected.
        elsif value_expected && match = scan(%r\//)
          encoder.begin_group :regexp
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :regexp, true, '/'
          
        # Numbers; a leading sign is only accepted where a value is expected.
        elsif match = scan(value_expected ? %r[-+]?#{patterns::NUMERIC}/ : %r#{patterns::NUMERIC}/)
          if method_call_expected
            # A number directly after '.' / '::' is reported as an error token.
            encoder.text_token match, :error
            method_call_expected = false
          else
            encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
          end
          value_expected = false
          
        elsif match = scan(%r [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /)
          value_expected = true
          encoder.text_token match, :operator
          
        elsif value_expected && match = scan(%r#{patterns::HEREDOC_OPEN}/)
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          # The opener itself is emitted as a complete, closed group; the body
          # is queued and scanned once the current line ends.
          encoder.begin_group kind
          encoder.text_token match, :delimiter
          encoder.end_group kind
          heredocs ||= []  # create heredocs if empty
          heredocs << self.class::StringState.new(kind, quote != "'", delim,
            self[1] == '-' ? :indented : :linestart)
          value_expected = false
          
        elsif value_expected && match = scan(%r#{patterns::FANCY_STRING_START}/)
          kind = patterns::FANCY_STRING_KIND[self[1]]
          encoder.begin_group kind
          state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
          encoder.text_token match, :delimiter
          
        elsif value_expected && match = scan(%r#{patterns::CHARACTER}/)
          value_expected = false
          encoder.text_token match, :integer
          
        elsif match = scan(%r %=? | <(?:<|=>?)? | \? /)
          value_expected = true
          encoder.text_token match, :operator
          
        elsif match = scan(%r`/)
          encoder.begin_group :shell
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :shell, true, match
          
        elsif match = scan(unicode ? %r#{patterns::GLOBAL_VARIABLE}/o :
                                     %r#{patterns::GLOBAL_VARIABLE}/)
          encoder.text_token match, :global_variable
          value_expected = false
          
        elsif match = scan(unicode ? %r#{patterns::CLASS_VARIABLE}/o :
                                     %r#{patterns::CLASS_VARIABLE}/)
          encoder.text_token match, :class_variable
          value_expected = false
          
        # Backslash at the very end of the input.
        elsif match = scan(%r\\\z/)
          encoder.text_token match, :space
          
        else
          # Nothing matched. First retry the same position without the
          # "after a dot" restriction ...
          if method_call_expected
            method_call_expected = false
            next
          end
          # ... then retry once in unicode mode if that looks promising ...
          unless unicode
            # check for unicode
            $DEBUG_BEFORE, $DEBUG = $DEBUG, false
            begin
              if check(%r./u).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = $DEBUG_BEFORE
            end
            next if unicode
          end
          
          # ... and finally give up on this character.
          encoder.text_token getch, :error
          
        end
        
        # Return to the state that was parked in last_state, if any.
        if last_state
          state = last_state
          last_state = nil
        end
        
      # NOTE(review): the states below are presumably entered through
      # KEYWORD_NEW_STATE (def, class/module, undef, alias) -- that table is
      # not visible here.
      elsif state == :def_expected
        # A method name that is not followed by '.' or '::'.
        if match = scan(unicode ? %r(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o :
                                  %r(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/)
          encoder.text_token match, :method
          state = :initial
        else
          # Scan one token in :initial, then continue in :dot_expected.
          last_state = :dot_expected
          state = :initial
        end
        
      elsif state == :dot_expected
        if match = scan(%r\.|::/)
          # invalid definition
          state = :def_expected
          encoder.text_token match, :operator
        else
          state = :initial
        end
        
      elsif state == :module_expected
        if match = scan(%r<</)
          # '<<' keeps the state unchanged, so the next step is tried here again.
          encoder.text_token match, :operator
        else
          state = :initial
          # Optionally namespaced name ("A::B::C"), emitted as one :class token.
          if match = scan(unicode ? %r (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ux :
                                    %r (?:#{patterns::IDENT}::)* #{patterns::IDENT} /x)
            encoder.text_token match, :class
          end
        end
        
      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(unicode ? %r(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o :
                                  %r(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/)
          encoder.text_token match, :method
        elsif match = scan(%r#{patterns::SYMBOL}/)
          case delim = match[1]
          when '', ""
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == "", match
            # After the quoted symbol closes, continue looking for a comma.
            state.next_state = :undef_comma_expected
          else
            encoder.text_token match, :symbol
          end
        else
          state = :initial
        end
        
      elsif state == :undef_comma_expected
        if match = scan(%r,/)
          encoder.text_token match, :operator
          state = :undef_expected
        else
          state = :initial
        end
        
      elsif state == :alias_expected
        # Both names and the whitespace between them are matched in one go.
        match = scan(unicode ? %r(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o :
                               %r(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/)
        
        if match
          encoder.text_token self[1], (self[1][0] == :: ? :symbol : :method)
          encoder.text_token self[2], :space
          encoder.text_token self[3], (self[3][0] == :: ? :symbol : :method)
        end
        state = :initial
        
      else
        #:nocov:
        raise_inspect 'Unknown state: %p' % [state], encoder
        #:nocov:
      end
      
    else  # StringState
      
      # Consume content through the next match of state.pattern, or the rest of the input.
      match = scan_until(state.pattern) || scan_rest
      unless match.empty?
        encoder.text_token match, :content
        # Input ended inside the literal; the group is closed after the loop.
        break if eos?
      end
      
      if state.heredoc && self[1]  # end of heredoc
        # Emit the terminator (through the end of its line) as the delimiter.
        match = getch
        match << scan_until(%r$/) unless eos?
        encoder.text_token match, :delimiter unless match.empty?
        encoder.end_group state.type
        state = state.next_state
        next
      end
      
      # The next character decides how to continue.
      case match = getch
      
      when state.delim
        if state.paren_depth
          # Nested delimiters: only the outermost closer ends the literal.
          state.paren_depth -= 1
          if state.paren_depth > 0
            encoder.text_token match, :content
            next
          end
        end
        encoder.text_token match, :delimiter
        if state.type == :regexp && !eos?
          # Modifiers following the closing delimiter of a regexp.
          match = scan(%r#{patterns::REGEXP_MODIFIERS}/)
          encoder.text_token match, :modifier unless match.empty?
        end
        encoder.end_group state.type
        value_expected = false
        state = state.next_state
        
      when '\'
        if state.interpreted
          if esc = scan(%r#{patterns::ESCAPE}/)
            encoder.text_token match + esc, :char
          else
            encoder.text_token match, :error
          end
        else
          # Not interpreted: only an escaped delimiter or backslash counts as :char.
          case esc = getch
          when nil
            encoder.text_token match, :content
          when state.delim, '\'
            encoder.text_token match + esc, :char
          else
            encoder.text_token match + esc, :content
          end
        end
        
      when '#'
        case peek(1)
        when '{'
          # '#{': park the string state and scan code until the matching '}'.
          inline_block_stack ||= []
          inline_block_stack << [state, inline_block_curly_depth, heredocs]
          value_expected = true
          state = :initial
          inline_block_curly_depth = 1
          encoder.begin_group :inline
          encoder.text_token match + getch, :inline_delimiter
        when '$', '@'
          # '#$' / '#@': scan a single step in :initial, then come back here.
          encoder.text_token match, :escape
          last_state = state
          state = :initial
        else
          #:nocov:
          raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
          #:nocov:
        end
        
      when state.opening_paren
        state.paren_depth += 1
        encoder.text_token match, :content
        
      else
        #:nocov
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
        #:nocov:
        
      end
      
    end
    
  end
  
  # cleaning up
  # Close the group of a literal that was still open when the input ended.
  if state.is_a? StringState
    encoder.end_group state.type
  end
  
  if options[:keep_state]
    # An unfinished heredoc goes back to the front of the pending list, so the
    # stored state itself is always :initial in that case.
    if state.is_a?(StringState) && state.heredoc
      (heredocs ||= []).unshift state
      state = :initial
    elsif heredocs && heredocs.empty?
      heredocs = nil
    end
    @state = state, heredocs
  end
  
  # Close every still-open '#{' block together with the literal that contained it.
  if inline_block_stack
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state.type
    end
  end
  
  encoder
end
setup() click to toggle source
# File lib/coderay/scanners/ruby.rb, line 25
# Puts the scanner's saved state back to :initial. scan_tokens falls back
# to this saved state when no :state option is passed.
#
# Returns :initial.
def setup()
  @state = :initial
end