This scanner is really complex, since Ruby is a complex language!
It aims to highlight 100% of common code correctly, and about 90% of unusual code.
It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.
# File lib/coderay/scanners/ruby.rb, line 19
#
# Builds a fresh StringState of type :string whose second argument is true
# and whose third argument is the double-quote character.
# NOTE(review): presumably "interpreted = true, delimiter = '"'", i.e. the
# state used while inside a double-quoted string -- confirm against
# StringState's constructor.
def interpreted_string_state
  StringState.new(:string, true, '"')
end
# File lib/coderay/scanners/ruby.rb, line 29
#
# Tokenizes the remaining input and reports the result to +encoder+.
#
# The scanner is a state machine. +state+ is either a Symbol (:initial,
# :def_expected, :dot_expected, :module_expected, :undef_expected,
# :undef_comma_expected, :alias_expected) or a StringState describing the
# string-like literal (string, symbol, regexp, shell, heredoc) currently
# being read.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: Hash. <tt>:state</tt> supplies a <tt>[state, heredocs]</tt> pair
#           to resume from (falls back to <tt>@state</tt>);
#           <tt>:keep_state</tt> stores the final pair back into
#           <tt>@state</tt> when truthy.
#
# Returns +encoder+.
def scan_tokens encoder, options
  state, heredocs = options[:state] || @state
  # Work on a copy so the caller's pending-heredoc list is not consumed.
  heredocs = heredocs.dup if heredocs.is_a?(Array)

  # Resuming inside a string-like literal: reopen its group.
  if state && state.instance_of?(StringState)
    encoder.begin_group state.type
  end

  # State to fall back to after a single token scanned in :initial
  # (used for "#$var" / "#@var" inside strings and for "def obj.meth").
  last_state = nil

  # false, or the operator ('.' / '::') that was just scanned.
  method_call_expected = false
  # Truthy when the next token may start a value (decides whether "/" opens
  # a regexp, whether "<<" opens a heredoc, ...).
  value_expected = true

  # Stack of [state, curly_depth, heredocs] saved when entering "#{ ... }".
  inline_block_stack = nil
  inline_block_curly_depth = 0

  # Resuming with pending heredocs: continue with the first one.
  if heredocs
    state = heredocs.shift
    encoder.begin_group state.type
    heredocs = nil if heredocs.empty?
  end

  # def_object_stack = nil
  # def_object_paren_depth = 0

  patterns = Patterns  # avoid constant lookup

  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

  until eos?

    if state.instance_of? ::Symbol

      if match = scan(/[ \t\f\v]+/)
        encoder.text_token match, :space

      elsif match = scan(/\n/)
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          state = :initial if state == :undef_comma_expected
          encoder.text_token match, :space
          value_expected = true
        end

      elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
        # Capture group 1 is set only for a "#!" line at the start of a line.
        encoder.text_token match, self[1] ? :doctype : :comment

      elsif match = scan(/\\\n/)
        # Backslash line continuation.
        if heredocs
          unscan  # heredoc scanning needs \n at start
          encoder.text_token scan(/\\/), :space
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        else
          encoder.text_token match, :space
        end

      elsif state == :initial

        # IDENTS #
        if !method_call_expected &&
           match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                  /#{patterns::METHOD_NAME}/o)
          value_expected = false
          kind = patterns::IDENT_KIND[match]
          if kind == :ident
            # Capitalized name that is neither a ?/! method nor followed by
            # "(" is reported as a constant.
            if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
              kind = :constant
            end
          elsif kind == :keyword
            state = patterns::KEYWORD_NEW_STATE[match]
            value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
          end
          value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
          encoder.text_token match, kind

        elsif method_call_expected &&
              match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                     /#{patterns::METHOD_AFTER_DOT}/o)
          if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
            encoder.text_token match, :constant
          else
            encoder.text_token match, :ident
          end
          method_call_expected = false
          value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        elsif !method_call_expected &&
              match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
          # Group 1: "." or "::" (a method name must follow).
          # Group 2: empty match, set only for operators after which a
          #          value is expected.
          method_call_expected = self[1]
          value_expected = !method_call_expected && self[2]
          if inline_block_stack
            case match
            when '{'
              inline_block_curly_depth += 1
            when '}'
              inline_block_curly_depth -= 1
              if inline_block_curly_depth == 0  # closing brace of inline block reached
                state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                inline_block_stack = nil if inline_block_stack.empty?
                heredocs = nil if heredocs && heredocs.empty?
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                next
              end
            end
          end
          encoder.text_token match, :operator

        elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                     /#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            # Quoted symbol (:'...' or :"..."): continue in a StringState.
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
          else
            encoder.text_token match, :symbol
            value_expected = false
          end

        elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /ox)
          encoder.begin_group :string
          if match.size == 1
            # Only the opening quote matched: the string contains escapes or
            # interpolation (or is unterminated); scan it via StringState.
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :string, match == '"', match  # important for streaming
          else
            # Simple string matched completely in one go.
            encoder.text_token match[0,1], :delimiter
            encoder.text_token match[1..-2], :content if match.size > 2
            encoder.text_token match[-1,1], :delimiter
            encoder.end_group :string
            value_expected = false
          end

        elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                     /#{patterns::INSTANCE_VARIABLE}/o)
          value_expected = false
          encoder.text_token match, :instance_variable

        elsif value_expected && match = scan(/\//)
          encoder.begin_group :regexp
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :regexp, true, '/'

        elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
          if method_call_expected
            # A number directly after "." or "::" is invalid.
            encoder.text_token match, :error
            method_call_expected = false
          else
            encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
          end
          value_expected = false

        elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          encoder.begin_group kind
          encoder.text_token match, :delimiter
          encoder.end_group kind
          # The heredoc body starts at the next newline; queue it until then.
          heredocs ||= []  # create heredocs if empty
          heredocs << self.class::StringState.new(kind, quote != "'", delim,
                                                  self[1] == '-' ? :indented : :linestart)
          value_expected = false

        elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
          kind = patterns::FANCY_STRING_KIND[self[1]]
          encoder.begin_group kind
          state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
          encoder.text_token match, :delimiter

        elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
          value_expected = false
          encoder.text_token match, :integer

        elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
          value_expected = true
          encoder.text_token match, :operator

        elsif match = scan(/`/)
          encoder.begin_group :shell
          encoder.text_token match, :delimiter
          state = self.class::StringState.new :shell, true, match

        elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                     /#{patterns::GLOBAL_VARIABLE}/o)
          encoder.text_token match, :global_variable
          value_expected = false

        elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                     /#{patterns::CLASS_VARIABLE}/o)
          encoder.text_token match, :class_variable
          value_expected = false

        elsif match = scan(/\\\z/)
          # Lone backslash at the very end of the input.
          encoder.text_token match, :space

        else
          if method_call_expected
            # Nothing valid followed "." / "::"; retry this position with
            # the flag cleared instead of flagging an error right away.
            method_call_expected = false
            next
          end
          unless unicode
            # check for unicode
            # $DEBUG is silenced while probing so that the exception raised
            # for a bad character is not reported.
            $DEBUG_BEFORE, $DEBUG = $DEBUG, false
            begin
              if check(/./mu).size > 1
                # seems like we should try again with unicode
                unicode = true
              end
            rescue
              # bad unicode char; use getch
            ensure
              $DEBUG = $DEBUG_BEFORE
            end
            next if unicode
          end
          encoder.text_token getch, :error

        end

        # A one-token excursion into :initial is over; return to the
        # remembered state.
        if last_state
          state = last_state
          last_state = nil
        end

      elsif state == :def_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
          state = :initial
        else
          # Not a plain method name (e.g. "def obj.meth"): scan one token in
          # :initial, then expect the "." / "::".
          last_state = :dot_expected
          state = :initial
        end

      elsif state == :dot_expected
        if match = scan(/\.|::/)
          # invalid definition
          state = :def_expected
          encoder.text_token match, :operator
        else
          state = :initial
        end

      elsif state == :module_expected
        if match = scan(/<</)
          encoder.text_token match, :operator
        else
          state = :initial
          if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                    / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            encoder.text_token match, :class
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                  /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          encoder.text_token match, :method
        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            encoder.begin_group :symbol
            encoder.text_token ':', :symbol
            match = delim.chr
            encoder.text_token match, :delimiter
            state = self.class::StringState.new :symbol, delim == ?", match
            # After the quoted symbol ends, continue the undef list.
            state.next_state = :undef_comma_expected
          else
            encoder.text_token match, :symbol
          end
        else
          state = :initial
        end

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          encoder.text_token match, :operator
          state = :undef_expected
        else
          state = :initial
        end

      elsif state == :alias_expected
        match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                               /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

        if match
          # Each name is a symbol if it starts with ":", otherwise a method.
          encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
          encoder.text_token self[2], :space
          encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
        end
        state = :initial

      else
        #:nocov:
        raise_inspect 'Unknown state: %p' % [state], encoder
        #:nocov:
      end

    else  # StringState

      # Plain content up to the next character that needs a decision.
      match = scan_until(state.pattern) || scan_rest
      unless match.empty?
        encoder.text_token match, :content
        break if eos?
      end

      if state.heredoc && self[1]  # end of heredoc
        match = getch
        match << scan_until(/$/) unless eos?
        encoder.text_token match, :delimiter unless match.empty?
        encoder.end_group state.type
        state = state.next_state
        next
      end

      case match = getch

      when state.delim
        if state.paren_depth
          # Nested bracket pairs inside the literal: only the outermost
          # closing delimiter ends it.
          state.paren_depth -= 1
          if state.paren_depth > 0
            encoder.text_token match, :content
            next
          end
        end
        encoder.text_token match, :delimiter
        if state.type == :regexp && !eos?
          match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
          encoder.text_token match, :modifier unless match.empty?
        end
        encoder.end_group state.type
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/#{patterns::ESCAPE}/o)
            encoder.text_token match + esc, :char
          else
            encoder.text_token match, :error
          end
        else
          # Non-interpreted literal: only the delimiter and the backslash
          # itself can be escaped.
          case esc = getch
          when nil
            encoder.text_token match, :content
          when state.delim, '\\'
            encoder.text_token match + esc, :char
          else
            encoder.text_token match + esc, :content
          end
        end

      when '#'
        case peek(1)
        when '{'
          # "#{": save the string state and scan the embedded code in
          # :initial until the matching "}".
          inline_block_stack ||= []
          inline_block_stack << [state, inline_block_curly_depth, heredocs]
          value_expected = true
          state = :initial
          inline_block_curly_depth = 1
          encoder.begin_group :inline
          encoder.text_token match + getch, :inline_delimiter
        when '$', '@'
          # "#$var" / "#@var": scan one variable token, then come back.
          encoder.text_token match, :escape
          last_state = state
          state = :initial
        else
          #:nocov:
          raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
          #:nocov:
        end

      when state.opening_paren
        state.paren_depth += 1
        encoder.text_token match, :content

      else
        #:nocov
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
        #:nocov:

      end

    end

  end

  # cleaning up
  if state.is_a? StringState
    encoder.end_group state.type
  end

  if options[:keep_state]
    if state.is_a?(StringState) && state.heredoc
      # An unfinished heredoc is resumed through the heredocs list.
      (heredocs ||= []).unshift state
      state = :initial
    elsif heredocs && heredocs.empty?
      heredocs = nil
    end
    @state = state, heredocs
  end

  # Close every "#{" block (and the string around it) still open at the end
  # of the input.
  if inline_block_stack
    until inline_block_stack.empty?
      state, = *inline_block_stack.pop
      encoder.end_group :inline
      encoder.end_group state.type
    end
  end

  encoder
end
# File lib/coderay/scanners/ruby.rb, line 25
#
# Puts the scanner into the :initial state by assigning @state.
def setup
  @state = :initial
end