module Brice::Colours::Tokenizer

Tokenize an inspection string.

Public Instance Methods

tokenize(str) { |last, value| ... }
# File lib/brice/colours.rb, line 244
# Tokenize the inspection string +str+, yielding every token to the block
# as a pair of token type (a Symbol) and token text (a String).
#
# Token types yielded: :symbol_prefix, :symbol, :open_string, :string,
# :close_string, :keyword, :class, :number, :range, :refers, :comma,
# :whitespace, :open_hash, :close_hash, :open_array, :close_array,
# :open_object, :object_class, :object_addr_prefix, :object_addr,
# :object_line_prefix, :object_line, :close_object and :unknown.
#
# The scanner is a small state machine: +states+ is a stack whose top
# selects how the current character is interpreted, and +value+ collects
# the text of the token being built.
#
# Raises ArgumentError if no block is given. Input that ends in the middle
# of a string literal or of an object part (class, address, line) is not
# flushed: appending the end-of-input +nil+ to the pending value raises
# TypeError.
def tokenize(str)
  raise ArgumentError, 'no block given' unless block_given?

  chars = str.split(//)
  char  = last_char = repeat = nil
  # +value+ is appended to in place, so start from an unfrozen copy; this
  # keeps the method working under `# frozen_string_literal: true`.
  states, value, index = [], ''.dup, 0

  # Leave the current state and start a fresh token. An optional argument
  # sets +repeat+, i.e. whether the current character is examined again
  # in the state underneath.
  reset = lambda { |*args|
    states.pop

    value  = ''.dup
    repeat = args.first unless args.empty?
  }

  # Emit the token collected so far under the current state, then reset.
  yield_last = lambda { |*args|
    yield states.last, value
    reset[*args]
  }

  # Runs one step past the last character: on that final pass +char+ is
  # nil, which terminates a trailing symbol, keyword or number.
  until index > chars.size
    char, repeat = chars[index], false

    case states.last
      when nil
        case char
          when ':' then states << :symbol
          when '"' then states << :string
          when '#' then states << :object
          when /[a-z]/i
            # The first character belongs to the token itself, so look at
            # it again in the new state.
            states << :keyword
            repeat = true
          when /[0-9-]/
            states << :number
            repeat = true
          when '{'  then yield :open_hash,   '{'
          when '['  then yield :open_array,  '['
          when ']'  then yield :close_array, ']'
          when '}'  then yield :close_hash,  '}'
          when /\s/ then yield :whitespace,  char
          when ','  then yield :comma,       ','
          when '>'  then yield :refers,      '=>' if last_char == '='
          when '.'  then yield :range,       '..' if last_char == '.'
          when '='  then nil  # emitted together with the following '>'
          else           yield :unknown,     char if char
        end
      when :symbol
        if char =~ /[a-z0-9_!?]/  # should have =, but that messes up foo=>bar
          value << char
        else
          yield :symbol_prefix, ':'
          yield_last[true]
        end
      when :string
        if char == '"'
          # The quote is escaped only when preceded by an odd number of
          # backslashes; an even run is made of escaped backslashes and
          # the quote really closes the string (e.g. "a\\").
          if value[/\\*\z/].size.odd?
            # Replace the escaping backslash with the quote itself.
            value[-1] = char
          else
            yield :open_string,  char
            yield_last[]
            yield :close_string, char
          end
        else
          value << char
        end
      when :keyword
        if char =~ /[a-z0-9_]/i
          value << char
        else
          # A capitalized word is reported as :class instead of :keyword.
          states[-1] = :class if value =~ /\A[A-Z]/
          yield_last[true]

          value << char if char == '.'
        end
      when :number
        case char
          when /[0-9e-]/
            value << char
          when '.'
            if last_char == char
              # Second dot of a range: the first dot was already appended
              # to the number, so take it back before emitting.
              value.chop!

              yield_last[]
              yield :range, '..'
            else
              value << char
            end
          else
            yield_last[true]
        end
      when :object
        case char
          when '<'
            yield :open_object, '#<'
            states << :object_class
          when ':'
            states << :object_addr
          when '@'
            states << :object_line
          when '>'
            yield :close_object, '>'
            reset[]
        end
      when :object_class
        if char == ':'
          yield_last[true]
        else
          value << char
        end
      when :object_addr
        case char
          when '>'
            # ignore
          when '@'
            yield :object_addr_prefix, ':'
            yield_last[true]
          else
            value << char
        end
      when :object_line
        if char == '>'
          yield :object_line_prefix, '@'
          yield_last[true]
        else
          value << char
        end
      else
        raise "unknown state: #{states}"
    end

    # Advance unless the current character has to be looked at again.
    unless repeat
      index += 1
      last_char = char
    end
  end
end