class Lsi4R::Doc

Constants

TOKEN_RE

Attributes

transform[R]
key[R]
norm[R]
vector[R]

Public Class Methods

new(key, value, list, freq)
# File lib/lsi4r/doc.rb, line 61
# Sets up a document identified by +key+ from +value+.
#
# key   - stored in @key.
# value - a Hash of term => weight, or any input that build_hash accepts.
# list  - looked up as list[term] to obtain the index used for a term.
# freq  - counter indexed like +list+; bumped by one for every entry this
#         document contributes (the caller's object is mutated in place).
#
# The index => weight table is kept in @map, @total starts at 1, and the
# document vector is initialised from raw_vector through vector=.
def initialize(key, value, list, freq)
  weights = Hash.new(0)

  @key   = key
  @list  = list
  @freq  = freq
  @total = 1
  @map   = weights

  if value.is_a?(Hash)
    # Pre-weighted input: copy each weight across under the term's index.
    value.each do |term, weight|
      index = list[term]
      weights[index] = weight
      freq[index] += 1
    end
  else
    # Anything else is counted by build_hash; each resulting index is then
    # registered once in +freq+.
    build_hash(value, list, weights).each_key { |index| freq[index] += 1 }
  end

  self.vector = raw_vector
end
transform=(transform)
# File lib/lsi4r/doc.rb, line 43
# Chooses the transform and installs it under the name +transformed_vector+.
#
# transform - nil (treated as :raw); a Symbol or String naming an existing
#             "<name>_vector" method, which is aliased; or a Proc or
#             UnboundMethod, which becomes the method body.
#
# Returns the value stored in @transform: the name as a Symbol, or
# transform.to_s for a Proc/UnboundMethod.
# Raises TypeError for any other argument type.
#
# NOTE(review): relies on alias_method/define_method being callable on the
# receiver, i.e. presumably a class-level method - confirm.
def transform=(transform)
  transform ||= :raw
  target = :transformed_vector

  case transform
  when Symbol, String
    alias_method(target, "#{transform}_vector")
    @transform = transform.to_sym
  when Proc, UnboundMethod
    define_method(target, transform)
    @transform = transform.to_s
  else
    raise TypeError, "wrong argument type #{transform.class} " \
                     '(expected Symbol/String or Proc/UnboundMethod)'
  end
end

Public Instance Methods

foat_vector(*args)

TODO: Confirm what “foat” stands for (possibly “first-order association transform”?).

# File lib/lsi4r/doc.rb, line 84
# Returns the raw vector with every component rescaled to log(v + 1) / q,
# where q = -sum(w * log(w)) over the positive components and w = v / sum.
#
# args - forwarded unchanged to raw_vector.
#
# The raw vector is returned untouched when no rescaling is possible:
# * the component sum is not greater than 1, or
# * q is zero, which happens when a single component carries the whole
#   weight (w == 1, log(w) == 0). Dividing by that q would fill the result
#   with Infinity/NaN.
#
# NOTE(review): the meaning of "foat" is not established by this file.
def foat_vector(*args)
  vec = raw_vector(*args)
  return vec unless (s = vec.sum) > 1

  q = 0
  vec.each { |v| q -= (w = v / s) * Math.log(w) if v > 0 }

  # Guard against a zero normaliser instead of emitting Infinity/NaN.
  return vec if q.zero?

  vec.map { |v| Math.log(v + 1) / q }
end
inspect()
# File lib/lsi4r/doc.rb, line 106
# Short debugging representation: "<class>@<key.inspect>/<size>".
def inspect
  format('%s@%p/%d', self.class, key, size)
end
raw_vector(size = @list.size, *)
# File lib/lsi4r/doc.rb, line 77
# Builds a GSL::Vector of +size+ entries (default: @list.size) via
# GSL::Vector.calloc and writes each (index, value) pair yielded by +each+
# into it. Any further positional arguments are accepted and ignored.
#
# Returns the populated vector.
def raw_vector(size = @list.size, *)
  GSL::Vector.calloc(size).tap do |vector|
    each { |index, value| vector[index] = value }
  end
end
tfidf_vector(*args)
# File lib/lsi4r/doc.rb, line 92
# Re-weights the raw vector: every positive component v at index i becomes
# log(total / @freq[i]) * v / sum, where sum is the raw vector's component
# sum. Non-positive components are passed through unchanged.
#
# args - forwarded to raw_vector; when a second argument is present it
#        replaces @total (converted with to_f), otherwise the current @total
#        is reused (and likewise stored back as a Float).
#
# Returns whatever the vector's +map+ produces.
def tfidf_vector(*args)
  counts    = raw_vector(*args)
  doc_freq  = @freq
  count_sum = counts.sum
  total     = @total = args.fetch(1, @total).to_f

  counts.enum_for(:map).with_index do |count, index|
    next count unless count > 0

    Math.log(total / doc_freq[index]) * count / count_sum
  end
end
vector=(vec)
# File lib/lsi4r/doc.rb, line 102
# Stores +vec+ in @vector and the result of vec.normalize in @norm.
# The normalised form is computed first, so nothing is assigned if
# +normalize+ raises.
def vector=(vec)
  normalized = vec.normalize
  @vector, @norm = vec, normalized
end

Private Instance Methods

build_enum(value, re = TOKEN_RE)
# File lib/lsi4r/doc.rb, line 117
# Normalises +value+ into something enumerable:
# * an object responding to +read+ is read first;
# * the result is split on +re+ if it responds to +split+;
# * anything else is returned unchanged.
def build_enum(value, re = TOKEN_RE)
  content = value.respond_to?(:read) ? value.read : value
  content.respond_to?(:split) ? content.split(re) : content
end
build_hash(value, list, hash)
# File lib/lsi4r/doc.rb, line 112
# Counts the items produced by build_enum(value): for every item, the entry
# of +hash+ keyed by list[item] is incremented by one.
#
# Returns +hash+ itself (mutated in place); it must yield a numeric default
# for missing keys.
def build_hash(value, list, hash)
  build_enum(value).each_with_object(hash) do |token, counts|
    counts[list[token]] += 1
  end
end