class Lsi4R::Doc
Constants
- TOKEN_RE
Attributes
transform[R]
key[R]
norm[R]
vector[R]
Public Class Methods
new(key, value, list, freq)
click to toggle source
# File lib/lsi4r/doc.rb, line 61
#
# Builds a document identified by +key+.
#
# +value+ is either a Hash of term => weight pairs, whose weights are
# stored as given, or anything accepted by +build_hash+, whose tokens are
# counted. +list+ is indexed by term to obtain the term's slot, and +freq+
# is incremented once for every distinct slot this document touches.
def initialize(key, value, list, freq)
  counts = Hash.new(0)

  @key   = key
  @list  = list
  @freq  = freq
  @total = 1
  @map   = counts

  if value.is_a?(Hash)
    value.each do |term, weight|
      index = list[term]
      counts[index] = weight
      freq[index] += 1
    end
  else
    build_hash(value, list, counts).each_key { |index| freq[index] += 1 }
  end

  # Start out with the untransformed vector; this also sets @norm.
  self.vector = raw_vector
end
transform=(transform)
click to toggle source
# File lib/lsi4r/doc.rb, line 43
#
# Selects what +transformed_vector+ does on the receiving class.
#
# * Symbol/String +name+: aliases +transformed_vector+ to
#   <tt>#{name}_vector</tt> and records the name as a Symbol.
# * Proc/UnboundMethod: defines +transformed_vector+ from it and records
#   its string representation.
# * +nil+ (or +false+) falls back to <tt>:raw</tt>.
#
# Raises TypeError for any other argument.
def transform=(transform)
  transform ||= :raw
  target = :transformed_vector

  case transform
  when Symbol, String
    alias_method(target, "#{transform}_vector")
    @transform = transform.to_sym
  when Proc, UnboundMethod
    define_method(target, transform)
    @transform = transform.to_s
  else
    raise TypeError,
          "wrong argument type #{transform.class} (expected Symbol/String or Proc/UnboundMethod)"
  end
end
Public Instance Methods
foat_vector(*args)
click to toggle source
TODO: “first-order association transform” ???
# File lib/lsi4r/doc.rb, line 84
#
# Returns the raw vector rescaled as <tt>log(v + 1) / q</tt>, where +q+ is
# the entropy <tt>-sum(w * log(w))</tt> of the document's term shares
# <tt>w = v / sum</tt>. Arguments are passed through to +raw_vector+.
#
# The raw vector is returned unchanged when no rescaling is possible:
# * its sum is not greater than 1, or
# * the entropy is zero (all weight sits on a single term), which would
#   otherwise divide by zero and yield Infinity/NaN components.
def foat_vector(*args)
  vec = raw_vector(*args)
  total = vec.sum
  return vec unless total > 1

  entropy = 0
  vec.each do |v|
    next unless v > 0

    share = v / total
    entropy -= share * Math.log(share)
  end

  # A single-term document has share == 1 and therefore entropy == 0.
  return vec if entropy.zero?

  vec.map { |v| Math.log(v + 1) / entropy }
end
inspect()
click to toggle source
# File lib/lsi4r/doc.rb, line 106
#
# Short representation of the form <tt>Class@key/size</tt>, with the key
# shown via its own +inspect+.
def inspect
  format('%s@%p/%d', self.class, key, size)
end
raw_vector(size = @list.size, *)
click to toggle source
# File lib/lsi4r/doc.rb, line 77
#
# Returns a zero-initialised GSL::Vector of +size+ elements (default: the
# current size of the term list) with this document's weights written at
# their indices. Any further arguments are accepted and ignored.
def raw_vector(size = @list.size, *)
  GSL::Vector.calloc(size).tap do |vec|
    each { |index, weight| vec[index] = weight }
  end
end
tfidf_vector(*args)
click to toggle source
# File lib/lsi4r/doc.rb, line 92
#
# Returns the raw vector with every positive component +v+ at index +i+
# replaced by <tt>log(total / freq[i]) * v / sum</tt>; other components are
# left as they are. Arguments are passed through to +raw_vector+.
#
# The second argument, when given, becomes the new <tt>@total</tt>;
# otherwise the current one is kept. Either way it is stored as a Float.
def tfidf_vector(*args)
  vec = raw_vector(*args)
  doc_freq = @freq
  weight_sum = vec.sum

  @total = args.fetch(1, @total).to_f
  doc_total = @total

  vec.enum_for(:map).with_index do |weight, index|
    if weight > 0
      Math.log(doc_total / doc_freq[index]) * weight / weight_sum
    else
      weight
    end
  end
end
vector=(vec)
click to toggle source
# File lib/lsi4r/doc.rb, line 102
#
# Stores +vec+ as the document vector together with the result of
# <tt>vec.normalize</tt>.
def vector=(vec)
  # Normalise first so that nothing is assigned if normalisation fails.
  normalized = vec.normalize
  @vector, @norm = vec, normalized
end
Private Instance Methods
build_enum(value, re = TOKEN_RE)
click to toggle source
# File lib/lsi4r/doc.rb, line 117
#
# Turns +value+ into something to iterate over: anything responding to
# +read+ is read first, and anything responding to +split+ is then split
# on +re+. Any other value is returned as is.
def build_enum(value, re = TOKEN_RE)
  content = value.respond_to?(:read) ? value.read : value
  content.respond_to?(:split) ? content.split(re) : content
end
build_hash(value, list, hash)
click to toggle source
# File lib/lsi4r/doc.rb, line 112
#
# Counts the items produced by +build_enum+ for +value+ into +hash+, keyed
# by whatever +list+ returns for each item. Returns +hash+.
def build_hash(value, list, hash)
  items = build_enum(value)

  items.each do |item|
    index = list[item]
    hash[index] += 1
  end

  hash
end