class Lsi4R
Constants
- DEFAULT_CUTOFF
- DEFAULT_EPSILON
- DEFAULT_TRANSFORM
- VERSION
Public Class Methods
build(items, options = {})
click to toggle source
# File lib/lsi4r.rb, line 44
#
# Creates an instance from +items+ and builds it with +options+.
#
# Returns the instance when the instance-level +build+ returns a truthy
# value, +nil+ otherwise.
def build(items, options = {})
  instance = new(items)
  instance.build(options) ? instance : nil
end
each_norm(items, options = {}, build_options = {}, &block)
click to toggle source
# File lib/lsi4r.rb, line 49
#
# Creates an instance from +items+, builds it with +build_options+ and, if
# the build returned a truthy value, forwards +options+ and the given block
# to the instance's +each_norm+ (with a +nil+ key).
#
# Returns +nil+ when the build returned a falsy value.
def each_norm(items, options = {}, build_options = {}, &block)
  index = new(items)
  return unless index.build(build_options)

  index.each_norm(nil, options, &block)
end
new(items = {})
click to toggle source
# File lib/lsi4r.rb, line 56
#
# Resets the internal state, then stores every entry of +items+ via
# <tt>self[key] = value</tt>. When an entry yields no value (+nil+ or
# +false+), the key itself is stored as the value.
def initialize(items = {})
  reset

  items.each do |key, value|
    self[key] = value || key
  end
end
Public Instance Methods
<<(value)
click to toggle source
# File lib/lsi4r.rb, line 79
#
# Adds +value+, using its +object_id+ as the key. Returns whatever +add+
# returns.
def <<(value)
  key = value.object_id
  add(key, value)
end
[]=(key, value)
click to toggle source
# File lib/lsi4r.rb, line 70
#
# Wraps +value+ in a new Doc (constructed with +key+, +value+ and the shared
# <tt>@list</tt> / <tt>@freq</tt> tables) and stores it in <tt>@hash</tt>
# under +key+.
def []=(key, value)
  doc = Doc.new(key, value, @list, @freq)
  @hash[key] = doc
end
add(key, value = key)
click to toggle source
# File lib/lsi4r.rb, line 74
#
# Stores +value+ under +key+ (the value defaults to the key itself) and
# returns +self+ so that calls can be chained.
def add(key, value = key)
  tap { self[key] = value }
end
build(options = {})
click to toggle source
# File lib/lsi4r.rb, line 149
#
# Runs <tt>build!</tt> over the current docs and term list, but only when
# +size+ is greater than 1; otherwise returns +nil+.
#
# +options+ may be a Hash, or any other value, in which case it is treated
# as the <tt>:cutoff</tt> option.
def build(options = {})
  return unless size > 1

  options = { cutoff: options } unless options.is_a?(Hash)
  build!(docs, @list, options)
end
each_norm(key = nil, options = {}, &block)
click to toggle source
# File lib/lsi4r.rb, line 121
#
# Same as +each_term+, with the <tt>:norm</tt> option forced to +true+.
# The caller's +options+ hash is not modified.
def each_norm(key = nil, options = {}, &block)
  normed_options = options.merge(norm: true)
  each_term(key, normed_options, &block)
end
Also aliased as: each
each_term(key = nil, options = {}) { |doc, list, v| ... }
click to toggle source
- min: minimum value to consider
- abs: minimum absolute value to consider
- nul: exclude null values (true or Float)
- new: exclude original terms / only yield new ones
# File lib/lsi4r.rb, line 102
#
# Yields +doc+, term and value for every vector component that passes the
# filters below. Returns an enumerator when no block is given.
#
# Options:
# min::  skip values below this minimum
# abs::  skip values whose absolute value is below this minimum
# nul::  skip values whose absolute value is below this threshold
#        (+true+ selects DEFAULT_EPSILON)
# new::  skip components for which <tt>doc.include?(index)</tt> is true
# norm:: passed through to +each_vector+
#
# NaN values are always skipped.
def each_term(key = nil, options = {})
  return enum_for(:each_term, key, options) unless block_given?

  min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
  nul = DEFAULT_EPSILON if nul == true

  # Index => term lookup table.
  terms = @invlist

  each_vector(key, options[:norm]) do |doc, vec|
    vec.enum_for(:each).with_index do |value, index|
      next if value.nan?
      next if min && value < min
      next if abs && value.abs < abs
      next if nul && value.abs < nul
      next if new && doc.include?(index)

      yield doc, terms[index], value
    end
  end
end
each_vector(key = nil, norm = true) { |doc, vec| ... }
click to toggle source
# File lib/lsi4r.rb, line 83
#
# Yields each doc together with its vector: <tt>doc.norm</tt> when +norm+ is
# truthy, <tt>doc.vector</tt> otherwise. Docs whose vector is +nil+ or
# +false+ are skipped.
#
# With a +key+, only the doc stored under that key is considered (nothing
# is yielded if there is none). Returns +self+, or an enumerator when no
# block is given.
def each_vector(key = nil, norm = true)
  return enum_for(:each_vector, key, norm) unless block_given?

  emit = lambda do |doc|
    vec = norm ? doc.norm : doc.vector
    yield doc, vec if vec
  end

  if key.nil?
    docs.each(&emit)
  else
    doc = self[key]
    emit.call(doc) if doc
  end

  self
end
inspect()
click to toggle source
# File lib/lsi4r.rb, line 161
#
# Short summary of the form <tt>Class@size/terms</tt>, where +terms+ is the
# number of entries in <tt>@list</tt>.
def inspect
  format('%s@%d/%d', self.class, size, @list.size)
end
reset()
click to toggle source
# File lib/lsi4r.rb, line 154
#
# Clears all state and returns +self+:
#
# <tt>@hash</tt>::    empty Hash
# <tt>@list</tt>::    Hash that assigns each unseen key the next running
#                     index (its size at the time of first lookup)
# <tt>@freq</tt>::    Hash with a default value of 0
# <tt>@invlist</tt>:: empty Hash
def reset
  @hash    = {}
  @list    = Hash.new { |terms, term| terms[term] = terms.size }
  @freq    = Hash.new(0)
  @invlist = {}

  self
end
to_a(norm = true)
click to toggle source
# File lib/lsi4r.rb, line 165
#
# Collects every doc's vector as an Array (<tt>doc.norm</tt> when +norm+ is
# truthy, <tt>doc.vector</tt> otherwise) and returns the transposed result.
def to_a(norm = true)
  rows = map do |_, doc|
    (norm ? doc.norm : doc.vector).to_a
  end

  rows.transpose
end
Private Instance Methods
build!(docs, list, options)
click to toggle source
# File lib/lsi4r.rb, line 172
#
# Sets the Doc transform from <tt>options[:transform]</tt> (default
# DEFAULT_TRANSFORM), stores the inverted +list+ in <tt>@invlist</tt>,
# decomposes the matrix built from +docs+, reduces the +s+ component
# according to <tt>options[:cutoff]</tt> (default DEFAULT_CUTOFF) and
# assigns each column of the recombined product to the matching doc's
# +vector+.
#
# Returns the number of docs.
def build!(docs, list, options)
  Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)
  @invlist = list.invert

  doc_count = docs.size

  # TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
  # MxN matrix, M<N, is not implemented (file svd.c, line 61)
  u, v, s = matrix(docs, list.size, doc_count).SV_decomp

  cutoff = options.fetch(:cutoff, DEFAULT_CUTOFF)
  product = u * reduce(s, cutoff) * v.trans

  product.enum_for(:each_col).with_index do |column, index|
    docs[index].vector = column.row
  end

  doc_count
end
matrix(d = docs, m = @list.size, n = d.size)
click to toggle source
# File lib/lsi4r.rb, line 187
#
# Allocates an +m+ x +n+ GSL matrix and fills column +j+ with the
# transformed vector of the +j+-th doc in +d+. Returns the matrix.
def matrix(d = docs, m = @list.size, n = d.size)
  GSL::Matrix.alloc(m, n).tap do |result|
    d.each_with_index do |doc, column|
      result.set_col(column, doc.transformed_vector(m, n))
    end
  end
end
reduce(s, k, m = s.size)
click to toggle source
- k == nil: keep all
- k >= 1: keep this many
- k < 1: keep (at most) this proportion
# File lib/lsi4r.rb, line 196
#
# Zeroes the trailing entries of +s+ in place according to the cutoff +k+
# and returns <tt>s.to_m_diagonal</tt>.
#
# k == nil:: keep all entries
# k >= 1::   keep this many entries (nothing is zeroed when k >= m)
# k < 1::    keep (at most) this proportion of the +m+ entries
# k <= 0::   zero every entry
def reduce(s, k, m = s.size)
  if k && k < m
    if k > 0
      # Turn a proportion into an absolute count, then round down.
      k = (k < 1 ? m * k : k).floor
      s[k, m - k] = 0
    else
      s.set_zero
    end
  end

  s.to_m_diagonal
end