class Lsi4R

Constants

DEFAULT_CUTOFF
DEFAULT_EPSILON
DEFAULT_TRANSFORM
VERSION

Public Class Methods

build(items, options = {}) click to toggle source
# File lib/lsi4r.rb, line 44
# Creates an instance from +items+ and builds it with +options+.
#
# Returns the instance when its #build call yields a truthy result,
# +nil+ otherwise.
def build(items, options = {})
  instance = new(items)
  instance.build(options) ? instance : nil
end
each_norm(items, options = {}, build_options = {}, &block) click to toggle source
# File lib/lsi4r.rb, line 49
# Creates an instance from +items+, builds it with +build_options+ and, if
# the build yields a truthy result, delegates to the instance's #each_norm
# with a +nil+ key, +options+ and the given block.
#
# Returns the result of that call, or +nil+ when the build did not succeed.
def each_norm(items, options = {}, build_options = {}, &block)
  instance = new(items)
  return unless instance.build(build_options)

  instance.each_norm(nil, options, &block)
end
new(items = {}) click to toggle source
# File lib/lsi4r.rb, line 56
# Resets the internal state, then stores every entry of +items+.
#
# +items+ is enumerated as key/value pairs; when an entry has no value
# (e.g. a flat list of items), the key itself is stored as the value.
def initialize(items = {})
  reset

  items.each do |key, value|
    self[key] = value || key
  end
end

Public Instance Methods

<<(value) click to toggle source
# File lib/lsi4r.rb, line 79
# Adds +value+ using its +object_id+ as the key.
#
# Returns whatever #add returns.
def <<(value)
  key = value.object_id
  add(key, value)
end
[]=(key, value) click to toggle source
# File lib/lsi4r.rb, line 70
# Wraps +value+ in a Doc (constructed with +key+, +value+ and this
# instance's @list and @freq) and stores it in @hash under +key+.
def []=(key, value)
  doc = Doc.new(key, value, @list, @freq)
  @hash[key] = doc
end
add(key, value = key) click to toggle source
# File lib/lsi4r.rb, line 74
# Stores +value+ under +key+ (the key doubles as the value when none is
# given) and returns +self+ so calls can be chained.
def add(key, value = key)
  tap { self[key] = value }
end
build(options = {}) click to toggle source
# File lib/lsi4r.rb, line 149
# Runs build! over the current documents and @list, provided there is
# more than one entry (+size+ > 1).
#
# +options+ is normally a Hash; any other value is treated as the
# +:cutoff+ option.
#
# Returns the result of build!, or +nil+ when nothing was built.
def build(options = {})
  return unless size > 1

  options = { cutoff: options } unless options.is_a?(Hash)
  build!(docs, @list, options)
end
each(key = nil, options = {}, &block)
Alias for: each_norm
each_norm(key = nil, options = {}, &block) click to toggle source
# File lib/lsi4r.rb, line 121
# Same as #each_term, but always with the +:norm+ option set to +true+
# (any +:norm+ value passed in +options+ is overridden). +options+ itself
# is not modified.
def each_norm(key = nil, options = {}, &block)
  normalized = options.merge(norm: true)
  each_term(key, normalized, &block)
end
Also aliased as: each
each_term(key = nil, options = {}) { |doc, list, v| ... } click to toggle source
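Yields each document together with a term and its value, skipping NaN values. Supported options:

min:: minimum value to consider
abs:: minimum absolute value to consider
nul:: exclude null values, i.e. values whose absolute value is below a threshold (+true+ uses DEFAULT_EPSILON, a Float sets the threshold)
new:: exclude original terms / only yield new ones
norm:: use the documents' normalized vectors instead of their plain vectors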

# File lib/lsi4r.rb, line 102
# Iterates over the vectors selected by +key+ (see #each_vector) and yields
# +doc+, the term looked up in @invlist for the position, and the value at
# that position. Returns an Enumerator when no block is given.
#
# NaN values are always skipped. Further filters via +options+:
#
# min::  skip values below this minimum
# abs::  skip values whose absolute value is below this minimum
# nul::  skip values whose absolute value is below this threshold
#        (+true+ means DEFAULT_EPSILON)
# new::  skip positions for which <tt>doc.include?(index)</tt> is true
# norm:: passed on to #each_vector as its +norm+ argument
def each_term(key = nil, options = {})
  return enum_for(:each_term, key, options) unless block_given?

  min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
  # +true+ selects the default threshold; a Float is used as given.
  nul = DEFAULT_EPSILON if nul == true

  terms = @invlist

  each_vector(key, options[:norm]) do |doc, vec|
    vec.enum_for(:each).with_index do |value, index|
      next if value.nan?
      next if min && value < min
      next if abs && value.abs < abs
      next if nul && value.abs < nul
      next if new && doc.include?(index)

      yield doc, terms[index], value
    end
  end
end
each_vector(key = nil, norm = true) { |doc, vec| ... } click to toggle source
# File lib/lsi4r.rb, line 83
# Yields each document together with its vector: the normalized vector
# (+doc.norm+) when +norm+ is truthy, the plain one (+doc.vector+)
# otherwise. Documents whose selected vector is +nil+ are skipped.
#
# With a +nil+ key every document in +docs+ is visited; otherwise only the
# document stored under +key+, if any.
#
# Returns +self+, or an Enumerator when no block is given.
def each_vector(key = nil, norm = true)
  return enum_for(:each_vector, key, norm) unless block_given?

  emit = ->(doc) do
    vec = norm ? doc.norm : doc.vector
    yield doc, vec if vec
  end

  if key.nil?
    docs.each(&emit)
  else
    found = self[key]
    emit.call(found) if found
  end

  self
end
inspect() click to toggle source
# File lib/lsi4r.rb, line 161
# Short summary of the form <tt>Class@size/terms</tt>, where +size+ is
# this object's #size and +terms+ is the number of entries in @list.
def inspect
  format('%s@%d/%d', self.class, size, @list.size)
end
reset() click to toggle source
# File lib/lsi4r.rb, line 154
# Discards all state and starts over with empty containers. Returns +self+.
def reset
  @hash = {}
  # Unknown keys are assigned the next free index (the current size).
  @list = Hash.new { |terms, term| terms[term] = terms.size }
  # Counts default to zero.
  @freq = Hash.new(0)
  @invlist = {}

  self
end
to_a(norm = true) click to toggle source
# File lib/lsi4r.rb, line 165
# Returns the document vectors as a nested Array, transposed so that each
# inner Array holds one vector position across all documents (in +docs+
# order).
#
# norm:: use the normalized vectors (+doc.norm+) when truthy, the plain
#        ones (+doc.vector+) otherwise
#
# Iterates +docs+ directly, like #each_vector does, rather than relying on
# +map+ yielding key/document pairs: +each+ is an alias for #each_norm,
# which yields document, term and value instead.
def to_a(norm = true)
  docs.map { |doc| (norm ? doc.norm : doc.vector).to_a }.transpose
end

Private Instance Methods

build!(docs, list, options) click to toggle source
# File lib/lsi4r.rb, line 172
# Performs the actual build: sets the Doc transform, records the inverted
# +list+ in @invlist, decomposes the document matrix, recombines it with
# the reduced singular values and assigns each resulting column (as a row
# vector) to the document at the same index.
#
# Options (+options+ must respond to +fetch+):
#
# transform:: value assigned to Doc.transform (default DEFAULT_TRANSFORM)
# cutoff::    passed to #reduce (default DEFAULT_CUTOFF)
#
# Returns the number of documents.
def build!(docs, list, options)
  Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)

  @invlist = list.invert

  term_count = list.size
  doc_count  = docs.size

  # TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
  # MxN matrix, M<N, is not implemented (file svd.c, line 61)
  u, v, s = matrix(docs, term_count, doc_count).SV_decomp

  sigma   = reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF))
  product = u * sigma * v.trans

  product.enum_for(:each_col).with_index do |column, index|
    docs[index].vector = column.row
  end

  doc_count
end
matrix(d = docs, m = @list.size, n = d.size) click to toggle source
# File lib/lsi4r.rb, line 187
# Allocates an +m+ x +n+ GSL matrix and sets column +j+ to the transformed
# vector of the +j+-th document in +d+. Returns the matrix.
#
# Defaults: all documents, one row per entry in @list, one column per
# document.
def matrix(d = docs, m = @list.size, n = d.size)
  GSL::Matrix.alloc(m, n).tap do |mat|
    d.each_with_index do |doc, col|
      mat.set_col(col, doc.transformed_vector(m, n))
    end
  end
end
reduce(s, k, m = s.size) click to toggle source
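Zeroes trailing entries of +s+ according to the cutoff +k+ and returns the result as a diagonal matrix:

k == nil:: keep all
k >= 1:: keep this many
k < 1:: keep (at most) this proportion
k <= 0:: keep none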

# File lib/lsi4r.rb, line 196
# Truncates the values in +s+ (of size +m+) in place according to the
# cutoff +k+ and returns <tt>s.to_m_diagonal</tt>:
#
# * +nil+/+false+ or +k+ >= +m+: +s+ is left untouched
# * +k+ <= 0: all values are zeroed
# * 0 < +k+ < 1: the first <tt>(m * k).floor</tt> values are kept
# * otherwise: the first <tt>k.floor</tt> values are kept
def reduce(s, k, m = s.size)
  if k && k < m
    if k > 0
      keep = (k < 1 ? m * k : k).floor
      # Zero everything after the values being kept.
      s[keep, m - keep] = 0
    else
      s.set_zero
    end
  end

  s.to_m_diagonal
end