class Hal4R

Constants

DEFAULT_WINDOW_SIZE
VERSION

Attributes

window_size[RW]

Public Class Methods

new(terms = [], window_size = nil) click to toggle source
# File lib/hal4r.rb, line 42
# Prepares a new instance by calling #reset with +window_size+ and then
# passing +terms+ to #add.
#
# terms::       collection handed to #add (an empty array by default)
# window_size:: forwarded unchanged to #reset (+nil+ by default)
def initialize(terms = [], window_size = nil)
  # Internal state is (re)built first ...
  reset(window_size)

  # ... and only then are the initial terms recorded.
  add(terms)
end

Public Instance Methods

<<(term) click to toggle source
# File lib/hal4r.rb, line 56
# Records a single occurrence of +term+.
#
# The term's index is looked up through <tt>@idmap[term]</tt> and the
# matching row is fetched from <tt>@matrix</tt>. Every non-nil entry of
# <tt>@window</tt> then increases the row's cell at that entry by the
# entry's position in the window plus one, so entries further back in the
# array contribute more. Finally the term's index is appended to the window
# and the window's first element is dropped.
#
# Returns +self+ so calls can be chained.
def <<(term)
  term_index = @idmap[term]
  row        = @matrix.get(term_index)

  @window.each_with_index do |seen, position|
    # Slots that have not been filled yet hold nil and are skipped.
    next unless seen

    row[seen] += position + 1
  end

  # Slide the window: newest index goes in at the end, oldest falls out.
  @window.push(term_index)
  @window.shift

  self
end
[](term, norm = false)
Alias for: vector
add(terms) click to toggle source
# File lib/hal4r.rb, line 66
# Feeds every element of +terms+, in iteration order, to #<<.
#
# terms:: any object responding to +each+
#
# Returns +self+.
def add(terms)
  terms.each do |term|
    self << term
  end

  self
end
cityblock(term1, term2, norm = true)
Alias for: manhattan
distance(term1, term2, dimension, norm = true)
Alias for: minkowski
each(&block)
Alias for: each_norm
each_distance(norm = true, dimension = 2) { |term1, term2, distance| ... } click to toggle source
# File lib/hal4r.rb, line 95
# Walks over every 2-element combination of +terms+ and yields three
# values per pair: the two terms in sorted order, followed by the result of
# #minkowski for that pair.
#
# norm::      passed through to #minkowski (default +true+)
# dimension:: passed through to #minkowski (default 2)
#
# Without a block an enumerator for this method (with the same arguments)
# is returned; with a block the return value is +self+.
def each_distance(norm = true, dimension = 2)
  unless block_given?
    return enum_for(:each_distance, norm, dimension)
  end

  terms.combination(2) do |pair|
    # The pair is sorted in place so that both the yielded terms and the
    # distance call see the same ordering.
    pair.sort!
    first, second = pair

    yield first, second, minkowski(first, second, dimension, norm)
  end

  self
end
each_norm(&block) click to toggle source
# File lib/hal4r.rb, line 89
# Delegates to #each_vector with its +norm+ argument fixed to +true+,
# forwarding the given block (if any). The return value is whatever
# #each_vector returns.
def each_norm(&block)
  normalized = true
  each_vector(normalized, &block)
end
Also aliased as: each
each_vector(norm = false) { |vector| ... } click to toggle source
# File lib/hal4r.rb, line 81
# Yields one array per value stored in <tt>@idmap</tt>: the result of
# #vector_i for that value, converted with +to_a+.
#
# norm:: passed through to #vector_i (default +false+)
#
# Without a block an enumerator for this method (with the same +norm+) is
# returned; with a block the return value is +self+.
def each_vector(norm = false)
  if block_given?
    @idmap.each_value do |index|
      row = vector_i(index, norm)
      yield row.to_a
    end

    self
  else
    enum_for(:each_vector, norm)
  end
end
euclidean(term1, term2, norm = true) click to toggle source
# File lib/hal4r.rb, line 114
# Shorthand for #minkowski with its +dimension+ argument fixed to 2.
#
# term1, term2:: the two terms to compare
# norm::         passed through to #minkowski (default +true+)
def euclidean(term1, term2, norm = true)
  order = 2
  minkowski(term1, term2, order, norm)
end
inspect() click to toggle source
# File lib/hal4r.rb, line 141
# Short description of the receiver: its class, its +object_id+ in
# hexadecimal, and the inspected values of #window_size and #size.
def inspect
  format(
    '#<%s:0x%x @window_size=%p, @size=%p>',
    self.class, object_id, window_size, size
  )
end
manhattan(term1, term2, norm = true) click to toggle source
# File lib/hal4r.rb, line 118
# Shorthand for #minkowski with its +dimension+ argument fixed to 1.
#
# term1, term2:: the two terms to compare
# norm::         passed through to #minkowski (default +true+)
def manhattan(term1, term2, norm = true)
  order = 1
  minkowski(term1, term2, order, norm)
end
Also aliased as: cityblock
minkowski(term1, term2, dimension, norm = true) click to toggle source
# File lib/hal4r.rb, line 107
# Distance between +term1+ and +term2+.
#
# For each term, #vector is called with +norm+ and +vector+ is invoked on
# the result. The second value is subtracted from the first, and the
# difference is passed through +abs+, +to_f+ and <tt>pow(dimension)</tt>.
# Its +sum+ is finally raised to the power <tt>1 / dimension</tt>.
#
# term1, term2:: the two terms to compare
# dimension::    exponent used for +pow+ and, inverted, for the final root
# norm::         passed through to #vector (default +true+)
def minkowski(term1, term2, dimension, norm = true)
  first  = vector(term1, norm).vector
  second = vector(term2, norm).vector

  powered = (first - second).abs.to_f.pow(dimension)

  # Float division keeps the root exponent fractional for integer input.
  powered.sum ** 1.fdiv(dimension)
end
Also aliased as: distance
norm(term) click to toggle source
# File lib/hal4r.rb, line 77
# Shorthand for #vector with its +norm+ argument fixed to +true+.
def norm(term)
  normalized = true
  vector(term, normalized)
end
reset(window_size = window_size()) click to toggle source
# File lib/hal4r.rb, line 49
# Replaces the instance's state with fresh, empty structures:
# <tt>@idmap</tt> from <tt>Hash.idmap(-1)</tt>, <tt>@matrix</tt> from
# <tt>Matrix.new</tt>, and <tt>@window</tt> as a new array of
# <tt>@window_size</tt> slots.
#
# window_size:: new window size; defaults to the current #window_size and
#               falls back to +DEFAULT_WINDOW_SIZE+ when that is +nil+ or
#               +false+
#
# Returns +self+.
def reset(window_size = window_size())
  # Build everything first and assign the three structures together, so
  # they are only replaced once all of them could be created.
  idmap  = Hash.idmap(-1)
  matrix = Matrix.new

  @window_size = window_size || DEFAULT_WINDOW_SIZE
  window       = Array.new(@window_size)

  @idmap, @matrix, @window = idmap, matrix, window

  self
end
to_a(norm = true) click to toggle source
# File lib/hal4r.rb, line 124
# Collects all vectors into an array.
#
# norm:: when truthy (the default) the result of #each_norm is converted
#        with +to_a+, otherwise the result of #each_vector
def to_a(norm = true)
  if norm
    each_norm.to_a
  else
    each_vector.to_a
  end
end
to_s() click to toggle source
# File lib/hal4r.rb, line 128
# Renders the matrix as a right-aligned text table.
#
# The first table column holds +nil+ followed by every entry of +terms+.
# Each further column comes from <tt>@matrix.each_col</tt>: columns whose
# <tt>isnull?</tt> is true are left out, the others are prefixed with the
# key that <tt>@idmap</tt> associates with the column's index. Every table
# column is padded to the width of its longest entry, cells are separated
# by a single space, and every line ends with <tt>$/</tt>.
#
# Returns the table as one string.
def to_s
  # Build a new array rather than unshifting onto whatever +terms+ returns,
  # so that collection is never modified (and may even be frozen).
  columns = [[nil, *terms]]

  @matrix.each_col.with_index do |col, index|
    columns << [@idmap.key(index), *col] unless col.isnull?
  end

  widths = columns.map { |column| column.map { |value| value.to_s.length }.max }
  layout = widths.map { |width| "%#{width}s" }.join(' ') << $/

  # One output line per entry of the first column, i.e. the nil header cell
  # plus one line per term.
  columns.first.each_index.map { |line|
    layout % columns.map { |column| column[line] }
  }.join
end
vector(term, norm = false) click to toggle source
# File lib/hal4r.rb, line 71
# Looks up the vector for +term+.
#
# The term's index is obtained with <tt>@idmap.fetch</tt>, so the lookup
# raises if +fetch+ cannot find +term+. The index and +norm+ are then
# handed to #vector_i.
#
# norm:: passed through to #vector_i (default +false+)
def vector(term, norm = false)
  index = @idmap.fetch(term)
  vector_i(index, norm)
end
Also aliased as: []

Private Instance Methods

vector_i(index, norm) click to toggle source
# File lib/hal4r.rb, line 149
# Asks <tt>@matrix</tt> for the vector at +index+, passing along the
# receiver's current #size and the +norm+ flag.
def vector_i(index, norm)
  current_size = size
  @matrix.vector(index, current_size, norm)
end