class PageRank
Constants
- DEFAULT_CONVERGENCE
- DEFAULT_DAMPING_FACTOR
- DEFAULT_MAX_ITERATIONS
- VERSION
Attributes
convergence[RW]
damping_factor[RW]
dangling_nodes[R]
divergence[R]
initialization_vector[R]
iterations[R]
matrix[R]
max_iterations[RW]
ranking[R]
size[R]
Public Class Methods
from_hash(hash, *args)
click to toggle source
# File lib/page_rank.rb, line 47 def from_hash(hash, *args) s = hash.to_a.flatten.uniq new(s.size, *args).seed(s.sort_by { |k| sortkey(k) }) << hash end
from_json(path, *args)
click to toggle source
# File lib/page_rank.rb, line 52 def from_json(path, *args) from_hash(JSON.parse(File.read(path)), *args) end
new(size, options = {})
click to toggle source
# File lib/page_rank.rb, line 66 def initialize(size, options = {}) @size, @idmap, @matrix, @iterations, @ranking, @divergence = size, Hash.idmap(-1), GSL::Matrix.zeros(size), 0, [], [] @dangling_nodes, @initialization_vector = GSL::Vector::Col.alloc(size).set_all(1), GSL::Vector::Col.alloc(size).set_all(Rational(1, size)) @convergence = options.fetch(:convergence, DEFAULT_CONVERGENCE) @max_iterations = options.fetch(:max_iterations, DEFAULT_MAX_ITERATIONS) @damping_factor = options.fetch(:damping_factor, DEFAULT_DAMPING_FACTOR) end
Private Class Methods
sortkey(k)
click to toggle source
# File lib/page_rank.rb, line 58 def sortkey(k) Float(k) rescue TypeError, ArgumentError k end
Public Instance Methods
<<(hash)
click to toggle source
# File lib/page_rank.rb, line 110 def <<(hash) hash.each { |k, v| self[k] = v } self end
[]=(k, v)
click to toggle source
# File lib/page_rank.rb, line 115 def []=(k, v) w = Rational(1, v.size) dangling_nodes[i = id(k)] = 0 v.each { |l| matrix[i, id(l)] = w } end
each() { |key(k), map! { |l| key(l) }| ... }
click to toggle source
# File lib/page_rank.rb, line 133 def each return enum_for(__method__) unless block_given? index = -1 matrix.each_row { |r| k, v = index += 1, r.where.to_a yield key(k), v.map! { |l| key(l) }.sort! unless v.empty? } self end
inspect()
click to toggle source
# File lib/page_rank.rb, line 95 def inspect s = '#<%s:0x%x>' % [self.class, object_id] %w[size count convergence max_iterations damping_factor].each { |v| s.insert(-2, " @#{v}=#{send(v).inspect}") } s end
rank(personalization = true)
click to toggle source
# File lib/page_rank.rb, line 121 def rank(personalization = true) iv, v = initialization_vector.dup, personalization order(iterate(case v when true then iv when 0 then iv.set_zero when Float then iv.set_all(v) when Integer then iv.set_all(Rational(1, v)) else v end)) unless empty? end
seed(list)
click to toggle source
# File lib/page_rank.rb, line 105 def seed(list) list.each { |k| id(k) } self end
to_json()
click to toggle source
# File lib/page_rank.rb, line 146 def to_json JSON.fast_generate(to_h) end
to_table()
click to toggle source
# File lib/page_rank.rb, line 150 def to_table matrix.to_a.map.with_index { |r, i| [key(i) || i + 1, r.map.with_index { |v, j| [key(j) || j + 1, v] }] } end
Private Instance Methods
iterate(pv)
click to toggle source
pv ? Langville/Meyer 2012 : Page/Brin 1998
# File lib/page_rank.rb, line 158 def iterate(pv) iter, prev, diff = [], initialization_vector.trans, @divergence = [] d, dn = 1 - df = damping_factor, dangling_nodes d /= size.to_f unless pv max_iterations.times { curr = df * prev * matrix iter << curr += pv ? (df * prev * dn + d) * pv : d diff << [delta = (curr - prev).asum] delta < convergence ? break : prev = curr } @iterations = iter.size iter end
order(iter)
click to toggle source
# File lib/page_rank.rb, line 177 def order(iter) order, ranking, prev = [], @ranking = [], @idmap.to_a iter.each { |t| ranking << prev = @idmap.map { |k, i| [k, i, t[i]] } .sort_by { |k, _, v| [-v, prev.index(prev.assoc(k))] } } prev = prev.map { |k, i,| order << k; i } @divergence.zip(ranking) { |diff, rank| diff << prev.zip(rank.map { |r| r.delete_at(1) }).corr } order end