class PMS::Index
Constants
- DEFAULT_LSI
- TOKEN_RE
Attributes
entries[R]
index[R]
input[R]
Public Class Methods
new(input, options = {})
click to toggle source
# File lib/pms/index.rb, line 39
#
# Stores +input+ as the document source and builds the index from it.
#
# +input+ is kept as-is when it responds to +each+; a String that does not
# is replaced by the result of +each_line+. Anything else raises
# ArgumentError. +options+ is passed unchanged to +build_index+.
def initialize(input, options = {})
  @input =
    if input.respond_to?(:each)
      input
    elsif input.is_a?(String)
      input.each_line
    else
      raise ArgumentError, 'input must implement #each'
    end

  build_index(options)
end
Public Instance Methods
doc(doc_num)
click to toggle source
# File lib/pms/index.rb, line 74
#
# Returns the single document stored under +doc_num+, looked up through
# +documents+ (so the result is +nil+ when +documents+ has nothing at that
# position).
def doc(doc_num)
  matching = documents([doc_num])
  matching.first
end
Also aliased as: []
doc_nums(token)
click to toggle source
# File lib/pms/index.rb, line 61
#
# Returns only the document numbers for +token+, i.e. the keys of the hash
# produced by +doc_nums_with_positions+.
def doc_nums(token)
  hits = doc_nums_with_positions(token)
  hits.keys
end
Also aliased as: results
doc_nums_with_positions(token)
click to toggle source
# File lib/pms/index.rb, line 46
#
# Looks up +token+ in the index and returns a Hash mapping each matching
# document number to the Array of positions recorded for it.
#
# +token+ may be
# * a String: it is normalized with +mangle_token+ and looked up exactly;
#   an unknown token yields an empty Hash;
# * a Regexp: the entries of every indexed term matching it are merged,
#   with the position lists of a shared document number unioned.
#
# Any other argument raises TypeError.
#
# +nil+ entries in the position lists (appended by +build_index+ when the
# +:lsi+ option is enabled) are dropped from the result.
#
# The returned Hash and its Arrays are fresh objects: querying neither adds
# entries to the index nor alters the stored position lists.
def doc_nums_with_positions(token)
  postings =
    case token
    when String
      # +fetch+ bypasses the index's default proc, which would otherwise
      # insert an empty entry for every token that is not indexed.
      index.fetch(mangle_token(token)) { {} }
    when Regexp
      index.each_with_object({}) do |(term, docs), merged|
        next unless term =~ token

        merged.update(docs) { |_, seen, extra| seen | extra }
      end
    else
      raise TypeError, "String or Regexp expected, got #{token.class}"
    end

  # Compact into copies so the arrays held by the index stay untouched.
  postings.each_with_object({}) do |(doc_num, positions), result|
    result[doc_num] = positions.compact
  end
end
Also aliased as: results_with_positions
documents(doc_nums = default = true)
click to toggle source
# File lib/pms/index.rb, line 67
#
# Returns documents from the input. The full list is obtained once from
# +get_documents+ and cached in <tt>@documents</tt>.
#
# Called without an argument, returns the whole cached list. Called with
# +doc_nums+, returns the documents at those positions (via +values_at+).
#
# +default+ is only assigned when the argument is omitted, which is how the
# two call forms are told apart.
def documents(doc_nums = default = true)
  @documents ||= get_documents
  return @documents if default

  @documents.values_at(*doc_nums)
end
Also aliased as: matches
Private Instance Methods
build_index(options)
click to toggle source
# File lib/pms/index.rb, line 82
#
# Reads every document from +input+ and builds the index, a nested Hash of
# the form <tt>term => { doc_num => [positions] }</tt>, stored in
# <tt>@index</tt>. Document numbers and token positions both count up from 0;
# the document numbers seen are collected in <tt>@entries</tt>, and any
# cached <tt>@documents</tt> is reset.
#
# When <tt>options[:lsi]</tt> is set, +lsi4r+ is loaded on demand and, after
# the regular pass, a +nil+ is appended to the position list of every
# term/document pair yielded by <tt>Lsi4R.each_norm</tt>. A value of +true+
# is replaced by +DEFAULT_LSI+ before being passed as +min+.
def build_index(options)
  lsi           = options[:lsi]
  terms_by_doc  = nil

  if lsi
    require 'lsi4r'

    lsi = DEFAULT_LSI if lsi == true
    terms_by_doc = Hash.new { |hash, key| hash[key] = [] }
  end

  @documents = nil
  @entries   = []
  doc_num    = -1

  # Both levels auto-create their entry on first access.
  postings = Hash.new { |terms, term|
    terms[term] = Hash.new { |docs, num| docs[num] = [] }
  }

  input.each do |doc|
    doc_num += 1
    @entries << doc_num

    position = -1

    each_token(doc) do |token|
      term = mangle_token(token)
      position += 1

      postings[term][doc_num] << position
      terms_by_doc[doc_num] << term if terms_by_doc
    end
  end

  if lsi
    Lsi4R.each_norm(terms_by_doc, min: lsi, new: true) do |document, term, _|
      postings[mangle_token(term)][document.key] << nil
    end
  end

  @index = postings
end
each_token(doc, &block)
click to toggle source
# File lib/pms/index.rb, line 118
#
# Scans +doc+ with +TOKEN_RE+, handing each match to the given block.
# The return value is whatever <tt>doc.scan</tt> returns.
def each_token(doc, &handler)
  doc.scan(TOKEN_RE, &handler)
end
get_documents()
click to toggle source
# File lib/pms/index.rb, line 110
#
# Reads all documents from +input+ into a new Array, in iteration order.
# The input is rewound first when it responds to +rewind+.
def get_documents
  source = input
  source.rewind if source.respond_to?(:rewind)

  # Only +each+ is assumed on the input, so collect by hand.
  [].tap { |collected| source.each { |doc| collected << doc } }
end
mangle_token(token)
click to toggle source
# File lib/pms/index.rb, line 122
#
# Normalizes +token+ by passing it through <tt>Unicode.downcase</tt>.
# Applied to terms both when indexing and when looking up a String token.
def mangle_token(token)
  normalized = Unicode.downcase(token)
  normalized
end