class Athena::Formats::Ferret

Private Instance Methods

parse(input, &block)
# File lib/athena/formats/ferret.rb, line 48
# Walks the Ferret index located at <tt>input.path</tt> and builds one
# Athena::Record per document that is not marked deleted.
#
# input:: object responding to +path+; the path is joined with
#         <tt>'segments'</tt> for the open check and handed to the
#         index reader.
# block:: passed through as the second argument of Athena::Record.new.
#
# Returns the reader's +num_docs+.
#
# Raises RuntimeError when the +segments+ file is missing or unreadable,
# and when probing the first or last document fails.
def parse(input, &block)
  path = input.path

  # make sure the index can be opened
  begin
    File.open(File.join(path, 'segments')) {}
  rescue Errno::ENOENT, Errno::EACCES => err
    raise "can't open index at #{path} (#{err.to_s.sub(/ - .*/, '')})"
  end

  index = ::Ferret::Index::IndexReader.new(path)

  begin
    first, last = 0, index.max_doc - 1

    # make sure we can read from the index; with no documents (last < first)
    # there is nothing to probe, and a failed lookup would be misreported
    # as a version problem
    unless last < first
      begin
        index[first]
        index[last]
      rescue StandardError  # EOFError, "Not available", ...
        # Top-level ::Ferret, matching the reader lookup above, so the
        # library constant is used rather than one resolved through the
        # surrounding namespace.
        raise "possible Ferret version mismatch; try to set the " \
              "FERRET_VERSION environment variable to something " \
              "other than #{::Ferret::VERSION}"
      end
    end

    first.upto(last) { |i|
      unless index.deleted?(i)
        doc = index[i]

        Athena::Record.new(doc[record_element], block) { |record|
          config.each { |element, field_config|
            record.update(element, doc[element], field_config)
          }
        }
      end
    }

    index.num_docs
  ensure
    # release the reader on success and on every error path
    index.close
  end
end