Class: Athena::Formats::Ferret

Inherits:
Base
  • Object
show all
Defined in:
lib/athena/formats/ferret.rb

Defined Under Namespace

Classes: IllegalRecordElementError, NoRecordElementError

Instance Method Summary collapse

Methods inherited from Base

#convert, #deferred?, formats, valid_format?, #wrap

Instance Method Details

#parse(source, &block) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/athena/formats/ferret.rb', line 60

# Reads the Ferret index found at <tt>source.path</tt> and creates one
# Athena::Record per document that is not marked as deleted.
#
# Each record is created by passing the document's +record_element+ field
# and +block+ to Athena::Record.new, and is then updated with the
# document's value for every element in +config+.
#
# The index reader is closed before the method returns, whether it
# finishes normally or raises.
#
# Returns the index's +num_docs+.
#
# Raises RuntimeError if the index's +segments+ file is missing or not
# accessible, or if the first or last document cannot be read.
def parse(source, &block)
  path = source.path

  # make sure the index can be opened
  begin
    File.open(File.join(path, 'segments')) {}
  rescue Errno::ENOENT, Errno::EACCES => err
    raise "can't open index at #{path} (#{err.to_s.sub(/ - .*/, '')})"
  end

  index = ::Ferret::Index::IndexReader.new(path)

  begin
    first, last = 0, index.max_doc - 1

    # make sure we can read from the index; an empty index has nothing to
    # probe, so skip the check rather than misreport it as a mismatch
    if last >= first
      begin
        index[first]
        index[last]
      rescue StandardError  # EOFError, "Not available", ...
        # leading :: so the constant resolves to the top-level Ferret
        # library and not to this format class of the same name
        raise "possible Ferret version mismatch; try to set the " \
              "FERRET_VERSION environment variable to something " \
              "other than #{::Ferret::VERSION}"
      end
    end

    first.upto(last) { |i|
      next if index.deleted?(i)

      doc = index[i]

      Athena::Record.new(doc[record_element], block) { |record|
        config.each { |element, field_config|
          record.update(element, doc[element], field_config)
        }
      }
    }

    index.num_docs
  ensure
    # release the reader on every exit path
    index.close
  end
end