Class: Athena::Formats::Ferret

Inherits:
Athena::Formats show all
Defined in:
lib/athena/formats/ferret.rb

Defined Under Namespace

Classes: IllegalRecordElementError, NoRecordElementError

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Athena::Formats

[], convert, deferred?, formats, valid_format?

Constructor Details

#initialize(parser) ⇒ Ferret

Returns a new instance of Ferret.



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/athena/formats/ferret.rb', line 42

# Builds a Ferret format handler from the given parser.
#
# Works on a shallow copy of +parser.config+, so the parser's own hash
# keeps its +:__record_element+ entry. That entry is removed from the
# copy and stored as the record element; whatever remains becomes the
# handler's config.
#
# @param parser [#config] object whose +config+ responds to +dup+ and
#   yields a hash-like object supporting +delete+
# @raise [NoRecordElementError] if no +:__record_element+ entry is present
# @raise [IllegalRecordElementError] if the entry is not a String
def initialize(parser)
  settings = parser.config.dup
  element  = settings.delete(:__record_element)

  # Stored before validation, exactly as the value was found.
  @record_element = element

  raise NoRecordElementError, 'no record element specified' if element.nil?

  unless element.is_a?(String)
    raise IllegalRecordElementError, "illegal record element #{element}"
  end

  @config = settings
  @parser = parser
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



40
41
42
# File 'lib/athena/formats/ferret.rb', line 40

# Returns the value of @config. The constructor sets it to the copy of
# the parser's config left after the :__record_element entry is removed.
#
# @return [Object] the stored config
def config
  @config
end

#match_all_query ⇒ Object (readonly)

Returns the value of attribute match_all_query.



40
41
42
# File 'lib/athena/formats/ferret.rb', line 40

# Returns the value of @match_all_query.
#
# NOTE(review): the constructor shown for this class does not assign
# @match_all_query, so this presumably returns nil unless it is set
# somewhere else in the file -- confirm.
#
# @return [Object, nil] the stored value
def match_all_query
  @match_all_query
end

#parser ⇒ Object (readonly)

Returns the value of attribute parser.



40
41
42
# File 'lib/athena/formats/ferret.rb', line 40

# Returns the value of @parser, i.e. the parser object that was handed
# to the constructor.
#
# @return [Object] the stored parser
def parser
  @parser
end

#record_element ⇒ Object (readonly)

Returns the value of attribute record_element.



40
41
42
# File 'lib/athena/formats/ferret.rb', line 40

# Returns the value of @record_element, which the constructor takes from
# the :__record_element entry of the parser's config (a String once
# construction has succeeded).
#
# @return [String] the stored record element
def record_element
  @record_element
end

Instance Method Details

#parse(source) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/athena/formats/ferret.rb', line 58

# Reads every non-deleted document from the Ferret index located at
# +source.path+ and builds one Athena::Record per document.
#
# Each record is created with +parser.block+ and the document's
# +record_element+ field; inside the record's block, every
# (element, field_config) pair of +config+ is applied through
# +record.update(element, doc[element], field_config)+.
#
# @param source [#path] object whose +path+ is the index directory
# @return [Object] whatever +index.num_docs+ reports for the index
# @raise [RuntimeError] if the index's +segments+ file is missing or
#   unreadable, or if the first/last document cannot be read
def parse(source)
  path = source.path

  # Make sure the index can be opened: probe its 'segments' file and
  # translate the two expected failures into a readable message.
  begin
    File.open(File.join(path, 'segments')) {}
  rescue Errno::ENOENT, Errno::EACCES => err
    raise "can't open index at #{path} (#{err.to_s.sub(/ - .*/, '')})"
  end

  # NOTE(review): the reader is never closed in this method; confirm
  # whether records keep references into the index before adding a close.
  index = ::Ferret::Index::IndexReader.new(path)
  first, last = 0, index.max_doc - 1

  # Make sure we can read from the index. An empty index has no document
  # to probe, so skip the check instead of reporting a bogus mismatch.
  unless last < first
    begin
      index[first]
      index[last]
    rescue StandardError  # EOFError, "Not available", ...
      # Leading '::' selects the top-level Ferret library; a bare 'Ferret'
      # inside Athena::Formats::Ferret would name this format class.
      # Adjacent literals keep the message intact without mutating a
      # (possibly frozen) string literal.
      raise 'possible Ferret version mismatch; try to set the ' \
            'FERRET_VERSION environment variable to something ' \
            "other than #{::Ferret::VERSION}"
    end
  end

  first.upto(last) do |i|
    next if index.deleted?(i)

    doc = index[i]

    Athena::Record.new(parser.block, doc[record_element]) do |record|
      config.each do |element, field_config|
        record.update(element, doc[element], field_config)
      end
    end
  end

  index.num_docs
end