Class: FastRI::FullTextIndex

Inherits:
Object
  • Object
show all
Defined in:
lib/fastri/full_text_index.rb

Defined Under Namespace

Classes: Result

Constant Summary collapse

# Default value for the :max_query_size option (see DEFAULT_OPTIONS).
MAX_QUERY_SIZE = 20

# Number of characters fetched per candidate when matching against a Regexp.
MAX_REGEXP_MATCH_SIZE = 255

# Option defaults; caller-supplied options are merged over these.
DEFAULT_OPTIONS = { :max_query_size => MAX_QUERY_SIZE }

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(type, fulltext, sarray, options) ⇒ FullTextIndex

Returns a new instance of FullTextIndex.



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/fastri/full_text_index.rb', line 47

# Sets up the index from either a pair of IO objects or a pair of filenames.
#
# type     - :io or :filenames; selects which pair of instance variables
#            receives the two sources.
# fulltext - stored in @fulltext_IO (:io) or @fulltext_fname (:filenames).
# sarray   - stored in @sarray_IO (:io) or @sarray_fname (:filenames).
# options  - Hash merged over DEFAULT_OPTIONS; only :max_query_size is read
#            here.
#
# Raises RuntimeError ("Unknown type") for any other type.
# Finishes by calling check_magic.
def initialize(type, fulltext, sarray, options)
  settings = DEFAULT_OPTIONS.merge(options)

  case type
  when :io        then @fulltext_IO, @sarray_IO = fulltext, sarray
  when :filenames then @fulltext_fname, @sarray_fname = fulltext, sarray
  else
    raise "Unknown type"
  end

  @type           = type
  @max_query_size = settings[:max_query_size]
  check_magic
end

Instance Attribute Details

#max_query_size ⇒ Object (readonly)

Returns the value of attribute max_query_size.



46
47
48
# File 'lib/fastri/full_text_index.rb', line 46

# Reader for the @max_query_size instance variable, which the constructor
# assigns from options[:max_query_size].
#
# Returns the value currently stored in @max_query_size.
def max_query_size
  @max_query_size
end

Class Method Details

.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {}) ⇒ Object



42
43
44
# File 'lib/fastri/full_text_index.rb', line 42

# Convenience constructor: forwards to new with the :filenames type tag, so
# the constructor keeps the two arguments in @fulltext_fname and
# @sarray_fname.
#
# fulltext_fname      - passed through unchanged (presumably the path of the
#                       full-text file; confirm against callers).
# suffix_arrray_fname - passed through unchanged (presumably the path of the
#                       suffix-array file; confirm against callers).
# options             - Hash the constructor merges over DEFAULT_OPTIONS
#                       (default: {}).
#
# Returns the result of new.
def self.new_from_filenames(fulltext_fname, suffix_arrray_fname, options = {})
  new(:filenames, fulltext_fname, suffix_arrray_fname, options)
end

.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {}) ⇒ Object



38
39
40
# File 'lib/fastri/full_text_index.rb', line 38

# Convenience constructor: forwards to new with the :io type tag, so the
# constructor keeps the two arguments in @fulltext_IO and @sarray_IO.
#
# fulltext_IO      - passed through unchanged (presumably an IO-like object
#                    over the full text; confirm against callers).
# suffix_arrray_IO - passed through unchanged (presumably an IO-like object
#                    over the suffix array; confirm against callers).
# options          - Hash the constructor merges over DEFAULT_OPTIONS
#                    (default: {}).
#
# Returns the result of new.
def self.new_from_ios(fulltext_IO, suffix_arrray_IO, options = {})
  new(:io, fulltext_IO, suffix_arrray_IO, options)
end

Instance Method Details

#fetch_data(index, size, offset = 0) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/fastri/full_text_index.rb', line 136

# Reads a window of text around the suffix at +index+ and trims it to the
# "\0"-delimited record surrounding that suffix.
#
# index  - suffix-array index, resolved to a base position via
#          index_to_offset.
# size   - number of characters requested from get_string.
# offset - zero or negative displacement from the base position
#          (default: 0).
#
# Raises RuntimeError ("Bad offset") unless offset <= 0.
# Returns the slice of the fetched string lying between the nearest "\0" at
# or before the suffix position and the nearest "\0" at or after it; a
# missing delimiter on either side leaves that side untrimmed.
def fetch_data(index, size, offset = 0)
  raise "Bad offset" unless offset <= 0
  get_fulltext_IO do |fulltextIO|
    get_sarray_IO do |sarrayIO|
      window_start = index_to_offset(sarrayIO, index) + offset
      if window_start < 0
        # The window would begin before the start of the text: shrink the
        # read and move the in-string position of the suffix accordingly.
        shortfall = window_start.abs
        read_size = size - shortfall
        pivot     = -(offset + shortfall)
      else
        read_size = size
        pivot     = -offset
      end

      # The original (unclamped) offset is what get_string receives.
      chunk = get_string(sarrayIO, fulltextIO, index, read_size, offset)

      # pivot is where the suffix itself sits inside chunk.
      left_nul  = chunk.rindex("\0", pivot)
      right_nul = chunk.index("\0", pivot)
      first = left_nul ? left_nul + 1 : 0
      last  = right_nul ? right_nul - 1 : -1
      chunk[first..last]
    end
  end
end

#lookup(term) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/fastri/full_text_index.rb', line 63

# Searches for +term+ with binary_search over the suffix-array IO and, on a
# hit, wraps the outcome in a Result.
#
# term - the query handed to binary_search and stored in the Result.
#
# Returns a Result (through an explicit return) when binary_search yields an
# offset and a truthy path is obtained. Otherwise the innermost block
# evaluates to nil; what lookup itself returns in that case depends on what
# get_fulltext_IO / get_sarray_IO do with their block's value, which is not
# visible here.
def lookup(term)
  get_fulltext_IO do |fulltextIO|
    get_sarray_IO do |sarrayIO|
      # Upper search bound: byte size of the suffix array divided by 4, minus
      # one (presumably 4-byte entries and a zero-based last index; confirm).
      # StringIO is sized through its backing string, anything else through
      # #stat.
      case sarrayIO
      when StringIO
        num_suffixes = sarrayIO.string.size / 4 - 1
      else
        num_suffixes = sarrayIO.stat.size / 4 - 1
      end

      index, offset = binary_search(sarrayIO, fulltextIO, term, 0, num_suffixes)
      if offset
        fulltextIO.pos = offset
        # NOTE(review): the next two lines look truncated by text extraction;
        # an identifier seems to be missing after "path," and a method name
        # before "(fulltextIO)". As written, path is just fulltextIO. Confirm
        # against lib/fastri/full_text_index.rb before relying on this.
        path,  = (fulltextIO)
        return Result.new(self, term, index, path, ) if path
      else
        nil
      end
    end
  end
end

#next_match(result, term_or_regexp = "") ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/fastri/full_text_index.rb', line 85

# Scans forward from the suffix after result.index and returns the first
# following entry that still starts with result.query and also matches
# +term_or_regexp+.
#
# result         - a previous Result; its #index and #query are read.
# term_or_regexp - String or Regexp used as an extra filter through String#[]
#                  (default: "", which every string matches).
#
# Returns a Result through an explicit return on the first match. When the
# loop ends without one, the return value is whatever get_fulltext_IO yields
# back from its block (not visible here).
def next_match(result, term_or_regexp = "")
  # How many characters to fetch per candidate. For any other argument class
  # neither branch runs and size stays nil.
  case term_or_regexp
  when String;  size = [result.query.size, term_or_regexp.size].max
  when Regexp;  size = MAX_REGEXP_MATCH_SIZE
  end
  get_fulltext_IO do |fulltextIO|
    get_sarray_IO do |sarrayIO|
      idx = result.index
      loop do
        idx += 1
        str = get_string(sarrayIO, fulltextIO, idx, size)
        # Keep only the text before the first "\0".
        upto = str.index("\0")
        str = str[0, upto] if upto
        # Stop at the first entry that no longer begins with the query.
        break unless str.index(result.query) == 0
        if str[term_or_regexp]
          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          # NOTE(review): the next two lines look truncated by text
          # extraction; an identifier seems to be missing after "path," and a
          # method name before "(fulltextIO)". Confirm against
          # lib/fastri/full_text_index.rb.
          path,  = (fulltextIO)
          return Result.new(self, result.query, idx, path, ) if path
        end
      end
    end
  end
end

#next_matches(result, term_or_regexp = "") ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/fastri/full_text_index.rb', line 109

# Scans forward from the suffix after result.index and collects every
# following entry that still starts with result.query and also matches
# +term_or_regexp+.
#
# result         - a previous Result; its #index and #query are read.
# term_or_regexp - String or Regexp used as an extra filter through String#[]
#                  (default: "", which every string matches).
#
# Returns an Array of Result objects in scan order (empty when nothing
# matched).
def next_matches(result, term_or_regexp = "")
  # How many characters to fetch per candidate. For any other argument class
  # neither branch runs and size stays nil.
  case term_or_regexp
  when String;  size = [result.query.size, term_or_regexp.size].max
  when Regexp;  size = MAX_REGEXP_MATCH_SIZE
  end
  ret = []
  get_fulltext_IO do |fulltextIO|
    get_sarray_IO do |sarrayIO|
      idx = result.index
      loop do
        idx += 1
        str = get_string(sarrayIO, fulltextIO, idx, size)
        # Keep only the text before the first "\0".
        upto = str.index("\0")
        str = str[0, upto] if upto
        # Stop at the first entry that no longer begins with the query.
        break unless str.index(result.query) == 0
        if str[term_or_regexp]
          fulltextIO.pos = index_to_offset(sarrayIO, idx)
          # NOTE(review): the next two lines look truncated by text
          # extraction; an identifier seems to be missing after "path," and a
          # method name before "(fulltextIO)". Confirm against
          # lib/fastri/full_text_index.rb.
          path,  = (fulltextIO)
          ret << Result.new(self, result.query, idx, path, ) if path
        end
      end
    end
  end

  ret
end