Class: FastFuzzy::Analyzer

Inherits:
Object
Includes:
Enumerable
Defined in:
lib/fast_fuzzy/analyzer.rb

Constant Summary collapse

# Default Lucene analysis chain. Each entry is [ComponentClass, *extra_args]:
# the first entry is the tokenizer; every following entry is a token filter
# that wraps the previous stream (see #initialize for how the chain is built).
STANDARD_CHAIN =
[
  [Lucene::StandardTokenizer],
  [Lucene::StandardFilter],
  [Lucene::LowerCaseFilter],
  # StopFilter takes the stop-word set as an extra constructor argument.
  [Lucene::StopFilter, Lucene::StandardAnalyzer::STOP_WORDS_SET],
]

Instance Method Summary collapse

Constructor Details

#initialize(str, chain_definition = STANDARD_CHAIN) ⇒ Analyzer

Returns a new instance of Analyzer.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/fast_fuzzy/analyzer.rb', line 13

# Builds a Lucene token-stream pipeline over +str+.
#
# @param str [String] the text to analyze
# @param chain_definition [Array<Array>] ordered chain of
#   [ComponentClass, *extra_args] entries; the first entry must be a
#   tokenizer, the rest are token filters wrapping the previous stream.
def initialize(str, chain_definition = STANDARD_CHAIN)
  @str = str

  # First chain element must be the tokenizer: instantiate it and feed it
  # the input text through a Java StringReader.
  tokenizer_class, *tokenizer_args = chain_definition.first
  tokenizer = tokenizer_class.new(*tokenizer_args)
  tokenizer.set_reader(java.io.StringReader.new(str))

  # Wrap the tokenizer with each following filter, building the stream
  # chain. Each filter receives the previous stream as its first
  # constructor argument, followed by any extra args from the definition.
  stream = chain_definition[1..-1].inject(tokenizer) do |inner, (filter_class, *filter_args)|
    filter_class.new(inner, *filter_args)
  end

  # CachingTokenFilter records tokens on the first traversal so the
  # stream can be walked multiple times (needed for Enumerable reuse).
  @stream = Lucene::CachingTokenFilter.new(stream)
  @term_attr = @stream.addAttribute(Lucene::CharTermAttribute.java_class)
  @type_attr = @stream.addAttribute(Lucene::TypeAttribute.java_class)
end

Instance Method Details

#closeObject



45
46
47
# File 'lib/fast_fuzzy/analyzer.rb', line 45

# Releases the cached Lucene token stream and its underlying resources.
# Call once the analyzer's tokens are no longer needed.
def close
  @stream.close
end

#each(&block) ⇒ Object

implement each for Enumerable



37
38
39
40
41
42
43
# File 'lib/fast_fuzzy/analyzer.rb', line 37

# Yields each token from the cached stream as a [term, type] pair.
# Implements +each+ for Enumerable.
#
# @yieldparam token [Array(String, String)] the token text and its type
# @return [Enumerator] when no block is given (standard Enumerable
#   convention), otherwise the result of ending the stream
def each(&block)
  # Without a block, hand back an Enumerator instead of silently
  # consuming the stream — matches Enumerable's contract.
  return enum_for(:each) unless block

  @stream.reset
  while @stream.incrementToken
    # Fixed: the captured block was previously ignored in favor of yield.
    block.call([@term_attr.to_string, @type_attr.type])
  end
  # `end` is a keyword, but an explicit receiver makes this a method call.
  @stream.end
end