Class: Langusta::TagExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/langusta/tag_extractor.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(tag, threshold) ⇒ TagExtractor

Returns a new instance of TagExtractor.



6
7
8
9
10
11
12
# File 'lib/langusta/tag_extractor.rb', line 6

# Sets up an extractor for a single target tag with an empty buffer.
#
# @param tag [Object] stored as the target; the current tag is compared
#   against it with == in #add and #close_tag
# @param threshold [Object] value the buffer length must strictly exceed
#   before #close_tag processes the buffer (presumably an Integer — confirm
#   against callers)
def initialize(tag, threshold)
  @target, @threshold = tag, threshold
  @count = 0   # incremented by #close_tag each time a buffer is processed
  @buffer = []
  @tag = nil   # no current tag until one is assigned
end

Instance Attribute Details

#buffer ⇒ Object (readonly)

Returns the value of attribute buffer.



4
5
6
# File 'lib/langusta/tag_extractor.rb', line 4

# Reader for the accumulated buffer.
#
# @return [Array] the elements appended by #add since construction or the
#   last #clear (both of which assign an empty Array)
def buffer
  @buffer
end

#count ⇒ Object (readonly)

Returns the value of attribute count.



4
5
6
# File 'lib/langusta/tag_extractor.rb', line 4

# Reader for the processed-buffer counter.
#
# @return [Integer] starts at 0 and is incremented by #close_tag each time
#   it feeds a buffer into a profile
def count
  @count
end

#tag ⇒ Object

Returns the value of attribute tag.



3
4
5
# File 'lib/langusta/tag_extractor.rb', line 3

# Reader for the current tag.
#
# @return [Object, nil] the current tag; nil after construction and after
#   #clear. #add and #close_tag only act when it equals #target.
def tag
  @tag
end

#target ⇒ Object (readonly)

Returns the value of attribute target.



4
5
6
# File 'lib/langusta/tag_extractor.rb', line 4

# Reader for the target tag.
#
# @return [Object] the tag value that was passed to the constructor
def target
  @target
end

#threshold ⇒ Object (readonly)

Returns the value of attribute threshold.



4
5
6
# File 'lib/langusta/tag_extractor.rb', line 4

# Reader for the length threshold.
#
# @return [Object] the threshold passed to the constructor; #close_tag only
#   processes a buffer whose length is strictly greater than this value
def threshold
  @threshold
end

Instance Method Details

#add(line) ⇒ Object



14
15
16
17
18
# File 'lib/langusta/tag_extractor.rb', line 14

# Appends +line+ to the buffer while the current tag equals the target tag.
#
# Nothing is buffered when the current tag differs from the target or when
# +line+ is nil/false.
#
# @param line [Array, nil] elements to append; presumably the codepoints
#   that #close_tag later passes to NGram#add_char — confirm against callers
# @return [Array, nil] the buffer after appending, or nil when nothing was
#   buffered
def add(line)
  return unless @target == @tag && line

  # Append in place. `@buffer += line` built a brand-new Array and copied the
  # whole buffer on every call, making accumulation of a long tag body
  # quadratic in its length.
  @buffer.concat(line)
end

#clear ⇒ Object



20
21
22
23
# File 'lib/langusta/tag_extractor.rb', line 20

# Forgets the current tag and swaps in a new, empty buffer.
#
# @return [Array] the new empty buffer
def clear
  @tag, @buffer = nil, []
  @buffer
end

#close_tag(profile) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/langusta/tag_extractor.rb', line 25

# Finishes the current tag, optionally feeding the buffered elements into
# +profile+ as n-grams, then resets the extractor via #clear.
#
# The buffer is processed only when +profile+ is truthy, the current tag
# equals the target, and the buffer length is strictly greater than the
# threshold. In that case every buffered element is pushed into a fresh
# NGram, and after each push the grams of size 1..NGram::N_GRAM are passed
# to profile.add; the counter is then incremented once.
#
# @param profile [#add, nil] receiver of the n-grams; nil/false skips
#   processing
# @return [Object] whatever #clear returns
def close_tag(profile)
  ready = profile && @tag == @target && @buffer.length > @threshold

  if ready
    ngram = NGram.new
    @buffer.each do |char|
      ngram.add_char(char)
      1.upto(NGram::N_GRAM) { |size| profile.add(ngram.get(size)) }
    end
    @count += 1
  end

  clear
end