Class: Lingo::Attendee::LsiFilter

Inherits:
DeferredAttendee show all
Defined in:
lib/lingo/attendee/lsi_filter.rb

Constant Summary

Constants inherited from Lingo::Attendee

DEFAULT_SKIP, TERMINALS

Constants included from Language

Language::CHAR_PUNCT, Language::LA_SORTORDER, Language::TA_ABBREVIATION, Language::TA_HELP, Language::TA_HTML, Language::TA_NUMBER, Language::TA_OTHER, Language::TA_PUNCTUATION, Language::TA_SKIP, Language::TA_SPACE, Language::TA_URL, Language::TA_WIKI, Language::TA_WORD, Language::WA_COMPOUND, Language::WA_IDENTIFIED, Language::WA_MULTIWORD, Language::WA_SEQUENCE, Language::WA_UNKMULPART, Language::WA_UNKNOWN, Language::WA_UNSET

Instance Attribute Summary

Attributes inherited from Lingo::Attendee

#lingo, #subscribers

Instance Method Summary collapse

Methods inherited from DeferredAttendee

enhance, #initialize

Methods inherited from Lingo::Attendee

#command, #forward, #initialize

Constructor Details

This class inherits a constructor from Lingo::DeferredAttendee

Instance Method Details

#control(cmd) ⇒ Object



53
54
55
# File 'lib/lingo/attendee/lsi_filter.rb', line 53

# Reacts to a control command.
#
# Any arguments after +cmd+ are accepted and ignored.
#
# @param cmd [Object] the command identifier (compared against :EOL)
# @return [Symbol, nil] :skip_command when +cmd+ is :EOL, otherwise nil
def control(cmd, *)
  return unless cmd == :EOL

  :skip_command
end

#control_deferred(cmd) ⇒ Object



57
58
59
# File 'lib/lingo/attendee/lsi_filter.rb', line 57

# Advances the document counter when +cmd+ is one of TERMINALS.
#
# Any arguments after +cmd+ are accepted and ignored.
#
# @param cmd [Object] the command identifier (looked up in TERMINALS)
# @return [Integer, nil] the incremented @docnum, or nil when +cmd+ is not
#   listed in TERMINALS
def control_deferred(cmd, *)
  return unless TERMINALS.include?(cmd)

  @docnum += 1
end

#init ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/lingo/attendee/lsi_filter.rb', line 33

# Prepares this attendee's state: loads the lsi4r library, reads the
# configuration options through the get_* helpers (defined outside this
# block) and resets the per-document bookkeeping.
#
# NOTE(review): only @lex, @skip, @docnum and @vectors are used by the other
# methods shown alongside this one; the remaining options are presumably
# consumed elsewhere in the class -- confirm against the full file.
def init
  # Loaded first; presumably this is what makes the Lsi4R constants
  # referenced below available -- confirm in require_lib.
  require_lib('lsi4r')

  # 'lexicals' option (default '[sy]'); #process hands it to Word#each_lex.
  @lex  = get_re('lexicals', '[sy]')
  # 'skip' option (default DEFAULT_SKIP); #process ignores words whose attr
  # is included in this collection.
  @skip = get_ary('skip', DEFAULT_SKIP, :upcase)

  # 'transform' and 'cut' options, defaulting to the lsi4r library constants.
  @transform = get_key('transform', Lsi4R::DEFAULT_TRANSFORM)
  @cutoff    = get_flo('cut',       Lsi4R::DEFAULT_CUTOFF)

  # 'min', 'abs' and 'nul' options, each requested with a default of false.
  @min = get_flo('min', false)
  @abs = get_flo('abs', false)
  @nul = get_flo('nul', false)
  # 'new' option, requested with a default of true.
  @new = get_key('new', true)

  # 'sort' option (default false). When the value supports downcase! it is
  # lowercased in place, i.e. the object returned by get_key is mutated.
  @sort = get_key('sort', false)
  @sort.downcase! if @sort.respond_to?(:downcase!)

  # Document counter starts at 0; @vectors lazily creates an empty array for
  # every key on first access (keyed by @docnum in #process).
  @docnum, @vectors = 0, Hash.new { |h, k| h[k] = [] }
end

#process(obj) ⇒ Object



61
62
63
64
65
66
67
# File 'lib/lingo/attendee/lsi_filter.rb', line 61

# Collects the downcased forms of a word's lexicals into the term list of
# the current document (@vectors[@docnum]).
#
# Objects that are not Word instances, and words whose attr is included in
# @skip, are ignored. Nothing is stored (and no entry is created in
# @vectors) when the word yields no lexicals for @lex.
#
# @param obj [Object] the stream object to inspect
# @return [Array, nil] the current document's term list after appending,
#   or nil when nothing was added
def process(obj)
  return if !obj.is_a?(Word) || @skip.include?(obj.attr)

  forms = []
  obj.each_lex(@lex) { |lexical| forms.push(Unicode.downcase(lexical.form)) }
  return if forms.empty?

  @vectors[@docnum].concat(forms)
end