Class: Lingo::Attendee::LsiFilter
Constant Summary
DEFAULT_SKIP, TERMINALS
Constants included
from Language
Language::CHAR_PUNCT, Language::LA_SORTORDER, Language::TA_ABBREVIATION, Language::TA_HELP, Language::TA_HTML, Language::TA_NUMBER, Language::TA_OTHER, Language::TA_PUNCTUATION, Language::TA_SKIP, Language::TA_SPACE, Language::TA_URL, Language::TA_WIKI, Language::TA_WORD, Language::WA_COMPOUND, Language::WA_IDENTIFIED, Language::WA_MULTIWORD, Language::WA_SEQUENCE, Language::WA_UNKMULPART, Language::WA_UNKNOWN, Language::WA_UNSET
Instance Attribute Summary
#lingo, #subscribers
Instance Method Summary
collapse
enhance, #initialize
#command, #forward, #initialize
Instance Method Details
#control(cmd) ⇒ Object
53
54
55
|
# File 'lib/lingo/attendee/lsi_filter.rb', line 53
# Handles a control command as it arrives.
#
# Returns +:skip_command+ when +cmd+ is +:EOL+ and +nil+ for every other
# command. Any additional arguments are accepted and ignored.
# NOTE(review): presumably +:skip_command+ tells the caller not to pass the
# command on -- confirm against the attendee base class.
def control(cmd, *)
  return unless cmd == :EOL

  :skip_command
end
|
#control_deferred(cmd) ⇒ Object
57
58
59
|
# File 'lib/lingo/attendee/lsi_filter.rb', line 57
# Handles a control command in the deferred phase.
#
# Increments the current document number (+@docnum+) when +cmd+ is one of
# +TERMINALS+; otherwise does nothing. Returns the new document number, or
# +nil+ when +cmd+ is not a terminal command. Extra arguments are ignored.
def control_deferred(cmd, *)
  return unless TERMINALS.include?(cmd)

  @docnum += 1
end
|
#init ⇒ Object
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
# File 'lib/lingo/attendee/lsi_filter.rb', line 33
# Sets up the filter: loads the +lsi4r+ library through +require_lib+, reads
# the attendee's options through the +get_*+ helpers and resets the
# per-run state (+@docnum+ and +@vectors+).
#
# Options read (key => default):
#   lexicals  => '[sy]'                     (via +get_re+)
#   skip      => DEFAULT_SKIP, :upcase      (via +get_ary+)
#   transform => Lsi4R::DEFAULT_TRANSFORM
#   cut       => Lsi4R::DEFAULT_CUTOFF
#   min, abs, nul, sort => false
#   new       => true
def init
  require_lib('lsi4r')

  @lex  = get_re('lexicals', '[sy]')
  @skip = get_ary('skip', DEFAULT_SKIP, :upcase)

  @transform = get_key('transform', Lsi4R::DEFAULT_TRANSFORM)
  @cutoff    = get_flo('cut', Lsi4R::DEFAULT_CUTOFF)

  @min = get_flo('min', false)
  @abs = get_flo('abs', false)
  @nul = get_flo('nul', false)
  @new = get_key('new', true)

  @sort = get_key('sort', false)
  # Lower-case string-like values (anything offering +downcase!+) without
  # modifying the object +get_key+ handed back: an in-place +downcase!+ would
  # alter that object for every other holder of it and raises FrozenError
  # when it is frozen.
  @sort = @sort.downcase if @sort.respond_to?(:downcase!)

  # Start at document 0; each document number lazily gets its own empty
  # array on first access.
  @docnum, @vectors = 0, Hash.new { |h, k| h[k] = [] }
end
|
#process(obj) ⇒ Object
61
62
63
64
65
66
67
|
# File 'lib/lingo/attendee/lsi_filter.rb', line 61
# Collects the lexical forms of a word into the current document's vector.
#
# Objects that are not a +Word+, and words whose +attr+ is listed in
# +@skip+, are ignored. For any other word, the lower-cased form of each
# lexical yielded by +each_lex(@lex)+ is appended to +@vectors[@docnum]+.
# Nothing is stored (and no entry is created) when no lexical is yielded.
#
# Returns the document's vector after appending, or +nil+ when nothing was
# added.
def process(obj)
  return unless obj.is_a?(Word)
  return if @skip.include?(obj.attr)

  forms = []
  obj.each_lex(@lex) do |lexical|
    forms << Unicode.downcase(lexical.form)
  end

  @vectors[@docnum].concat(forms) unless forms.empty?
end
|