Class: Yanbi::Corpus
- Inherits: Object
- Inheritance chain: Object → Yanbi::Corpus
- Defined in:
- lib/corpus.rb
Instance Attribute Summary
-
#all ⇒ Object
readonly
Returns the value of attribute all.
-
#bags ⇒ Object
readonly
Returns the value of attribute bags.
-
#docs ⇒ Object
readonly
Returns the value of attribute docs.
Instance Method Summary
- #add_doc(doc, comment = nil) ⇒ Object
- #add_file(docpath, delim = nil, comment = nil) ⇒ Object
- #each_doc ⇒ Object
-
#initialize(klass = WordBag) ⇒ Corpus
constructor
A new instance of Corpus.
- #size ⇒ Object
- #to_index ⇒ Object
Constructor Details
#initialize(klass = WordBag) ⇒ Corpus
Returns a new instance of Corpus.
22 23 24 25 26 27 |
# File 'lib/corpus.rb', line 22
# Sets up an empty corpus.
#
# @param klass [Class] bag class; it is instantiated with no arguments to
#   create the aggregate bag stored in @all
def initialize(klass = WordBag)
  # Aggregate bag for the whole corpus.
  @all = klass.new
  # Parallel per-document collections, appended to by add_doc.
  @bags = []
  @docs = []
  # Dictionary cache used by to_index; nil means "not built yet".
  @index = nil
end
Instance Attribute Details
#all ⇒ Object (readonly)
Returns the value of attribute all.
20 21 22 |
# File 'lib/corpus.rb', line 20
# Read-only accessor for the aggregate bag: the object created by
# `klass.new` in the constructor, which add_doc feeds via `add_text`.
#
# @return [Object] the current value of @all
def all
  @all
end
#bags ⇒ Object (readonly)
Returns the value of attribute bags.
19 20 21 |
# File 'lib/corpus.rb', line 19
# Read-only accessor for the per-document bags; add_doc appends one bag
# for every non-empty document.
#
# @return [Array] the current value of @bags
def bags
  @bags
end
#docs ⇒ Object (readonly)
Returns the value of attribute docs.
18 19 20 |
# File 'lib/corpus.rb', line 18
# Read-only accessor for the stored document texts; add_doc appends one
# entry for every non-empty document.
#
# @return [Array] the current value of @docs
def docs
  @docs
end
Instance Method Details
#add_doc(doc, comment = nil) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/corpus.rb', line 48
# Adds a single document to the corpus.
#
# The text is cleaned on a copy, so the caller's string is never modified
# and frozen strings are accepted. Documents that are empty after cleaning
# are ignored.
#
# @param doc [String] raw document text
# @param comment [String, Regexp, nil] pattern whose matches are removed
#   from the text before it is stored; nil skips this step
# @return [nil]
def add_doc(doc, comment = nil)
  text = comment ? doc.gsub(comment, '') : doc
  text = text.strip
  return if text.empty?

  # One bag per document, of the same class as the aggregate bag.
  @bags << @all.class.new(text)
  @all.add_text text
  @docs << text
  # The cached dictionary no longer reflects the corpus; to_index rebuilds it.
  @index = nil
end
#add_file(docpath, delim = nil, comment = nil) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/corpus.rb', line 33
# Reads a file and adds its contents to the corpus, either as one document
# or as several documents separated by +delim+.
#
# The file is read with File.read, so the handle is released even when
# reading raises.
#
# @param docpath [String] path of the file to read
# @param delim [String, Regexp, nil] separator passed to String#split;
#   nil adds the whole file as a single document
# @param comment [String, Regexp, nil] forwarded unchanged to add_doc
# @return [Object] the array of split pieces when +delim+ is given,
#   otherwise whatever add_doc returns
def add_file(docpath, delim = nil, comment = nil)
  # Re-encode to UTF-8, dropping byte sequences that are not valid.
  raw = File.read(docpath).encode("UTF-8", invalid: :replace, replace: "")

  if delim
    raw.split(delim).each { |piece| add_doc(piece, comment) }
  else
    add_doc(raw, comment)
  end
end
#each_doc ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/corpus.rb', line 60
# Yields every document's bag together with its stored text.
#
# @bags and @docs are filled in lockstep by add_doc, so they are zipped to
# give each bag its matching document. Iterating @bags alone would leave
# the second block argument nil.
#
# The total of `bag.words.count` is sampled before and after each yield.
# If the block changed the totals, rebuild_all (defined outside this
# excerpt) is invoked. NOTE(review): presumably it resynchronises the
# aggregate bag — confirm against its definition.
#
# @yieldparam bag [Object] the document's bag
# @yieldparam doc [String] the document text stored alongside the bag
# @return [Enumerator] when called without a block
# @return [Object, nil] otherwise the result of rebuild_all, or nil when
#   no rebuild was needed
def each_doc
  return enum_for(:each_doc) unless block_given?

  before = 0
  after = 0
  @bags.zip(@docs).each do |bag, doc|
    before += bag.words.count
    yield bag, doc
    after += bag.words.count
  end
  rebuild_all if before != after
end
#size ⇒ Object
29 30 31 |
# File 'lib/corpus.rb', line 29
# Number of documents currently stored in the corpus.
#
# @return [Integer] element count of @docs
def size
  @docs.length
end
#to_index ⇒ Object
73 74 75 76 77 78 79 80 |
# File 'lib/corpus.rb', line 73
# Returns the dictionary for this corpus, building it on first use.
#
# The dictionary is constructed from the unique words of the aggregate bag
# plus the bag class. It is cached in @index, which add_doc resets to nil
# whenever a document is added.
#
# @return [Yanbi::Dictionary] the cached or freshly built dictionary
def to_index
  @index ||= Yanbi::Dictionary.new(all.words.uniq, @all.class)
end