Class: Yanbi::Corpus

Inherits:
Object
  • Object
show all
Defined in:
lib/corpus.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(klass = WordBag) ⇒ Corpus

Returns a new instance of Corpus.



22
23
24
25
26
27
# File 'lib/corpus.rb', line 22

# Sets up an empty corpus.
#
# @param klass [Class] class instantiated (with no arguments) to hold the
#   aggregate of every document; the same class is later used for the
#   per-document bags
def initialize(klass = WordBag)
  # Aggregate bag plus the lazily built index cache (nil means "not built").
  @all, @index = klass.new, nil

  # Parallel collections filled as documents are added.
  @docs = []
  @bags = []
end

Instance Attribute Details

#all ⇒ Object (readonly)

Returns the value of attribute all.



20
21
22
# File 'lib/corpus.rb', line 20

# Reader for @all: the object created from the bag class in the constructor,
# which receives the text of every document passed to add_doc.
def all
  @all
end

#bags ⇒ Object (readonly)

Returns the value of attribute bags.



19
20
21
# File 'lib/corpus.rb', line 19

# Reader for @bags: the array holding one bag per non-empty document
# added through add_doc, in insertion order.
def bags
  @bags
end

#docs ⇒ Object (readonly)

Returns the value of attribute docs.



18
19
20
# File 'lib/corpus.rb', line 18

# Reader for @docs: the array of cleaned document strings stored by
# add_doc, in insertion order.
def docs
  @docs
end

Instance Method Details

#add_doc(doc, comment = nil) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
# File 'lib/corpus.rb', line 48

# Adds a single document to the corpus.
#
# The text is cleaned on a copy, so the caller's string is never modified
# and frozen strings are accepted.
#
# @param doc [String] raw document text
# @param comment [String, Regexp, nil] pattern whose matches are removed
#   from the text before it is stored; nil leaves the text untouched
# @return [nil]
def add_doc(doc, comment = nil)
  text = comment ? doc.gsub(comment, '') : doc
  text = text.strip

  # Documents that are empty after cleaning are ignored entirely.
  return if text.empty?

  @bags << @all.class.new(text)
  @all.add_text text
  @docs << text
  # Any previously built index no longer matches the corpus contents.
  @index = nil
end

#add_file(docpath, delim = nil, comment = nil) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/corpus.rb', line 33

# Reads a file and adds its contents to the corpus through add_doc.
#
# @param docpath [String] path of the file to read
# @param delim [String, Regexp, nil] when given, the file content is split
#   on it and every piece is added as its own document; otherwise the whole
#   file is added as one document
# @param comment [String, Regexp, nil] passed through to add_doc unchanged
# @return [Object] the array of pieces when +delim+ is given, otherwise
#   whatever add_doc returns
def add_file(docpath, delim = nil, comment = nil)
  # File.read opens, reads and closes in a single call, so the file handle
  # cannot be left open if reading raises.
  raw = File.read(docpath)

  # Transcode to UTF-8, dropping invalid byte sequences.
  raw = raw.encode("UTF-8", invalid: :replace, replace: "")

  if delim
    raw.split(delim).each { |piece| add_doc(piece, comment) }
  else
    add_doc(raw, comment)
  end
end

#each_doc ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/corpus.rb', line 60

# Yields every bag together with the document text stored at the same
# position in @docs.
#
# The word counts of the bags are summed before and after each yield; if
# the block changed them, rebuild_all is invoked afterwards.
# NOTE(review): rebuild_all is defined elsewhere in the class — presumably
# it recomputes the aggregate bag; confirm.
#
# @yieldparam bag [Object] a per-document bag from @bags
# @yieldparam doc [String] the document at the same index in @docs
# @return [Object, nil] the result of rebuild_all when it ran, otherwise nil
def each_doc
  before = 0
  after = 0

  # add_doc appends to @bags and @docs together, so the same index in both
  # arrays refers to the same document.
  @bags.each_with_index do |bag, i|
    before += bag.words.count
    yield bag, @docs[i]
    after += bag.words.count
  end

  rebuild_all if before != after
end

#size ⇒ Object



29
30
31
# File 'lib/corpus.rb', line 29

# Number of documents currently stored in the corpus.
#
# @return [Integer] element count of @docs
def size
  @docs.length
end

#to_index ⇒ Object



73
74
75
76
77
78
79
80
# File 'lib/corpus.rb', line 73

# Returns the dictionary built from the corpus, creating it on first use.
#
# The result is cached in @index and reused until that cache is reset to nil.
#
# @return [Yanbi::Dictionary] dictionary built from the unique words of the
#   aggregate bag and the bag class
def to_index
  return @index unless @index.nil?

  vocabulary = all.words.uniq
  @index = Yanbi::Dictionary.new(vocabulary, @all.class)
end