Class: WordTree::Book
- Inherits: Object
- Object
- WordTree::Book
- Defined in:
- lib/wordtree/book.rb
Class Method Summary collapse
- .create(id, metadata, content) ⇒ Object
Instance Method Summary collapse
- #all_ngrams ⇒ Object
- #calculate_simhash ⇒ Object
- #content_clean(wrap = 120) ⇒ Object
- #content_size ⇒ Object
- #count_ngrams(n = 1) ⇒ Object
- #default_id ⇒ Object
- #each_ngram(n = 1, &block) ⇒ Object
- #initialize(*args) ⇒ Book (constructor): A new instance of Book.
- #metadata ⇒ Object
- #ngrams(n = 1) ⇒ Object
- #set_ngrams(n, lookup) ⇒ Object
Constructor Details
#initialize(*args) ⇒ Book
Returns a new instance of Book.
24 25 26 27 |
# File 'lib/wordtree/book.rb', line 24 def initialize(*args) super @ngrams = {} end |
Class Method Details
.create(id, metadata, content) ⇒ Object
29 30 31 |
# File 'lib/wordtree/book.rb', line 29 def self.create(id, metadata, content) new(metadata.merge("id" => id, "content" => content)) end |
Instance Method Details
#all_ngrams ⇒ Object
68 69 70 |
# File 'lib/wordtree/book.rb', line 68 def all_ngrams @ngrams end |
#calculate_simhash ⇒ Object
81 82 83 |
# File 'lib/wordtree/book.rb', line 81 def calculate_simhash content ? content_clean.simhash(:split_by => /\s/) : nil end |
#content_clean(wrap = 120) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/wordtree/book.rb', line 41 def content_clean(wrap=120) if @content_clean_wrap != wrap # Memoize content_clean (using last wrap size) @content_clean_wrap = wrap @content_clean = TextUtils.clean_text(content, wrap) end @content_clean end |
#content_size ⇒ Object
50 51 52 |
# File 'lib/wordtree/book.rb', line 50 def content_size content ? content.size : nil end |
#count_ngrams(n = 1) ⇒ Object
72 73 74 75 76 77 78 79 |
# File 'lib/wordtree/book.rb', line 72 def count_ngrams(n=1) {}.tap do |tally| each_ngram(n) do |ngram| tally[ngram] ||= 0 tally[ngram] += 1 end end end |
#default_id ⇒ Object
33 34 35 |
# File 'lib/wordtree/book.rb', line 33 def default_id archive_org_id end |
#each_ngram(n = 1, &block) ⇒ Object
54 55 56 |
# File 'lib/wordtree/book.rb', line 54 def each_ngram(n=1, &block) TextUtils.each_ngram(content_clean, n, &block) end |
#metadata ⇒ Object
37 38 39 |
# File 'lib/wordtree/book.rb', line 37 def metadata attributes.select{ |k,v| !v.nil? && k != :content } end |
#ngrams(n = 1) ⇒ Object
63 64 65 66 |
# File 'lib/wordtree/book.rb', line 63 def ngrams(n=1) # Memoize ngram counts @ngrams[n] ||= count_ngrams(n) end |
#set_ngrams(n, lookup) ⇒ Object
58 59 60 61 |
# File 'lib/wordtree/book.rb', line 58 def set_ngrams(n, lookup) raise ArgumentError, "must be a Hash" unless lookup.is_a?(Hash) @ngrams[n] = lookup end |