Class: Ankusa::HBaseStorage
- Inherits: Object
- Inheritance chain: Object → Ankusa::HBaseStorage
- Defined in: lib/ankusa/hbase_storage.rb
Instance Attribute Summary
- #hbase ⇒ Object (readonly)
  Returns the value of attribute hbase.
Instance Method Summary
- #classnames ⇒ Object
- #close ⇒ Object
- #doc_count_totals ⇒ Object
- #drop_tables ⇒ Object
- #get_doc_count(klass) ⇒ Object
- #get_total_word_count(klass) ⇒ Object
- #get_vocabulary_sizes ⇒ Object
- #get_word_counts(word) ⇒ Object
- #incr_doc_count(klass, count) ⇒ Object
- #incr_total_word_count(klass, count) ⇒ Object
- #incr_word_count(klass, word, count) ⇒ Object
- #init_tables ⇒ Object
- #initialize(host = 'localhost', port = 9090, frequency_tablename = "ankusa_word_frequencies", summary_tablename = "ankusa_summary") ⇒ HBaseStorage (constructor)
  A new instance of HBaseStorage.
- #reset ⇒ Object
Constructor Details
#initialize(host = 'localhost', port = 9090, frequency_tablename = "ankusa_word_frequencies", summary_tablename = "ankusa_summary") ⇒ HBaseStorage
Returns a new instance of HBaseStorage.
8 9 10 11 12 13 14 15 |
# File 'lib/ankusa/hbase_storage.rb', line 8
# Builds a storage object: starts with empty per-class count caches, records
# the two table names, opens an HBaseRb::Client for host/port, and finally
# calls init_tables.
#
# @param host [String] forwarded to HBaseRb::Client.new
# @param port [Integer] forwarded to HBaseRb::Client.new
# @param frequency_tablename [String] kept in @ftablename
# @param summary_tablename [String] kept in @stablename
def initialize(host='localhost', port=9090, frequency_tablename="ankusa_word_frequencies", summary_tablename="ankusa_summary")
  @klass_word_counts = {}
  @klass_doc_counts = {}
  @ftablename = frequency_tablename
  @stablename = summary_tablename
  @hbase = HBaseRb::Client.new(host, port)
  init_tables
end
Instance Attribute Details
#hbase ⇒ Object (readonly)
Returns the value of attribute hbase.
6 7 8 |
# File 'lib/ankusa/hbase_storage.rb', line 6
# Read-only accessor for the @hbase instance variable.
#
# @return [Object] the current value of @hbase
def hbase
  @hbase
end
Instance Method Details
#classnames ⇒ Object
17 18 19 20 21 22 23 |
# File 'lib/ankusa/hbase_storage.rb', line 17
# Runs a scanner over the summary table and collects the key of every row it
# yields, converted to a symbol.
# NOTE(review): the scanner arguments "" and "totals" are presumably the start
# row and the column family - confirm against the HBaseRb scanner API.
#
# @return [Array<Symbol>] one symbol per scanned row
def classnames
  names = []
  summary_table.create_scanner("", "totals") do |entry|
    names.push(entry.row.intern)
  end
  names
end
#close ⇒ Object
105 106 107 |
# File 'lib/ankusa/hbase_storage.rb', line 105
# Closes the client held in @hbase by delegating to its #close method.
#
# @return [Object] whatever @hbase.close returns
def close
  @hbase.close
end
#doc_count_totals ⇒ Object
101 102 103 |
# File 'lib/ankusa/hbase_storage.rb', line 101
# Looks up the "totals:doccount" column through get_summary.
#
# @return [Object] the result of get_summary for "totals:doccount"
def doc_count_totals
  get_summary("totals:doccount")
end
#drop_tables ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/ankusa/hbase_storage.rb', line 30
# Deletes the frequency table, then the summary table, and clears the local
# state that referred to them: @ftable/@stable are set to nil and both
# per-class count caches are replaced with empty hashes.
# NOTE(review): @ftable/@stable are presumably the handles memoized by
# freq_table/summary_table - confirm in those helpers.
#
# @return [Hash] the new, empty cache hash assigned last
def drop_tables
  freq_table.delete
  summary_table.delete
  @ftable = @stable = nil
  @klass_doc_counts = {}
  @klass_word_counts = {}
end
#get_doc_count(klass) ⇒ Object
73 74 75 76 77 78 79 |
# File 'lib/ankusa/hbase_storage.rb', line 73
# Returns the document count for klass, served from @klass_doc_counts when
# the class is already cached. On a cache miss the "totals:doccount" cell is
# read from the summary table, converted, cached, and returned.
#
# @param klass [Object] row key of the class in the summary table
# @return [Float, Integer] the stored count as a Float, or Integer 0 when the
#   summary table returns no cell for the class
def get_doc_count(klass)
  return @klass_doc_counts[klass] if @klass_doc_counts.key?(klass)

  cells = summary_table.get(klass, "totals:doccount")
  @klass_doc_counts[klass] = cells.size.zero? ? 0 : cells.first.to_i64.to_f
end
#get_total_word_count(klass) ⇒ Object
67 68 69 70 71 |
# File 'lib/ankusa/hbase_storage.rb', line 67
# Returns the total word count for klass, served from @klass_word_counts when
# the class is already cached. On a cache miss the "totals:wordcount" cell is
# read from the summary table, converted to a Float, cached, and returned.
#
# A class with no "totals:wordcount" cell yields 0.0 instead of raising
# NoMethodError on nil, mirroring the empty-result handling in get_doc_count.
#
# @param klass [Object] row key of the class in the summary table
# @return [Float] the stored word count, or 0.0 when no cell exists
def get_total_word_count(klass)
  @klass_word_counts.fetch(klass) do
    cells = summary_table.get(klass, "totals:wordcount")
    @klass_word_counts[klass] = cells.size.zero? ? 0.0 : cells.first.to_i64.to_f
  end
end
#get_vocabulary_sizes ⇒ Object
63 64 65 |
# File 'lib/ankusa/hbase_storage.rb', line 63
# Looks up the "totals:vocabsize" column through get_summary.
#
# @return [Object] the result of get_summary for "totals:vocabsize"
def get_vocabulary_sizes
  get_summary("totals:vocabsize")
end
#get_word_counts(word) ⇒ Object
49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/ankusa/hbase_storage.rb', line 49
# Reads the frequency-table row for word and returns its per-class counts.
#
# @param word [Object] row key in the frequency table
# @return [Hash] class name (Symbol) => count, with a default of 0 for
#   classes that have no column in the row; empty when the row is missing
def get_word_counts(word)
  tallies = Hash.new(0)
  rows = freq_table.get_row(word)
  unless rows.length == 0
    rows.first.columns.each do |column, cell|
      # the piece after the first ':' of the column name is the class name
      label = column.split(':')[1].intern
      # never report less than zero, in case untrain has been called too many times
      tallies[label] = [cell.to_i64.to_f, 0].max
    end
  end
  tallies
end
#incr_doc_count(klass, count) ⇒ Object
97 98 99 |
# File 'lib/ankusa/hbase_storage.rb', line 97
# Atomically adds count to the "totals:doccount" cell of klass in the summary
# table and stores the value returned by the increment in @klass_doc_counts.
#
# @param klass [Object] row key of the class in the summary table
# @param count [Integer] amount passed to atomic_increment
# @return [Object] the value returned by atomic_increment
def incr_doc_count(klass, count)
  updated = summary_table.atomic_increment(klass, "totals:doccount", count)
  @klass_doc_counts[klass] = updated
end
#incr_total_word_count(klass, count) ⇒ Object
93 94 95 |
# File 'lib/ankusa/hbase_storage.rb', line 93
# Atomically adds count to the "totals:wordcount" cell of klass in the summary
# table and stores the value returned by the increment in @klass_word_counts.
#
# @param klass [Object] row key of the class in the summary table
# @param count [Integer] amount passed to atomic_increment
# @return [Object] the value returned by atomic_increment
def incr_total_word_count(klass, count)
  updated = summary_table.atomic_increment(klass, "totals:wordcount", count)
  @klass_word_counts[klass] = updated
end
#incr_word_count(klass, word, count) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/ankusa/hbase_storage.rb', line 81
# Atomically adds count to the word's counter for klass in the frequency
# table and keeps the class's "totals:vocabsize" cell in step.
#
# The vocabulary size is adjusted only when the word's count crosses the
# boundary between "not positive" and "positive": +1 when it becomes positive,
# -1 when it stops being positive. Comparing the new size with count alone
# miscounts when count is zero or negative, or when repeated untraining has
# pushed a count below zero.
#
# @param klass [#to_s] class whose column ("classes:<klass>") is incremented
# @param word [Object] row key in the frequency table
# @param count [Integer] amount to add; may be negative
# @return [Integer] the word's count after the increment, as returned by
#   atomic_increment
def incr_word_count(klass, word, count)
  size = freq_table.atomic_increment(word, "classes:#{klass.to_s}", count)
  # atomic_increment hands back the post-increment value, so the value before
  # this call can be recovered without a second read
  previous = size - count
  if previous <= 0 && size > 0
    # the word just entered the class's vocabulary
    summary_table.atomic_increment klass, "totals:vocabsize"
  elsif previous > 0 && size <= 0
    # the word just left the class's vocabulary
    summary_table.atomic_increment klass, "totals:vocabsize", -1
  end
  size
end
#init_tables ⇒ Object
39 40 41 42 43 44 45 46 47 |
# File 'lib/ankusa/hbase_storage.rb', line 39
# Creates whichever of the two tables the client reports as missing: the
# frequency table (created with "classes" and "total") and the summary table
# (created with "totals"). Existing tables are left alone.
# NOTE(review): the string arguments to create_table are presumably column
# family names - confirm against the HBaseRb API.
def init_tables
  @hbase.create_table(@ftablename, "classes", "total") unless @hbase.has_table?(@ftablename)
  @hbase.create_table(@stablename, "totals") unless @hbase.has_table?(@stablename)
end
#reset ⇒ Object
25 26 27 28 |
# File 'lib/ankusa/hbase_storage.rb', line 25
# Starts over from empty storage: drops both tables (which also clears the
# local caches), then creates them again.
#
# @return [Object] whatever init_tables returns
def reset
  drop_tables
  init_tables
end