Class: Ankusa::HBaseStorage

Inherits:
Object
  • Object
show all
Defined in:
lib/ankusa/hbase_storage.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(host = 'localhost', port = 9090, frequency_tablename = "ankusa_word_frequencies", summary_tablename = "ankusa_summary") ⇒ HBaseStorage

Returns a new instance of HBaseStorage.



8
9
10
11
12
13
14
15
# File 'lib/ankusa/hbase_storage.rb', line 8

# Opens an HBase client connection, remembers the two table names, starts
# with empty per-class count caches and makes sure the tables exist.
#
# @param host [String] host handed to HBaseRb::Client.new
# @param port [Integer] port handed to HBaseRb::Client.new
# @param frequency_tablename [String] name of the word frequency table
# @param summary_tablename [String] name of the summary table
def initialize(host='localhost', port=9090, frequency_tablename="ankusa_word_frequencies", summary_tablename="ankusa_summary")
  @hbase = HBaseRb::Client.new(host, port)

  # Table names are only stored here; init_tables creates the tables.
  @stablename = summary_tablename
  @ftablename = frequency_tablename

  # Local caches keyed by class name.
  @klass_doc_counts = {}
  @klass_word_counts = {}

  init_tables
end

Instance Attribute Details

#hbase ⇒ Object (readonly)

Returns the value of attribute hbase.



6
7
8
# File 'lib/ankusa/hbase_storage.rb', line 6

# Reader for the @hbase instance variable.
#
# @return [Object] the current value of @hbase
def hbase
  @hbase
end

Instance Method Details

#classnames ⇒ Object



17
18
19
20
21
22
23
# File 'lib/ankusa/hbase_storage.rb', line 17

# Scans the summary table (start row "", column "totals") and collects the
# row key of every yielded row as a symbol.
#
# @return [Array<Symbol>] row keys in the order the scanner yields them
def classnames
  names = []
  summary_table.create_scanner("", "totals") do |scanned|
    names << scanned.row.intern
  end
  names
end

#close ⇒ Object



103
104
105
# File 'lib/ankusa/hbase_storage.rb', line 103

# Closes the client held in @hbase by delegating to its #close.
def close
  @hbase.close
end

#doc_count_totals ⇒ Object



99
100
101
# File 'lib/ankusa/hbase_storage.rb', line 99

# Looks up the "totals:doccount" summary column through get_summary.
#
# @return [Object] whatever get_summary returns for that column
def doc_count_totals
  get_summary("totals:doccount")
end

#drop_tables ⇒ Object



30
31
32
33
34
35
36
37
# File 'lib/ankusa/hbase_storage.rb', line 30

# Deletes the frequency table and then the summary table, and clears the
# state this object keeps about them.
#
# @return [Hash] the fresh, empty document-count cache
def drop_tables
  freq_table.delete
  summary_table.delete

  # NOTE(review): @ftable/@stable look like the memoized handles behind
  # freq_table/summary_table -- confirm against those methods.
  @ftable = @stable = nil

  # Two separate hashes on purpose: the caches must not share one object.
  @klass_word_counts = {}
  @klass_doc_counts = {}
end

#get_doc_count(klass) ⇒ Object



73
74
75
76
77
# File 'lib/ankusa/hbase_storage.rb', line 73

# Returns the "totals:doccount" value stored for +klass+ as a Float.
#
# The value is served from @klass_doc_counts when present; otherwise it is
# read from the summary table and cached.
#
# @param klass [Object] row key of the class in the summary table
# @return [Float] the stored count, or 0.0 when the summary table has no
#   cell for +klass+
def get_doc_count(klass)
  @klass_doc_counts.fetch(klass) do
    cell = summary_table.get(klass, "totals:doccount").first
    # No cell for this row: report zero instead of calling to_i64 on nil.
    # The miss is not cached so a later lookup reads the table again.
    next 0.0 if cell.nil?

    @klass_doc_counts[klass] = cell.to_i64.to_f
  end
end

#get_total_word_count(klass) ⇒ Object



67
68
69
70
71
# File 'lib/ankusa/hbase_storage.rb', line 67

# Returns the "totals:wordcount" value stored for +klass+ as a Float.
#
# The value is served from @klass_word_counts when present; otherwise it is
# read from the summary table and cached.
#
# @param klass [Object] row key of the class in the summary table
# @return [Float] the stored count, or 0.0 when the summary table has no
#   cell for +klass+
def get_total_word_count(klass)
  @klass_word_counts.fetch(klass) do
    cell = summary_table.get(klass, "totals:wordcount").first
    # No cell for this row: report zero instead of calling to_i64 on nil.
    # The miss is not cached so a later lookup reads the table again.
    next 0.0 if cell.nil?

    @klass_word_counts[klass] = cell.to_i64.to_f
  end
end

#get_vocabulary_sizes ⇒ Object



63
64
65
# File 'lib/ankusa/hbase_storage.rb', line 63

# Looks up the "totals:vocabsize" summary column through get_summary.
#
# @return [Object] whatever get_summary returns for that column
def get_vocabulary_sizes
  get_summary("totals:vocabsize")
end

#get_word_counts(word) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/ankusa/hbase_storage.rb', line 49

# Reads the frequency-table row for +word+ and returns its count per class.
#
# @param word [String] row key in the frequency table
# @return [Hash{Symbol => Numeric}] count keyed by class name; negative
#   stored values are reported as 0 and absent classes read as 0 (the
#   hash's default)
def get_word_counts(word)
  counts = Hash.new(0)
  rows = freq_table.get_row(word)
  return counts if rows.empty?

  rows.first.columns.each do |colname, cell|
    # Column names have the form "family:qualifier". Split on the first
    # colon only so a class name that itself contains ':' (for example
    # "Foo::Bar") is kept whole rather than cut at its first colon.
    classname = colname.split(':', 2)[1].intern
    # in case untrain has been called too many times
    counts[classname] = [cell.to_i64.to_f, 0].max
  end

  counts
end

#incr_doc_count(klass, count) ⇒ Object



95
96
97
# File 'lib/ankusa/hbase_storage.rb', line 95

# Atomically adds +count+ to the "totals:doccount" cell of +klass+ in the
# summary table and refreshes the local cache with the new total.
#
# @param klass [Object] row key of the class in the summary table
# @param count [Integer] amount to add (may be negative)
# @return [Object] the value returned by atomic_increment
def incr_doc_count(klass, count)
  total = summary_table.atomic_increment(klass, "totals:doccount", count)
  # get_doc_count caches Floats; cache a Float here too so its return type
  # does not depend on which method filled the cache.
  @klass_doc_counts[klass] = total.to_f
  total
end

#incr_total_word_count(klass, count) ⇒ Object



91
92
93
# File 'lib/ankusa/hbase_storage.rb', line 91

# Atomically adds +count+ to the "totals:wordcount" cell of +klass+ in the
# summary table and refreshes the local cache with the new total.
#
# @param klass [Object] row key of the class in the summary table
# @param count [Integer] amount to add (may be negative)
# @return [Object] the value returned by atomic_increment
def incr_total_word_count(klass, count)
  total = summary_table.atomic_increment(klass, "totals:wordcount", count)
  # get_total_word_count caches Floats; cache a Float here too so its return
  # type does not depend on which method filled the cache.
  @klass_word_counts[klass] = total.to_f
  total
end

#incr_word_count(klass, word, count) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
# File 'lib/ankusa/hbase_storage.rb', line 79

# Atomically adds +count+ to the frequency of +word+ under +klass+ and keeps
# the class's "totals:vocabsize" cell in step.
#
# The vocabulary size changes only when the word's count crosses zero:
# +1 when it goes from non-positive to positive, -1 when it goes from
# positive to non-positive. Comparing the new count with +count+ or with 0
# alone misfires when a word is untrained more often than it was trained
# (e.g. untraining an unseen word gives size == count == -1).
#
# @param klass [Object] class name; its string form is the column qualifier
# @param word [String] row key in the frequency table
# @param count [Integer] amount to add (negative when untraining)
# @return [Integer] the word's new count as returned by atomic_increment
def incr_word_count(klass, word, count)
  size = freq_table.atomic_increment(word, "classes:#{klass}", count)
  # atomic_increment returns the value after the increment, so the value
  # before it is recovered by subtracting what was just added.
  previous = size - count

  if previous <= 0 && size > 0
    summary_table.atomic_increment(klass, "totals:vocabsize")
  elsif previous > 0 && size <= 0
    summary_table.atomic_increment(klass, "totals:vocabsize", -1)
  end
  size
end

#init_tables ⇒ Object



39
40
41
42
43
44
45
46
47
# File 'lib/ankusa/hbase_storage.rb', line 39

# Creates whichever of the two backing tables does not exist yet: the
# frequency table with the "classes" and "total" column families, and the
# summary table with the "totals" column family.
#
# @return [Object, nil] nil when the summary table already exists,
#   otherwise the result of creating it
def init_tables
  unless @hbase.has_table?(@ftablename)
    @hbase.create_table(@ftablename, "classes", "total")
  end

  @hbase.create_table(@stablename, "totals") unless @hbase.has_table?(@stablename)
end

#reset ⇒ Object



25
26
27
28
# File 'lib/ankusa/hbase_storage.rb', line 25

# Starts over with empty storage: drops the tables via drop_tables, then
# creates them again via init_tables.
def reset
  drop_tables
  init_tables
end