Class: Soulheart::Loader

Inherits:
Base
  • Object
show all
Defined in:
lib/soulheart/loader.rb

Instance Attribute Summary

Attributes inherited from Base

#type

Instance Method Summary collapse

Methods inherited from Base

#cache_duration, #cache_id, #categories_id, #category_combos, #category_combos_id, #category_id, #combinatored_category_array, #hidden_categories_id, #hidden_category_array, #no_query_id, #normalize_type_id, #redis, #results_hashes_id, #set_category_combos_array, #sorted_category_array

Methods included from Helpers

#normalize, #prefixes_for_phrase

Constructor Details

#initialize(defaults = {}) ⇒ Loader

Returns a new instance of Loader.



3
4
5
6
# File 'lib/soulheart/loader.rb', line 3

# Builds a loader from an optional hash of flags.
#
# @param defaults [Hash] options; only +:no_all+ and +:no_combinatorial+ are
#   read, and each is stored as given (nil when the key is absent)
def initialize(defaults = {})
  @no_all, @no_combinatorial = defaults.values_at(:no_all, :no_combinatorial)
end

Instance Method Details

#add_item(item, category_base_id = nil, cleaned = false) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/soulheart/loader.rb', line 117

# Writes one item into the redis index.
#
# The item's term is added to the queryless sorted set for the category and to
# one sorted set per prefix of the term and its aliases. When the item has not
# been cleaned yet, it is cleaned first, its data is stored in the results hash
# and each prefix is remembered in the master prefix set.
#
# @param item [Hash] item with string keys; 'term', 'priority' and 'data' are
#   read, 'aliases' is optional
# @param category_base_id [String, nil] key prefix to index under; derived from
#   the item's category via +category_id+ when nil
# @param cleaned [Boolean] true when +item+ has already been through +clean+
# @return [Hash] the (cleaned) item
def add_item(item, category_base_id = nil, cleaned = false)
  item = clean(item) unless cleaned
  category_base_id ||= category_id(item['category'])
  # Scores are negated so that higher priorities sort first in the sorted sets
  priority = -item['priority']
  term = item['term']
  phrase = ([term] + (item['aliases'] || [])).join(' ')
  # Computed before the pipeline opens so any redis lookups the helper performs
  # are not swallowed by the pipeline
  prefixes = prefixes_for_phrase(phrase)

  # Commands are sent through the yielded pipeline object. Older redis-rb
  # versions yield the client itself, so this form works there as well.
  redis.pipelined do |pipeline|
    pipeline.zadd(no_query_id(category_base_id), priority, term) # Add to master set for queryless searches
    # store the raw data in a separate key to reduce memory usage, if it's cleaned it's done
    pipeline.hset(results_hashes_id, term, MultiJson.encode(item['data'])) unless cleaned
    # Store all the prefixes
    prefixes.each do |p|
      pipeline.sadd(Soulheart.base_id, p) unless cleaned # remember prefix in a master set
      # store the normalized term in the index for each of the categories
      pipeline.zadd("#{category_base_id}#{p}", priority, term)
    end
  end
  item
end

#add_to_categories_array(category) ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/soulheart/loader.rb', line 22

# Records a category name in redis unless it is already recorded.
#
# The name goes into the hidden categories set when the loader was built with
# +:no_combinatorial+, otherwise into the regular categories set.
#
# @param category [String] category name to record
# @return [Object, nil] the result of +sadd+, or nil when the category was
#   already a member
def add_to_categories_array(category)
  set_id = @no_combinatorial ? hidden_categories_id : categories_id
  # Ask redis about the single member instead of fetching the whole set
  redis.sadd(set_id, category) unless redis.sismember(set_id, category)
end

#clean(item) ⇒ Object



108
109
110
111
112
113
114
115
# File 'lib/soulheart/loader.rb', line 108

# Normalizes a raw item and registers its category.
#
# After +clean_hash+ has shaped the item, every key other than the five
# reserved ones is moved into the item's 'data' hash under its string name.
#
# @param item [Hash] raw item
# @return [Hash] the cleaned item
def clean(item)
  cleaned = clean_hash(item)
  extra_keys = cleaned.keys.reject { |name| %w(category priority term aliases data).include?(name) }
  extra_keys.each do |name|
    cleaned['data'].merge!(name.to_s => cleaned.delete(name))
  end
  add_to_categories_array(cleaned['category'])
  cleaned
end

#clean_hash(item) ⇒ Object



99
100
101
102
103
104
105
106
# File 'lib/soulheart/loader.rb', line 99

# Turns a raw item hash into the loader's item shape.
#
# Starts from +default_items_hash+ built from the item's 'text' and 'category',
# folds in the item's own 'data' and 'priority', then merges whatever keys
# remain on the item over the result. The 'text', 'category', 'data' and
# 'priority' keys are removed from +item+ in the process.
#
# @param item [Hash] raw item with string keys; 'text' is required, 'aliases'
#   may be an Array or a comma-separated String
# @return [Hash] a new hash in the loader's item shape
# @raise [ArgumentError] when the item has no 'text'
def clean_hash(item)
  # Validate first so a rejected item is left untouched
  fail ArgumentError, 'Items must have text' unless item['text']

  aliases = item['aliases']
  item['aliases'] = aliases.split(',').map(&:strip) if aliases && !aliases.kind_of?(Array)

  # Always take these keys off the item. A present-but-nil 'data' or 'priority'
  # would otherwise survive to the final merge and overwrite the defaults.
  data = item.delete('data')
  priority = item.delete('priority')

  cleaned = default_items_hash(item.delete('text'), item.delete('category'))
  cleaned['data'].merge!(data) if data
  cleaned['priority'] = priority.to_f if priority
  cleaned.merge(item)
end

#clear(remove_results = false) ⇒ Object



67
68
69
70
71
72
# File 'lib/soulheart/loader.rb', line 67

# Removes the indexed data for every category combination, then the category
# sets, then the data under the default id.
#
# @param remove_results [Boolean] also call +remove_results_hash+ when truthy
# @return [Object, nil] the result of +remove_results_hash+, or nil when
#   +remove_results+ is falsy
def clear(remove_results = false)
  category_combos.each do |combo|
    combo_id = category_id(combo)
    delete_data(combo_id)
  end
  delete_categories
  delete_data
  return unless remove_results

  remove_results_hash
end

#default_items_hash(text, category) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/soulheart/loader.rb', line 8

# Builds the baseline item hash for a piece of text.
#
# The normalized forms of the text and category become 'term' and 'category';
# the raw values are kept under 'data'.
#
# @param text [String] display text of the item
# @param category [String, nil] category name; 'default' is used when nil
# @return [Hash] item hash with priority 100 and no aliases
def default_items_hash(text, category)
  label = category || 'default'
  raw = { 'text' => text, 'category' => label }
  {
    'category' => normalize(label),
    'priority' => 100,
    'term' => normalize(text),
    'aliases' => [],
    'data' => raw
  }
end

#delete_categories ⇒ Object



31
32
33
34
35
# File 'lib/soulheart/loader.rb', line 31

# Expires, with a zero timeout, the three redis keys that track categories:
# the category combos, the categories and the hidden categories.
#
# @return [Object] the result of the last +expire+ call
def delete_categories
  expire_now = ->(key) { redis.expire(key, 0) }
  expire_now.call(category_combos_id)
  expire_now.call(categories_id)
  expire_now.call(hidden_categories_id)
end

#delete_data(id = "#{Soulheart.base_id}:") ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/soulheart/loader.rb', line 42

# Deletes the sorted sets stored under +id+.
#
# Every prefix remembered in the master prefix set is combined with +id+ to
# form a key to delete, and the bare +id+ key is deleted as well.
#
# @param id [String] key prefix whose data should be removed
# @return [Object] whatever +redis.pipelined+ returns
def delete_data(id = "#{Soulheart.base_id}:")
  # delete the sorted sets for this type
  phrases = redis.smembers(Soulheart.base_id)
  # Commands are sent through the yielded pipeline object. Older redis-rb
  # versions yield the client itself, so this form works there as well.
  redis.pipelined do |pipeline|
    phrases.each { |phrase| pipeline.del("#{id}#{phrase}") }
    pipeline.del(id)
  end

  # Redis can continue serving cached requests while the reload is
  # occurring. Some requests may be cached incorrectly as empty set (for requests
  # which come in after the above delete, but before the loading completes). But
  # everything will work itself out as soon as the cache expires again.
end

#load(items) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/soulheart/loader.rb', line 74

# Loads a collection of items into the index.
#
# Each item is first added under its own category and replaced in place by the
# cleaned item that +add_item+ returns. Then, for every category combination
# from +set_category_combos_array+, each item is added again under that
# combination when it applies:
# * the combination equal to the item's own category is skipped (already done)
# * 'all' is used unless the loader was built with +:no_all+
# * any other combination is used only when the loader is combinatorial and
#   the combination contains the item's category
#
# Prints the total number of +add_item+ calls when finished.
#
# @param items [Enumerable<Hash>] raw items; each hash is mutated in place
# @return [nil] the result of +puts+
def load(items)
  Soulheart.stop_words # Load stop words so we don't pipeline redis_stop_words accidentally
  i = 0
  items.each do |item|
    item.replace(add_item(item)) # Replace with item return so we know we have category_id
    i += 1
  end
  set_category_combos_array.each do |category_combo|
    items.each do |item|
      category = item['category']
      next if category_combo == category

      if category_combo == 'all'
        next if @no_all
      elsif @no_combinatorial || !category_combo.include?(category)
        # Literal containment: the category name must not be read as a regex
        next
      end
      add_item(item, category_id(category_combo), true) # send it base
      i += 1
    end
  end
  puts "Total items (including combinatorial categories):    #{i}"
end

#remove_results_hash ⇒ Object



58
59
60
61
62
63
64
65
# File 'lib/soulheart/loader.rb', line 58

# Deletes the hash that stores each item's raw data.
#
# @return [Object] the result of +redis.del+
def remove_results_hash
  # delete the data store
  # We don't do this every time we clear because it breaks the caching feature.
  # The option to clear this is only called in testing right now.
  # There should be an option to clear it other times though.
  #
  # A single DEL is enough: expiring the key with a zero timeout first removed
  # it already, which made the DEL that followed a wasted round trip.
  redis.del(results_hashes_id)
end

#reset_categories(categories) ⇒ Object



37
38
39
40
# File 'lib/soulheart/loader.rb', line 37

# Replaces the stored categories with the given ones.
#
# The existing category keys are removed via +delete_categories+, then
# +categories+ is added to the categories set.
#
# @param categories [Array<String>, String, nil] category names to store
# @return [Object] the result of +redis.sadd+, or 0 when there was nothing to add
def reset_categories(categories)
  delete_categories
  # SADD needs at least one member; resetting to "no categories" is just the delete
  return 0 if Array(categories).empty?

  redis.sadd categories_id, categories
end