Class: HeyDan::Import

Inherits:
Object
  • Object
show all
Defined in:
lib/heydan/import.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Attribute Details

#clientObject

Returns the value of attribute client.



6
7
8
# File 'lib/heydan/import.rb', line 6

# Plain reader for the +client+ attribute.
#
# @return [Object, nil] the current value of @client (nil when it has not
#   been assigned)
def client
  @client
end

Class Method Details

.check_indexObject



17
18
19
# File 'lib/heydan/import.rb', line 17

# Asks the search client whether the index named by +index+ exists.
#
# @return [Object] whatever the client's indices.exists? call returns
def check_index
  indices = client.indices
  indices.exists?(index: index)
end

.clientObject



9
10
11
# File 'lib/heydan/import.rb', line 9

# Memoized Elasticsearch client.
#
# The client is built on first use from HeyDan.elasticsearch[:url] with
# logging turned off, and the same object is handed back afterwards.
#
# @return [Elasticsearch::Client] the cached client
def client
  return @client if @client

  @client = Elasticsearch::Client.new(host: HeyDan.elasticsearch[:url], log: false)
end

.create_indexObject



21
22
23
# File 'lib/heydan/import.rb', line 21

# Creates the index named by +index+ through the search client.
#
# @return [Object] whatever the client's indices.create call returns
def create_index
  indices = client.indices
  indices.create(index: index)
end

.indexObject



13
14
15
# File 'lib/heydan/import.rb', line 13

# Name of the index used by the importer.
#
# Falls back to 'jurisdictions' (and remembers it in @index) when no
# truthy value has been stored yet.
#
# @return [String] the stored index name
def index
  return @index if @index

  @index = 'jurisdictions'
end

.process(number = 100) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/heydan/import.rb', line 25

# Bulk-imports every jurisdiction file into the search index.
#
# Creates the index first when +check_index+ reports it missing, then sends
# the files found directly under HeyDan.folders[:jurisdictions] to the
# client's bulk endpoint in batches, advancing a progress bar as it goes.
#
# Each batch holds at most +number+ documents, and nothing is sent when the
# folder contains no files.
#
# @param number [Integer] maximum number of documents per bulk request
# @return [nil]
# @raise [ArgumentError] if +number+ is not positive (raised by each_slice)
def process(number = 100)
  create_index unless check_index

  files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
  progress = ProgressBar.create(
    title: "Importing #{files.size} jurisdictions into Elastic Search",
    starting_at: 0,
    total: files.size
  )

  imported = 0
  files.each_slice(number) do |batch|
    bulk = batch.map do |file|
      jf = HeyDan::JurisdictionFile.new(name: file)
      { index: { _index: index, _type: jf.type, _id: jf.hash_id, data: jf.get_json } }
    end

    # Go through the accessors so the configured client and index name are
    # used rather than a raw ivar and a duplicated string literal.
    client.bulk refresh: true, body: bulk

    imported += batch.size
    # The final batch is reported through #finish below, so the bar is only
    # advanced explicitly while work remains.
    progress.progress = imported if imported < files.size
  end

  progress.finish
  nil
end

.process_in_parallel(number = 100) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/heydan/import.rb', line 50

# Imports every jurisdiction file one document at a time, with the work
# split into chunks that are handed to Parallel.map.
#
# Creates the index first when +check_index+ reports it missing. Each file
# found directly under HeyDan.folders[:jurisdictions] is indexed with an
# individual client.index call.
#
# @param number [Integer] maximum number of files per chunk
# @return [Object] the result of Parallel.map; each block evaluates to the
#   chunk it processed (the return value of chunk.each)
# @raise [ArgumentError] if +number+ is not positive (raised by each_slice)
def process_in_parallel(number = 100)
  create_index unless check_index

  files = Dir.glob("#{HeyDan.folders[:jurisdictions]}/*")
  Parallel.map(files.each_slice(number).to_a) do |chunk|
    chunk.each do |file|
      jf = HeyDan::JurisdictionFile.new(name: file)
      # Use the accessors instead of the raw @client ivar and a duplicated
      # index-name literal so this stays in step with the rest of the class.
      client.index index: index, type: jf.type, id: jf.hash_id, body: jf.get_json
    end
  end
end