Module: Pure::Extractor

Defined in:
lib/pure/extractor.rb,
lib/pure/extractor/version.rb,
lib/pure/extractor/configure_puree.rb,
lib/pure/extractor/commands/pure_command.rb,
lib/pure/extractor/commands/pure_extractor.rb

Defined Under Namespace

Modules: Commands, ConfigurePuree

Constant Summary

VERSION =
"1.1.0"

Class Method Summary

Class Method Details

.delete_keys_for_type(type, item) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/pure/extractor.rb', line 96

# Strips the keys this extractor does not export from a single record.
#
# Only +:dataset+ has a removal list; for any other +type+ the record is
# returned untouched. The record is modified in place.
#
# @param type [Symbol] resource type of the record (only +:dataset+ is pruned)
# @param item [Hash] record keyed by strings; mutated in place
# @return [Hash] the same +item+ object, after pruning
def self.delete_keys_for_type type, item

  keys = []
  nested_keys = {}

  case type
  when :dataset
    keys = ["keyword", "file", "associated", "link", "spatial"]
    nested_keys = { "person" => ["external", "other"] }
  end

  keys.each { |key| item.delete(key) }

  nested_keys.each do |key, attributes|
    nested = item[key]
    # A record may lack the parent key entirely; nothing to prune then.
    next unless nested.is_a?(Hash)

    # Delete each listed attribute individually. Passing the whole array to
    # Hash#delete would look for a key equal to that array and remove nothing.
    attributes.each { |attribute| nested.delete(attribute) }
  end

  item

end

.extract(type, chunk_size, output_directory) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/pure/extractor.rb', line 9

# Fetches every record of +type+ through Puree::Collection in chunks and
# writes each chunk to its own numbered JSON file in +output_directory+.
#
# Files are named "<type>_<counter>.json", where the counter starts at 1 and
# is left-padded with zeros to six digits. The collection count is printed
# to standard output and a progress bar is advanced after every chunk.
#
# @param type [Symbol] resource type handed to Puree::Collection and to the
#   per-type pruning/formatting helpers
# @param chunk_size [String, Integer, nil] number of records per request and
#   per file; nil, blank, non-numeric, zero or negative values fall back to 200
# @param output_directory [String] directory the JSON files are written to
# @return [nil]
def self.extract type, chunk_size, output_directory

  collection = Puree::Collection.new resource: type
  collection_count = collection.count

  puts collection_count

  progress_bar = ProgressBar.create(format: "%a %e %b\u{15E7}%i %p%% %t", progress_mark: ' ', remainder_mark: "\u{FF65}", total: collection_count)

  # Normalise first: to_i accepts nil, strings and integers alike. A size that
  # is not strictly positive would never advance the offset and would loop
  # forever, so it is replaced by the default.
  chunk_size = chunk_size.to_i
  chunk_size = 200 unless chunk_size > 0

  offset = 0
  file_id = 0

  while offset < collection_count

    file_id += 1

    filename = "#{type}_#{file_id.to_s.rjust(6, '0')}"
    output_file = File.join(output_directory, "#{filename}.json")

    returned_collection = collection.find limit: chunk_size, offset: offset

    returned_collection.each { |item| delete_keys_for_type type, item }

    formatted_results = format_results_for_type type, returned_collection

    write_results_to_file formatted_results, output_file

    update_progress_bar progress_bar, chunk_size, collection_count

    offset += chunk_size

  end

end

.format_results_for_type(type, results) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/pure/extractor.rb', line 56

# Reshapes raw records into the structure written to the output files.
#
# For +:organisation+ each record becomes a hash with +:system+, +:details+
# and +:parent+ sections. Every other +type+ is passed through unchanged
# (the very same +results+ object is returned).
#
# @param type [Symbol] resource type of the records
# @param results [Enumerable<Hash>] records keyed by strings
# @return [Array<Hash>, Object] a new array of reshaped hashes for
#   +:organisation+, otherwise +results+ itself
def self.format_results_for_type type, results

  case type

  when :organisation

    results.map do |result|

      # Either value may be absent (e.g. an organisation without a parent);
      # emit nil for the field instead of raising on nil.
      urls = result["url"]
      parent = result["parent"]

      {
        system: {
          uuid: result["uuid"],
          modified_at: result["modified"]
        },
        details: {
          name: result["name"],
          description: nil,
          url: urls && urls[0],
          isni: nil,
          type: result["type"]
        },
        parent: {
          uuid: parent && parent["uuid"]
        }
      }

    end

  else
    results

  end

end

.update_progress_bar(progress_bar, limit, collection_count) ⇒ Object



122
123
124
125
126
127
128
129
130
# File 'lib/pure/extractor.rb', line 122

# Moves the progress bar forward by one chunk without overshooting the total.
#
# @param progress_bar [#progress, #progress=] bar whose position is advanced
# @param limit [Integer] number of records in the chunk just processed
# @param collection_count [Integer] upper bound for the bar's position
# @return [Integer] the position that was assigned
def self.update_progress_bar progress_bar, limit, collection_count

  advanced = progress_bar.progress + limit

  # Clamp to the total so the final, possibly partial, chunk lands exactly on it.
  progress_bar.progress = [advanced, collection_count].min

end

.write_results_to_file(results, file) ⇒ Object



132
133
134
135
136
137
138
# File 'lib/pure/extractor.rb', line 132

# Serialises +results+ as pretty-printed JSON and writes it to +file+,
# replacing any previous content.
#
# @param results [Object] data accepted by JSON.pretty_generate
# @param file [String] path of the file to write
# @return [Integer] number of bytes written
# @raise [JSON::GeneratorError] if +results+ cannot be serialised; the
#   target file is left untouched in that case
def self.write_results_to_file results, file

  # Serialise before touching the file: opening it in "w" mode first would
  # truncate an existing file even when generation then fails.
  json = JSON.pretty_generate(results)

  File.write(file, json)

end