Class: RelatonOgc::DataFetcher
- Inherits:
-
Object
- Object
- RelatonOgc::DataFetcher
- Includes:
- Utils
- Defined in:
- lib/relaton_ogc/data_fetcher.rb
Defined Under Namespace
Modules: Utils
Constant Summary
Constants included from Utils
Class Method Summary collapse
Instance Method Summary collapse
-
#fetch ⇒ Object
Fetch the documents, write each parsed one to the output directory, and warn about duplicated identifiers.
-
#initialize(output, format) ⇒ DataFetcher
constructor
Create DataFetcher instance.
-
#write_document(bib) ⇒ Object
Write a document to a file in the output directory, skipping identifiers that were already written.
Methods included from Utils
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Create DataFetcher instance
48 49 50 51 52 53 54 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 48
#
# Create DataFetcher instance.
#
# @param output [String] directory the documents are written to; the ETag
#   file path ("etag.txt") is built inside this directory
# @param format [String] output format, stored as given ("xml" selects XML
#   serialization in #write_document, anything else is written as YAML)
def initialize(output, format)
  @output = output
  @etagfile = File.join output, "etag.txt"
  @format = format
  # Identifiers written so far, and identifiers seen more than once.
  @docids = []
  @dupids = []
end
Class Method Details
.fetch(output: "data", format: "yaml") ⇒ Object
56 57 58 59 60 61 62 63 64 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 56
#
# Create the output directory, run a fetcher instance and print the start
# time, stop time and elapsed seconds to standard output.
#
# @param output [String] directory to write the documents to (created,
#   including missing parents, when it does not exist)
# @param format [String] output format passed on to the new instance
def self.fetch(output: "data", format: "yaml")
  t1 = Time.now
  puts "Started at: #{t1}"
  # mkdir_p is a no-op for an existing directory, so no existence check is needed.
  FileUtils.mkdir_p output
  new(output, format).fetch
  t2 = Time.now
  puts "Stopped at: #{t2}"
  puts "Done in: #{(t2 - t1).round} sec."
end
Instance Method Details
#fetch ⇒ Object
Fetch the documents, write each parsed one to the output directory, and warn about duplicated identifiers.
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 66
#
# Parse every entry yielded by get_data and write it with #write_document.
# Entries whose "type" is "CC" are skipped. A failure on one entry is
# reported with warn and does not stop the remaining entries.
#
# After the loop a warning lists duplicated identifiers, if any, and the
# ETag is stored only when no entry raised an error.
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  get_data do |etag, json|
    no_errors = true
    json.each do |_, hit|
      next if hit["type"] == "CC"

      bib = Scrapper.parse_page hit
      write_document bib
    rescue StandardError => e
      # Remember the failure so the ETag is not stored for this run.
      no_errors = false
      warn "Fetching document: #{hit['identifier']}"
      warn "#{e.class} #{e.message}"
      warn e.backtrace
    end
    warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}" if @dupids.any?
    self.etag = etag if no_errors
  end
end
#write_document(bib) ⇒ Object
Write a document to a file in the output directory, skipping identifiers that were already written.
85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 85
#
# Serialize a bibliographic item and write it to the output directory.
#
# The file name is the first document identifier, upcased, with whitespace,
# colons and dots replaced by underscores, plus the format as extension.
# An identifier that was already written is recorded as a duplicate and the
# item is not written again.
#
# @param bib [#docidentifier, #to_xml, #to_hash] item to write; only its
#   first docidentifier is used
# @return [Integer, nil] result of File.write, or nil for a duplicate
def write_document(bib)
  id = bib.docidentifier[0].id
  if @docids.include?(id)
    @dupids << id
    return
  end

  @docids << id
  name = id.upcase.gsub(/[\s:.]/, "_")
  file = File.join(@output, "#{name}.#{@format}")
  # Any format other than "xml" is written as YAML.
  content = @format == "xml" ? bib.to_xml(bibdata: true) : bib.to_hash.to_yaml
  File.write file, content, encoding: "UTF-8"
end