Class: RelatonOgc::DataFetcher

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/relaton_ogc/data_fetcher.rb

Defined Under Namespace

Modules: Utils

Constant Summary

Constants included from Utils

Utils::ENDPOINT

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utils

#etag, #etag=, #get_data

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Create DataFetcher instance

Parameters:

  • output (String)

    directory to save the documents

  • format (String)

    output format “yaml” or “xml”



48
49
50
51
52
53
54
# File 'lib/relaton_ogc/data_fetcher.rb', line 48

# Create DataFetcher instance
#
# @param output [String] directory to save the documents
# @param format [String] output format, stored as given
def initialize(output, format)
  @output, @format = output, format
  # The etag file sits directly inside the output directory.
  @etagfile = File.join(output, "etag.txt")
  # Identifiers already written, and identifiers encountered more than once.
  @docids = []
  @dupids = []
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ Object



56
57
58
59
60
61
62
63
64
# File 'lib/relaton_ogc/data_fetcher.rb', line 56

# Create the output directory if needed, run a fetch with a new instance,
# and report start time, stop time and elapsed seconds on stdout.
#
# @param output [String] directory passed to the constructor; created
#   (including parents) when it does not exist
# @param format [String] output format passed to the constructor
# @return [nil]
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Measure the duration on the monotonic clock: wall-clock time may be
  # adjusted while the fetch runs, which would skew the reported duration.
  start_tick = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  # mkdir_p does nothing when the directory already exists.
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_tick
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end

Instance Method Details

#fetch ⇒ Object

rubocop:disable Metrics/MethodLength, Metrics/AbcSize



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/relaton_ogc/data_fetcher.rb', line 66

# Parse and write every entry yielded by get_data, skipping entries whose
# "type" is "CC". A failure on one entry is reported with warn and does not
# stop the remaining entries. The etag is stored only when every entry was
# processed without raising.
def fetch # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  get_data do |etag, json|
    failed = false
    json.each do |_key, entry|
      begin
        write_document(Scrapper.parse_page(entry)) unless entry["type"] == "CC"
      rescue StandardError => e
        failed = true
        warn "Fetching document: #{entry['identifier']}"
        warn "#{e.class} #{e.message}"
        warn e.backtrace
      end
    end
    if @dupids.any?
      warn "[relaton-ogc] WARNING Duplicated documents: #{@dupids.uniq.join(', ')}"
    end
    # Leave the stored etag untouched after a failure.
    self.etag = etag unless failed
  end
end

#write_document(bib) ⇒ Object

rubocop:disable Metrics/AbcSize



85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/relaton_ogc/data_fetcher.rb', line 85

# Serialize a document into the output directory, once per identifier.
#
# The file name is the first document identifier, upcased, with whitespace,
# ":" and "." replaced by "_", followed by the format as extension. An
# identifier seen before is recorded in @dupids and nothing is written.
#
# @param bib [#docidentifier, #to_xml, #to_hash] document to save
# @return [Integer, nil] result of File.write, or nil for a duplicate
def write_document(bib) # rubocop:disable Metrics/AbcSize
  docid = bib.docidentifier[0].id
  if @docids.include?(docid)
    @dupids << docid
    return
  end

  @docids << docid
  basename = docid.upcase.gsub(/[\s:.]/, "_")
  path = "#{@output}/#{basename}.#{@format}"
  # Every format other than "xml" is serialized as YAML.
  content = if @format == "xml"
              bib.to_xml(bibdata: true)
            else
              bib.to_hash.to_yaml
            end
  File.write path, content, encoding: "UTF-8"
end