Class: RelatonIetf::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_ietf/data_fetcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, output, format) ⇒ DataFetcher

Data fetcher initializer

Parameters:

  • source (String)

    source name

  • output (String)

    directory to save files

  • format (String)

    format of output files (xml, yaml, bibxml); for ietf-rfcsubseries source only: xml



17
18
19
20
21
22
23
# File 'lib/relaton_ietf/data_fetcher.rb', line 17

# Data fetcher initializer.
#
# Stores the arguments, derives the output file extension from the format
# by dropping a leading "bib" or "rfc" (so "bibxml" becomes "xml"), and
# starts with an empty list of written files.
#
# @param source [String] source name
# @param output [String] directory to save files
# @param format [String] format of output files (xml, yaml, bibxml)
def initialize(source, output, format)
  @source = source
  @output = output
  @format = format
  @files = []
  @ext = @format.sub(/^(?:bib|rfc)/, "")
end

Class Method Details

.fetch(source, output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch

Parameters:

  • source (String)

    source name

  • output (String) (defaults to: "data")

    directory to save files, default: “data”

  • format (String) (defaults to: "yaml")

    format of output files (xml, yaml, bibxml); default: yaml; for ietf-rfcsubseries source only: xml



33
34
35
36
37
38
39
40
41
# File 'lib/relaton_ietf/data_fetcher.rb', line 33

# Initialize a fetcher and run it.
#
# Prints the start time, creates the output directory when it does not
# exist yet, runs the fetch, then prints the stop time and the elapsed
# time rounded to whole seconds.
#
# @param source [String] source name
# @param output [String] directory to save files, default: "data"
# @param format [String] format of output files, default: "yaml"
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  new(source, output, format).fetch
  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
end

Instance Method Details

#fetch ⇒ Object

Fetch documents



46
47
48
49
50
51
52
# File 'lib/relaton_ietf/data_fetcher.rb', line 46

# Fetch documents.
#
# Dispatches on @source to the matching fetch method. Returns nil without
# doing anything when @source is not one of the three known names.
def fetch
  handler = {
    "ietf-rfcsubseries" => :fetch_ieft_rfcsubseries,
    "ietf-internet-drafts" => :fetch_ieft_internet_drafts,
    "ietf-rfc-entries" => :fetch_ieft_rfcs,
  }[@source]
  send(handler) if handler
end

#fetch_ieft_internet_drafts ⇒ Object

Fetches ietf-internet-drafts documents



66
67
68
69
70
# File 'lib/relaton_ietf/data_fetcher.rb', line 66

# Fetches ietf-internet-drafts documents.
#
# Reads every *.xml file under the relative directory "bibxml-ids" as
# UTF-8, parses it with BibXMLParser and hands the result to save_doc.
def fetch_ieft_internet_drafts
  Dir.glob("bibxml-ids/*.xml").each do |path|
    xml = File.read(path, encoding: "UTF-8")
    save_doc(BibXMLParser.parse(xml))
  end
end

#fetch_ieft_rfcs ⇒ Object



72
73
74
75
76
77
78
79
# File 'lib/relaton_ietf/data_fetcher.rb', line 72

# Fetches ietf-rfc-entries documents.
#
# Parses every rfc-entry element of the RFC index with RfcEntry and saves
# the result. A failure on one entry is reported on standard error (the
# message plus up to six backtrace lines) and the loop moves on to the
# next entry.
def fetch_ieft_rfcs
  rfc_index.xpath("xmlns:rfc-entry").each do |doc|
    save_doc RfcEntry.parse(doc)
  rescue StandardError => e
    # The entry may lack a doc-id element; the handler must not raise
    # itself, otherwise one malformed entry aborts the whole run.
    doc_id = doc.at("./xmlns:doc-id")&.text || "(unknown doc-id)"
    warn "Error parsing #{doc_id}: #{e.message}"
    # backtrace can be nil, so coerce it before slicing.
    warn Array(e.backtrace).first(6).join("\n")
  end
end

#fetch_ieft_rfcsubseries ⇒ Object

Fetches ietf-rfcsubseries documents



57
58
59
60
61
# File 'lib/relaton_ietf/data_fetcher.rb', line 57

# Fetches ietf-rfcsubseries documents.
#
# Selects the bcp-entry, fyi-entry and std-entry elements of the RFC
# index, parses each with RfcIndexEntry and passes the result to save_doc.
def fetch_ieft_rfcsubseries
  entries = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  entries.each { |node| save_doc(RfcIndexEntry.parse(node)) }
end

#file_name(entry) ⇒ String

Generate file name

Parameters:

Returns:

  • (String)

    file name



115
116
117
118
119
120
121
122
123
124
125
# File 'lib/relaton_ietf/data_fetcher.rb', line 115

# Generate file name.
#
# Uses the id of the entry's "Internet-Draft" document identifier when the
# entry has one, otherwise the entry's docnumber. The id is lower-cased for
# the ietf-internet-drafts source and upper-cased for every other source;
# whitespace, commas, colons and slashes become underscores and runs of
# underscores are collapsed.
#
# @param entry [#docnumber] document to name; may also respond to
#   #docidentifier
# @return [String] file name (path inside @output with the @ext extension)
def file_name(entry)
  id = if entry.respond_to? :docidentifier
         entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id
       end
  id ||= entry.docnumber
  # Non-mutating case conversion: the string belongs to the entry, so
  # changing it in place would alter the document (and fail on frozen
  # strings).
  id = @source == "ietf-internet-drafts" ? id.downcase : id.upcase
  name = id.gsub(%r{[\s,:/]}, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end

#rfc_index ⇒ Object



81
82
83
84
# File 'lib/relaton_ietf/data_fetcher.rb', line 81

# Download https://www.rfc-editor.org/rfc-index.xml, parse it as XML and
# return the result of looking up the root rfc-index element.
#
# The index is downloaded again on every call; nothing is cached here.
def rfc_index
  index_uri = URI("https://www.rfc-editor.org/rfc-index.xml")
  body = Net::HTTP.get(index_uri)
  Nokogiri::XML(body).at("/xmlns:rfc-index")
end

#save_doc(entry) ⇒ Object

Save document to file

Parameters:



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/relaton_ietf/data_fetcher.rb', line 91

# Save document to file.
#
# Serializes the entry according to @format ("xml", "yaml", or any other
# value, which is sent to the entry as "to_<format>"), then writes it as
# UTF-8 to the path given by file_name. A warning is printed when the same
# path was already written during this run; the file is overwritten anyway.
#
# @param entry [Object, nil] document to save; nothing happens when it is
#   nil or false
def save_doc(entry) # rubocop:disable Metrics/MethodLength
  return unless entry

  content = if @format == "xml" then entry.to_xml(bibdata: true)
            elsif @format == "yaml" then entry.to_hash.to_yaml
            else entry.send("to_#{@format}")
            end
  path = file_name(entry)
  seen = @files.include?(path)
  warn "File #{path} already exists. Document: #{entry.docnumber}" if seen
  @files << path unless seen
  File.write(path, content, encoding: "UTF-8")
end