Class: RelatonIetf::DataFetcher
- Inherits:
- Object
- Inheritance chain:
- Object → RelatonIetf::DataFetcher
- Defined in:
- lib/relaton_ietf/data_fetcher.rb
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#fetch ⇒ Object
Fetch documents.
-
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents.
- #fetch_ieft_rfcs ⇒ Object
-
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents.
-
#file_name(entry) ⇒ String
Generate file name.
-
#initialize(source, output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
- #rfc_index ⇒ Object
-
#save_doc(entry) ⇒ Object
Save document to file.
Constructor Details
#initialize(source, output, format) ⇒ DataFetcher
Data fetcher initializer
17 18 19 20 21 22 23 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 17
#
# Data fetcher initializer.
#
# @param source [String] data source name, stored as given
# @param output [String] output directory, stored as given
# @param format [String] output format; also used to derive the file extension
def initialize(source, output, format)
  @source = source
  @output = output
  @format = format
  # The extension is the format without a leading "bib" or "rfc"
  # (e.g. "bibxml" -> "xml"). \A anchors to the start of the string,
  # whereas ^ would also match right after an embedded newline.
  @ext = @format.sub(/\A(?:bib|rfc)/, "")
  # Paths written so far; starts empty.
  @files = []
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 33
#
# Initialize fetcher and run fetch, printing the start time, the stop time
# and the elapsed time in whole seconds.
#
# @param source [String] passed through to the constructor
# @param output [String] output directory; created when it does not exist
# @param format [String] passed through to the constructor
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  fetcher = new(source, output, format)
  fetcher.fetch
  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#fetch ⇒ Object
Fetch documents
46 47 48 49 50 51 52 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 46
#
# Fetch documents.
#
# Dispatches on the configured source name to the matching fetch method.
# A source name that matches none of the branches results in no action.
def fetch
  case @source
  when "ietf-rfcsubseries"
    fetch_ieft_rfcsubseries
  when "ietf-internet-drafts"
    fetch_ieft_internet_drafts
  when "ietf-rfc-entries"
    fetch_ieft_rfcs
  end
end
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents
66 67 68 69 70 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 66
#
# Fetches ietf-internet-drafts documents.
#
# Reads every XML file matching "bibxml-ids/*.xml" (relative to the current
# working directory) as UTF-8, parses it and saves the result.
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength
  Dir["bibxml-ids/*.xml"].each do |path|
    xml = File.read(path, encoding: "UTF-8")
    save_doc BibXMLParser.parse(xml)
  end
end
#fetch_ieft_rfcs ⇒ Object
72 73 74 75 76 77 78 79 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 72
#
# Parses and saves every rfc-entry element of the RFC index.
#
# A failure while parsing or saving one entry is reported with `warn`
# (message plus the first backtrace lines) and the remaining entries are
# still processed.
def fetch_ieft_rfcs
  rfc_index.xpath("xmlns:rfc-entry").each do |doc|
    save_doc RfcEntry.parse(doc)
  rescue StandardError => e
    # The entry may have no doc-id element. Safe navigation keeps the handler
    # itself from raising, which would abort all remaining entries.
    doc_id = doc.at("./xmlns:doc-id")&.text
    warn "Error parsing #{doc_id}: #{e.message}"
    # Array() guards against an exception that carries no backtrace.
    warn Array(e.backtrace).first(6).join("\n")
  end
end
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents
57 58 59 60 61 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 57
#
# Fetches ietf-rfcsubseries documents.
#
# Selects the bcp-entry, fyi-entry and std-entry elements of the RFC index,
# parses each one and saves the result.
def fetch_ieft_rfcsubseries
  entries = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  entries.each { |node| save_doc RfcIndexEntry.parse(node) }
end
#file_name(entry) ⇒ String
Generate file name
115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 115
#
# Generate file name.
#
# The base name is the id of the entry's "Internet-Draft" document
# identifier when the entry has one, otherwise the entry's docnumber. It is
# lower-cased for the "ietf-internet-drafts" source and upper-cased for any
# other source. Whitespace, commas, colons and slashes are replaced by "_",
# and runs of "_" are collapsed into one.
#
# @param entry [#docnumber] document; may also respond to #docidentifier
# @return [String] path made of the output directory, the name and the extension
def file_name(entry)
  id = if entry.respond_to? :docidentifier
         entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id
       end
  id ||= entry.docnumber
  # Non-mutating case conversion: the bang variants would rewrite the entry's
  # own string in place and raise FrozenError on a frozen string.
  id = @source == "ietf-internet-drafts" ? id.downcase : id.upcase
  name = id.gsub(%r{[\s,:/]}, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end
#rfc_index ⇒ Object
81 82 83 84 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 81
#
# Downloads the RFC index XML from rfc-editor.org and returns the node
# matched by "/xmlns:rfc-index".
def rfc_index
  index_uri = URI("https://www.rfc-editor.org/rfc-index.xml")
  body = Net::HTTP.get(index_uri)
  Nokogiri::XML(body).at("/xmlns:rfc-index")
end
#save_doc(entry) ⇒ Object
Save document to file
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 91
#
# Save document to file.
#
# Does nothing when entry is nil or false. The document is serialized
# according to the configured format: "xml" and "yaml" have dedicated
# branches, any other format calls the entry's `to_<format>` method. When
# the target path was already written by this fetcher a warning is printed,
# and the file is written (overwritten) regardless.
#
# @param entry [Object, nil] document to save
def save_doc(entry) # rubocop:disable Metrics/MethodLength
  return unless entry

  content =
    case @format
    when "xml"
      entry.to_xml(bibdata: true)
    when "yaml"
      entry.to_hash.to_yaml
    else
      entry.send("to_#{@format}")
    end
  # The path is computed after serialization, as in the original ordering.
  path = file_name entry
  duplicate = @files.include?(path)
  warn "File #{path} already exists. Document: #{entry.docnumber}" if duplicate
  @files << path unless duplicate
  File.write path, content, encoding: "UTF-8"
end