Class: RelatonIetf::DataFetcher
- Inherits:
-
Object
- Object
- RelatonIetf::DataFetcher
- Defined in:
- lib/relaton_ietf/data_fetcher.rb
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#create_series(ref, versions) ⇒ Object
Create unversioned bibliographic item.
-
#fetch ⇒ Object
Fetch documents.
-
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents.
-
#fetch_ieft_rfcs ⇒ Object
Fetches ietf-rfc-entries documents.
-
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents.
-
#file_name(entry) ⇒ String
Generate file name.
-
#initialize(source, output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
-
#read_doc(file) ⇒ RelatonIetf::IetfBibliographicItem
Read saved documents.
-
#rfc_index ⇒ Nokogiri::XML::Document
Get RFC index.
-
#save_doc(entry, check_duplicate: true) ⇒ Object
Save document to file.
-
#update_versions(versions) ⇒ Object
Updates I-D’s versions.
-
#version_relation(ref, type) ⇒ RelatonBib::DocumentRelation
Create bibitem relation.
Constructor Details
#initialize(source, output, format) ⇒ DataFetcher
Data fetcher initializer
17 18 19 20 21 22 23 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 17
#
# Data fetcher initializer.
#
# @param source [String] source name; `#fetch` compares it against
#   "ietf-rfcsubseries", "ietf-internet-drafts" and "ietf-rfc-entries"
# @param output [String] directory the documents are written to
# @param format [String] output format, e.g. "yaml", "xml" or "bibxml"
def initialize(source, output, format)
  @source, @output, @format = source, output, format
  # File extension: the format name without a leading "bib" or "rfc".
  @ext = format.sub(/^bib|^rfc/, "")
  # Paths written so far; `#save_doc` consults it to report duplicates.
  @files = []
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 33
#
# Initialize fetcher and run fetch. Prints the start time, the stop time and
# the elapsed whole seconds to standard output.
#
# @param source [String] source name handed to the new fetcher
# @param output [String] output directory; created when it does not exist
# @param format [String] output format handed to the new fetcher
def self.fetch(source, output: "data", format: "yaml")
  started = Time.now
  puts "Started at: #{started}"

  FileUtils.mkdir_p(output) unless Dir.exist?(output)
  fetcher = new(source, output, format)
  fetcher.fetch

  finished = Time.now
  puts "Stopped at: #{finished}"
  elapsed = (finished - started).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#create_series(ref, versions) ⇒ Object
Create unversioned bibliographic item
111 112 113 114 115 116 117 118 119 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 111
#
# Create unversioned bibliographic item. The item carries `ref` as its
# formatted reference and one "includes" relation per entry of `versions`;
# it is then handed to `save_doc`.
#
# @param ref [String] content of the item's formatted reference
# @param versions [Array<String>] references of the individual versions
# @return [Object, nil] nil when fewer than two versions are given,
#   otherwise whatever `save_doc` returns
def create_series(ref, versions)
  # A series document is only written when there are at least two versions.
  return unless versions.size > 1

  relations = versions.map { |version| version_relation(version, "includes") }
  series_ref = RelatonBib::FormattedRef.new(content: ref)
  save_doc IetfBibliographicItem.new(formattedref: series_ref, relation: relations)
end
#fetch ⇒ Object
Fetch documents
46 47 48 49 50 51 52 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 46
#
# Fetch documents. Runs the fetch method that corresponds to `@source`;
# does nothing and returns nil when `@source` is not one of the known names.
#
# @return [Object, nil] result of the selected fetch method
def fetch
  handlers = {
    "ietf-rfcsubseries" => :fetch_ieft_rfcsubseries,
    "ietf-internet-drafts" => :fetch_ieft_internet_drafts,
    "ietf-rfc-entries" => :fetch_ieft_rfcs,
  }
  handler = handlers[@source]
  send(handler) if handler
end
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents
66 67 68 69 70 71 72 73 74 75 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 66
#
# Fetches ietf-internet-drafts documents. Every `bibxml-ids/*.xml` file
# (relative to the current working directory) is parsed with `BibXMLParser`
# and saved. Names of files containing "D.draft-" are collected, with the
# leading "reference.I-D." removed, and passed to `update_versions` unless
# the list is empty or the format is "bibxml".
#
# @return [Object, nil] result of `update_versions`, or nil when it is skipped
def fetch_ieft_internet_drafts
  versions = []
  Dir["bibxml-ids/*.xml"].each do |path|
    name = File.basename(path, ".xml")
    versions << name.sub(/^reference\.I-D\./, "") if name.include?("D.draft-")
    save_doc BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
  end
  return if versions.empty? || @format == "bibxml"

  update_versions(versions)
end
#fetch_ieft_rfcs ⇒ Object
Fetches ietf-rfc-entries documents
154 155 156 157 158 159 160 161 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 154
#
# Fetches ietf-rfc-entries documents. Each `rfc-entry` node of the RFC index
# is parsed with `RfcEntry` and saved. A StandardError raised for one entry
# is reported through `warn` (entry doc-id, message and the first six
# backtrace lines) and the loop continues with the next entry.
#
# @return [Object] the collection returned by the XPath query
def fetch_ieft_rfcs
  entries = rfc_index.xpath("xmlns:rfc-entry")
  entries.each do |doc|
    begin
      save_doc RfcEntry.parse(doc)
    rescue StandardError => e
      warn "Error parsing #{doc.at('./xmlns:doc-id').text}: #{e.message}"
      warn e.backtrace.first(6).join("\n")
    end
  end
end
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents
57 58 59 60 61 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 57
#
# Fetches ietf-rfcsubseries documents. Each `bcp-entry`, `fyi-entry` and
# `std-entry` node of the RFC index is parsed with `RfcIndexEntry` and saved.
#
# @return [Object] the collection returned by the XPath query
def fetch_ieft_rfcsubseries
  entries = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  entries.each { |entry| save_doc(RfcIndexEntry.parse(entry)) }
end
#file_name(entry) ⇒ String
Generate file name
203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 203
#
# Generate file name.
#
# The identifier is taken, in order of preference, from the entry's
# "Internet-Draft" document identifier, its `docnumber`, or the content of
# its formatted reference. It is lower-cased for the "ietf-internet-drafts"
# source and upper-cased otherwise; whitespace, commas, colons and slashes
# become underscores and runs of underscores are collapsed.
#
# The entry's own strings are never modified (non-mutating `downcase` /
# `upcase` are used), so frozen identifiers are accepted as well.
#
# @param entry [#docnumber, #formattedref] document to derive the name from;
#   may additionally respond to `docidentifier`
# @return [String] path of the form "<output>/<name>.<ext>"
def file_name(entry)
  draft_id = nil
  if entry.respond_to?(:docidentifier)
    draft_id = entry.docidentifier.find { |i| i.type == "Internet-Draft" }&.id
  end
  id = draft_id || entry.docnumber || entry.formattedref.content
  # Work on a copy: changing case in place would alter the entry itself.
  id = @source == "ietf-internet-drafts" ? id.downcase : id.upcase
  name = id.gsub(/[\s,:\/]/, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end
#read_doc(file) ⇒ RelatonIetf::IetfBibliographicItem
Redad saved documents
142 143 144 145 146 147 148 149 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 142
#
# Read saved documents. The file is read as UTF-8 and parsed according to
# `@format`: "xml" through `XMLParser.from_xml`, "yaml" through
# `YAML.safe_load` and `IetfBibliographicItem.from_hash`, anything else
# through `BibXMLParser.parse`.
#
# @param file [String] path of the saved document
# @return [RelatonIetf::IetfBibliographicItem] the parsed document
def read_doc(file)
  content = File.read(file, encoding: "UTF-8")
  return XMLParser.from_xml(content) if @format == "xml"
  return IetfBibliographicItem.from_hash(YAML.safe_load(content)) if @format == "yaml"

  BibXMLParser.parse(content)
end
#rfc_index ⇒ Nokogiri::XML::Document
Get RFC index
168 169 170 171 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 168
#
# Get RFC index. Downloads https://www.rfc-editor.org/rfc-index.xml with
# `Net::HTTP.get`, parses it with Nokogiri and returns the node matched by
# "/xmlns:rfc-index".
#
# @return [Object] the `rfc-index` node of the parsed document
def rfc_index
  index_uri = URI("https://www.rfc-editor.org/rfc-index.xml")
  body = Net::HTTP.get(index_uri)
  Nokogiri::XML(body).at("/xmlns:rfc-index")
end
#save_doc(entry, check_duplicate: true) ⇒ Object
Save document to file
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 179
#
# Save document to file. The entry is serialized according to `@format`
# ("xml" -> `to_xml(bibdata: true)`, "yaml" -> `to_hash.to_yaml`, otherwise
# the `to_<format>` method) and written as UTF-8 to the path given by
# `file_name`.
#
# With `check_duplicate` enabled, a path that was already written during
# this run triggers a warning; the file is still overwritten. New paths are
# remembered in `@files`.
#
# @param entry [Object, nil] document to save; nothing happens when it is
#   nil or false
# @param check_duplicate [Boolean] whether to track and report duplicates
# @return [Integer, nil] result of `File.write`, or nil when entry is missing
def save_doc(entry, check_duplicate: true)
  return unless entry

  content =
    if @format == "xml" then entry.to_xml(bibdata: true)
    elsif @format == "yaml" then entry.to_hash.to_yaml
    else entry.send("to_#{@format}")
    end
  path = file_name(entry)
  if check_duplicate
    if @files.include?(path)
      warn "File #{path} already exists. Document: #{entry.docnumber}"
    else
      @files << path
    end
  end
  File.write(path, content, encoding: "UTF-8")
end
#update_versions(versions) ⇒ Object
Updates I-D’s versions
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 82
#
# Updates I-D's versions.
#
# Walks the saved `<output>/*.<ext>` files whose name looks like
# `draft-<series>-<NN>.<ext>`. For each new series the matching entries of
# `versions` are selected and passed to `create_series`. Each versioned file
# that has a lower and/or higher sibling version is re-read, given an
# "updates" relation to the closest lower version and an "updatedBy"
# relation to the closest higher version, and saved again.
#
# Differences from a plain substring match on the series name:
# * a version belongs to a series only if it is exactly `<series>-<NN>`, so
#   "draft-foo" does not pick up versions of "draft-foo-bar";
# * files and versions are sorted explicitly, so the result does not depend
#   on the order in which `Dir[]` or the caller happens to list them.
#
# @param versions [Array<String>] versioned draft references such as
#   "draft-foo-01"
# @return [Array<String>] the sorted list of files that was walked
def update_versions(versions) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file_pattern = /(?<series>draft-.+)-(?<ver>\d{2})\.#{Regexp.escape(@ext)}\z/
  version_number = ->(ref) { ref[/\d+\z/].to_i }
  series = ""
  bib_versions = []
  # Sorting keeps all files of one series adjacent, which the series-change
  # detection below relies on.
  Dir["#{@output}/*.#{@ext}"].sort.each do |file|
    # Match the base name only, so the output directory cannot interfere.
    match = file_pattern.match(File.basename(file))
    next unless match

    if series != match[:series]
      series = match[:series]
      series_versions = /\A#{Regexp.escape(series)}-\d{2}\z/
      # Ascending order makes `last` / `first` below the nearest neighbours.
      bib_versions = versions.grep(series_versions).sort_by(&version_number)
      create_series series, bib_versions
    end

    current = match[:ver].to_i
    lv = bib_versions.select { |ref| version_number.call(ref) < current }
    hv = bib_versions.select { |ref| version_number.call(ref) > current }
    next if lv.empty? && hv.empty?

    bib = read_doc(file)
    bib.relation << version_relation(lv.last, "updates") if lv.any?
    bib.relation << version_relation(hv.first, "updatedBy") if hv.any?
    # The file was already written once in this run; skip the duplicate check.
    save_doc bib, check_duplicate: false
  end
end
#version_relation(ref, type) ⇒ RelatonBib::DocumentRelation
Create bibitem relation
129 130 131 132 133 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 129
#
# Create bibitem relation. The related item is a bibliographic item that
# carries only a formatted reference with `ref` as its content.
#
# @param ref [String] content of the related item's formatted reference
# @param type [String] relation type, e.g. "includes", "updates", "updatedBy"
# @return [RelatonBib::DocumentRelation] relation pointing at the new item
def version_relation(ref, type)
  related_item = IetfBibliographicItem.new(
    formattedref: RelatonBib::FormattedRef.new(content: ref),
  )
  RelatonBib::DocumentRelation.new(type: type, bibitem: related_item)
end