Class: RelatonIetf::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_ietf/data_fetcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, output, format) ⇒ DataFetcher

Data fetcher initializer



17
18
19
20
21
22
23
# File 'lib/relaton_ietf/data_fetcher.rb', line 17

# Data fetcher initializer.
#
# @param source [String] name of the data source to fetch
# @param output [String] directory the generated files are written to
# @param format [String] output format name; the file extension is derived
#   from it by dropping a leading "bib" or "rfc"
def initialize(source, output, format)
  @source = source
  @output = output
  @format = format
  # e.g. "bibxml" and "rfcxml" both become "xml"; "yaml" stays "yaml"
  @ext = format.sub(/^bib|^rfc/, "")
  # paths already written, used by #save_doc for duplicate detection
  @files = []
end

Class Method Details

.fetch(source, output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch



33
34
35
36
37
38
39
40
41
# File 'lib/relaton_ietf/data_fetcher.rb', line 33

# Initialize a fetcher and run it, printing start/stop times and the
# elapsed whole seconds to standard output.
#
# @param source [String] name of the data source to fetch
# @param output [String] output directory; created if it does not exist
# @param format [String] output format name
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  Dir.exist?(output) || FileUtils.mkdir_p(output)
  fetcher = new(source, output, format)
  fetcher.fetch
  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end

Instance Method Details

#create_series(ref, versions) ⇒ Object

Create unversioned bibliographic item



117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/relaton_ietf/data_fetcher.rb', line 117

# Create and save an unversioned bibliographic item for a draft series.
#
# Nothing is done unless at least two versions are given. The item gets
# one "includes" relation per version (ordered by trailing version number)
# and takes its title and abstract from the saved file of the last version.
#
# @param ref [String] series reference, used as formatted reference and
#   as the "Internet-Draft" document identifier
# @param versions [Array<String>] versioned references of the series
def create_series(ref, versions) # rubocop:disable Metrics/AbcSize
  return unless versions.size > 1

  ordered = versions.sort_by { |name| name.match(/\d+$/).to_s.to_i }
  formatted = RelatonBib::FormattedRef.new content: ref
  identifier = RelatonBib::DocumentIdentifier.new type: "Internet-Draft", id: ref, primary: true
  relations = ordered.map { |name| version_relation name, "includes" }
  # NOTE(review): the file is always read as YAML, whatever @format is — confirm
  latest = HashConverter.hash_to_bib YAML.load_file("#{@output}/#{ordered.last}.#{@ext}")
  series_item = IetfBibliographicItem.new(
    title: latest[:title],
    abstract: latest[:abstract],
    formattedref: formatted,
    docid: [identifier],
    relation: relations,
  )
  save_doc series_item
end

#fetch ⇒ Object

Fetch documents



46
47
48
49
50
51
52
# File 'lib/relaton_ietf/data_fetcher.rb', line 46

# Fetch documents from the configured source.
#
# Dispatches on @source to the matching fetch method; an unrecognised
# source does nothing and returns nil.
def fetch
  handlers = {
    "ietf-rfcsubseries" => :fetch_ieft_rfcsubseries,
    "ietf-internet-drafts" => :fetch_ieft_internet_drafts,
    "ietf-rfc-entries" => :fetch_ieft_rfcs,
  }
  handler = handlers[@source]
  send(handler) if handler
end

#fetch_ieft_internet_drafts ⇒ Object

Fetches ietf-internet-drafts documents



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/relaton_ietf/data_fetcher.rb', line 66

# Fetches ietf-internet-drafts documents.
#
# Parses every XML file matching "bibxml-ids/*.xml" (relative to the
# current working directory) and saves the result. For files whose name
# contains "D.draft-", the trailing digits of the name are set as the
# document version and the name (minus the "reference.I-D." prefix) is
# remembered; the remembered names are passed to #update_versions unless
# the output format is "bibxml".
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  versions = []
  Dir["bibxml-ids/*.xml"].each do |path|
    file = File.basename path, ".xml"
    ver = nil
    if file.include?("D.draft-")
      versions << file.sub(/^reference\.I-D\./, "")
      ver = file[/(?<ver>\d+)$/, "ver"]
    end
    bib = BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
    # the parsed item exposes no setter here, so the version is injected directly
    bib.instance_variable_set :@version, [RelatonBib::BibliographicItem::Version.new(nil, ver)] if ver
    save_doc bib
  end
  return if versions.empty? || @format == "bibxml"

  update_versions(versions)
end

#fetch_ieft_rfcs ⇒ Object

Fetches ietf-rfc-entries documents



165
166
167
168
169
170
171
172
# File 'lib/relaton_ietf/data_fetcher.rb', line 165

# Fetches ietf-rfc-entries documents.
#
# Parses and saves every "rfc-entry" element of the RFC index. A failure
# on one entry is reported on standard error (message plus the first six
# backtrace lines) and does not stop processing of the remaining entries.
def fetch_ieft_rfcs
  entries = rfc_index.xpath("xmlns:rfc-entry")
  entries.each do |doc|
    begin
      save_doc RfcEntry.parse(doc)
    rescue StandardError => e
      warn "Error parsing #{doc.at('./xmlns:doc-id').text}: #{e.message}"
      warn e.backtrace[0..5].join("\n")
    end
  end
end

#fetch_ieft_rfcsubseries ⇒ Object

Fetches ietf-rfcsubseries documents



57
58
59
60
61
# File 'lib/relaton_ietf/data_fetcher.rb', line 57

# Fetches ietf-rfcsubseries documents.
#
# Parses and saves every "bcp-entry", "fyi-entry" and "std-entry" element
# of the RFC index.
def fetch_ieft_rfcsubseries
  entries = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  entries.each { |doc| save_doc RfcIndexEntry.parse(doc) }
end

#file_name(entry) ⇒ String

Generate file name



214
215
216
217
218
219
220
221
222
223
224
# File 'lib/relaton_ietf/data_fetcher.rb', line 214

# Generate the output file path for an entry.
#
# The identifier is taken, in order of preference, from the entry's
# "Internet-Draft" document identifier (when the entry responds to
# +docidentifier+), its +docnumber+, or its formatted reference content.
# It is lower-cased for the "ietf-internet-drafts" source and upper-cased
# otherwise; whitespace, commas, colons and slashes become underscores and
# runs of underscores are collapsed.
#
# The identifier string owned by the entry is never modified: case
# conversion works on a copy, so frozen identifiers are accepted and the
# entry's docnumber / identifier keep their original spelling.
#
# @param entry [#docnumber, #formattedref] bibliographic item
# @return [String] path inside the output directory
def file_name(entry) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  id = if entry.respond_to? :docidentifier
         entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id
       end
  id ||= entry.docnumber || entry.formattedref.content
  # non-bang variants: the bang forms would rewrite the entry's own string
  id = @source == "ietf-internet-drafts" ? id.downcase : id.upcase
  name = id.gsub(/[\s,:\/]/, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end

#read_doc(file) ⇒ RelatonIetf::IetfBibliographicItem

Read saved documents



153
154
155
156
157
158
159
160
# File 'lib/relaton_ietf/data_fetcher.rb', line 153

# Read a saved document back from disk.
#
# The file is read as UTF-8 and parsed according to @format: "xml" via
# XMLParser, "yaml" via IetfBibliographicItem.from_hash, anything else via
# BibXMLParser.
#
# @param file [String] path of the saved document
# @return [RelatonIetf::IetfBibliographicItem]
def read_doc(file)
  content = File.read(file, encoding: "UTF-8")
  return XMLParser.from_xml(content) if @format == "xml"
  return IetfBibliographicItem.from_hash(YAML.safe_load(content)) if @format == "yaml"

  BibXMLParser.parse(content)
end

#rfc_index ⇒ Nokogiri::XML::Document

Get RFC index



179
180
181
182
# File 'lib/relaton_ietf/data_fetcher.rb', line 179

# Get the RFC index.
#
# Downloads https://www.rfc-editor.org/rfc-index.xml on every call and
# returns the node found at "/xmlns:rfc-index" in the parsed document.
def rfc_index
  body = Net::HTTP.get URI("https://www.rfc-editor.org/rfc-index.xml")
  document = Nokogiri::XML(body)
  document.at("/xmlns:rfc-index")
end

#save_doc(entry, check_duplicate: true) ⇒ Object

Save document to file



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/relaton_ietf/data_fetcher.rb', line 190

# Save a document to file.
#
# The entry is serialized according to @format ("xml", "yaml", or any
# other name via the entry's +to_<format>+ method) and written as UTF-8 to
# the path given by #file_name. With +check_duplicate+ enabled, a warning
# is printed when the same path was already written by this fetcher;
# the file is written (overwritten) in either case.
#
# @param entry [Object, nil] bibliographic item; nil is ignored
# @param check_duplicate [Boolean] track written paths and warn on repeats
def save_doc(entry, check_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
  return unless entry

  # serialize before computing the file name, same order as before
  content = if @format == "xml" then entry.to_xml(bibdata: true)
            elsif @format == "yaml" then entry.to_hash.to_yaml
            else entry.send("to_#{@format}")
            end
  path = file_name entry
  if check_duplicate
    if @files.include?(path)
      warn "File #{path} already exists. Document: #{entry.docnumber}"
    else
      @files << path
    end
  end
  File.write path, content, encoding: "UTF-8"
end

#update_versions(versions) ⇒ Object

Updates I-D’s versions



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/relaton_ietf/data_fetcher.rb', line 88

# Updates I-D's versions.
#
# Scans the saved files in the output directory whose name looks like
# "<series>-<NN>.<ext>" (series starting with "draft-", NN two digits),
# groups them by series, and for every series:
#
# * calls #create_series with the versions belonging to that series;
# * re-reads each saved version and appends an "updates" relation to the
#   closest lower version and an "updatedBy" relation to the closest
#   higher version, then saves it again without the duplicate check.
#
# A version belongs to a series only when it is exactly
# "<series>-<digits>". A plain substring test would also pull e.g.
# "draft-foo-bar-00" into series "draft-foo" and produce wrong relations.
# Files are grouped by series up front, so the result does not depend on
# the order in which the directory listing returns them, and the series
# name is taken from the file's base name so the output directory name
# cannot leak into it.
#
# @param versions [Array<String>] versioned draft references
def update_versions(versions) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file_pattern = /(?<series>draft-.+)-(?<ver>\d{2})\.#{Regexp.escape @ext}\z/
  files_by_series = Hash.new { |hash, key| hash[key] = [] }
  Dir["#{@output}/*.#{@ext}"].sort.each do |file|
    match = file_pattern.match File.basename(file)
    files_by_series[match[:series]] << [file, match[:ver].to_i] if match
  end

  files_by_series.each do |series, files|
    # ascending numeric order, so `last`/`first` below pick the nearest neighbours
    bib_versions = versions.grep(/\A#{Regexp.escape series}-\d+\z/)
                           .sort_by { |ref| ref[/\d+\z/].to_i }
    create_series series, bib_versions

    files.each do |file, ver|
      lv = bib_versions.select { |ref| ref[/\d+\z/].to_i < ver }
      hv = bib_versions.select { |ref| ref[/\d+\z/].to_i > ver }
      next if lv.empty? && hv.empty?

      bib = read_doc(file)
      bib.relation << version_relation(lv.last, "updates") if lv.any?
      bib.relation << version_relation(hv.first, "updatedBy") if hv.any?
      save_doc bib, check_duplicate: false
    end
  end
end

#version_relation(ref, type) ⇒ RelatonBib::DocumentRelation

Create bibitem relation



139
140
141
142
143
144
# File 'lib/relaton_ietf/data_fetcher.rb', line 139

# Create a bibitem relation.
#
# Builds a minimal bibliographic item whose formatted reference and
# primary "Internet-Draft" identifier are both +ref+, and wraps it in a
# relation of the given type.
#
# @param ref [String] reference of the related document
# @param type [String] relation type
# @return [RelatonBib::DocumentRelation]
def version_relation(ref, type)
  related_item = IetfBibliographicItem.new(
    formattedref: RelatonBib::FormattedRef.new(content: ref),
    docid: [RelatonBib::DocumentIdentifier.new(type: "Internet-Draft", id: ref, primary: true)],
  )
  RelatonBib::DocumentRelation.new(type: type, bibitem: related_item)
end