Class: RelatonIetf::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_ietf/data_fetcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, output, format) ⇒ DataFetcher

Data fetcher initializer

Parameters:

  • source (String)

    source name

  • output (String)

    directory to save files

  • format (String)

    format of output files (xml, yaml, bibxml); for ietf-rfcsubseries source only: xml



17
18
19
20
21
22
23
# File 'lib/relaton_ietf/data_fetcher.rb', line 17

#
# Data fetcher initializer
#
# @param [String] source source name
# @param [String] output directory to save files
# @param [String] format format of output files (xml, yaml, bibxml);
#   for ietf-rfcsubseries source only: xml
#
def initialize(source, output, format)
  @source, @output, @format = source, output, format
  # File extension is the format with a leading "bib"/"rfc" removed,
  # e.g. "bibxml" -> "xml".
  @ext = format.sub(/^bib|^rfc/, "")
  # Paths written so far; save_doc consults it to report duplicates.
  @files = []
end

Class Method Details

.fetch(source, output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch

Parameters:

  • source (String)

    source name

  • output (String) (defaults to: "data")

    directory to save files, default: “data”

  • format (String) (defaults to: "yaml")

    format of output files (xml, yaml, bibxml); default: yaml; for ietf-rfcsubseries source only: xml



33
34
35
36
37
38
39
40
41
# File 'lib/relaton_ietf/data_fetcher.rb', line 33

#
# Initialize fetcher and run fetch, printing start/stop times and the
# elapsed duration in whole seconds.
#
# @param [String] source source name
# @param [String] output directory to save files, default: "data"
# @param [String] format format of output files (xml, yaml, bibxml);
#   default: yaml; for ietf-rfcsubseries source only: xml
#
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  # Wall-clock time is only displayed; the duration is taken from the
  # monotonic clock so a system clock adjustment during a long run cannot
  # skew (or negate) the reported number of seconds.
  tick = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  # mkdir_p is a no-op for an existing directory, so no pre-check is needed.
  FileUtils.mkdir_p output
  new(source, output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - tick
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end

Instance Method Details

#fetch ⇒ Object

Fetch documents



46
47
48
49
50
51
52
# File 'lib/relaton_ietf/data_fetcher.rb', line 46

#
# Fetch documents
#
# Runs the fetch routine that corresponds to @source. A source name that is
# not listed below is ignored and nil is returned.
#
def fetch
  routines = {
    "ietf-rfcsubseries" => :fetch_ieft_rfcsubseries,
    "ietf-internet-drafts" => :fetch_ieft_internet_drafts,
    "ietf-rfc-entries" => :fetch_ieft_rfcs,
  }
  routine = routines[@source]
  send(routine) if routine
end

#fetch_ieft_internet_drafts ⇒ Object

Fetches ietf-internet-drafts documents



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/relaton_ietf/data_fetcher.rb', line 66

#
# Fetches ietf-internet-drafts documents
#
# Reads every "*.xml" file in the "bibxml-ids" directory (relative to the
# current working directory), parses it with BibXMLParser and passes the
# result to save_doc.
#
# @return [Array<String>] paths of the files that were read
#
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength
  # Disabled alternative that reads the archive directly from ietf.org,
  # kept for reference:
  #
  # gz = OpenURI.open_uri("https://www.ietf.org/lib/dt/sprint/bibxml-ids.tgz")
  # z = Zlib::GzipReader.new(gz)
  # io = StringIO.new(z.read)
  # z.close
  # Gem::Package::TarReader.new io do |tar|
  #   tar.each do |tarfile|
  #     next if tarfile.directory?
  #
  #     save_doc BibXMLParser.parse(tarfile.read)
  #   end
  # end
  draft_paths = Dir.glob("bibxml-ids/*.xml")
  draft_paths.each do |path|
    xml = File.read(path, encoding: "UTF-8")
    save_doc(BibXMLParser.parse(xml))
  end
end

#fetch_ieft_rfcs ⇒ Object



83
84
85
86
87
# File 'lib/relaton_ietf/data_fetcher.rb', line 83

#
# Fetches ietf-rfc-entries documents
#
# Selects the "rfc-entry" children of the RFC index root, parses each one
# with RfcEntry and passes the result to save_doc.
#
def fetch_ieft_rfcs
  rfc_nodes = rfc_index.xpath("xmlns:rfc-entry")
  rfc_nodes.each { |node| save_doc(RfcEntry.parse(node)) }
end

#fetch_ieft_rfcsubseries ⇒ Object

Fetches ietf-rfcsubseries documents



57
58
59
60
61
# File 'lib/relaton_ietf/data_fetcher.rb', line 57

#
# Fetches ietf-rfcsubseries documents
#
# Selects the "bcp-entry", "fyi-entry" and "std-entry" children of the RFC
# index root, parses each one with RfcIndexEntry and passes the result to
# save_doc.
#
def fetch_ieft_rfcsubseries
  subseries_nodes = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  subseries_nodes.each { |node| save_doc(RfcIndexEntry.parse(node)) }
end

#file_name(entry) ⇒ String

Generate file name

Parameters:

Returns:

  • (String)

    file name



123
124
125
126
127
128
129
130
131
132
133
# File 'lib/relaton_ietf/data_fetcher.rb', line 123

#
# Generate file name
#
# The identifier is the entry's "Internet-Draft" docidentifier id when the
# entry has one, otherwise its docnumber. It is lower-cased for the
# ietf-internet-drafts source and upper-cased for every other source; runs of
# whitespace, commas, colons and slashes become a single underscore.
#
# @param [#docnumber] entry document; it may also respond to #docidentifier
#   (presumably a bibliographic item produced by the parsers - confirm)
#
# @return [String] file name
#
def file_name(entry)
  id = if entry.respond_to? :docidentifier
         entry.docidentifier.detect { |i| i.type == "Internet-Draft" }&.id
       end
  id ||= entry.docnumber
  # Non-bang conversions on purpose: downcase!/upcase! would rewrite the
  # entry's own identifier string in place and raise FrozenError on a frozen
  # string.
  id = @source == "ietf-internet-drafts" ? id.downcase : id.upcase
  name = id.gsub(%r{[\s,:/]}, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end

#rfc_index ⇒ Object



89
90
91
92
# File 'lib/relaton_ietf/data_fetcher.rb', line 89

#
# Downloads the RFC index from rfc-editor.org and parses it as XML.
#
# @return the node matched by "/xmlns:rfc-index" in the parsed document
#
def rfc_index
  body = Net::HTTP.get(URI("https://www.rfc-editor.org/rfc-index.xml"))
  document = Nokogiri::XML(body)
  document.at("/xmlns:rfc-index")
end

#save_doc(entry) ⇒ Object

Save document to file

Parameters:



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/relaton_ietf/data_fetcher.rb', line 99

#
# Save document to file
#
# Serializes the entry according to @format ("xml", "yaml", or any other
# value, which is dispatched to the entry's "to_<format>" method) and writes
# it to the path given by file_name. A warning is printed when the same path
# was already written during this run; the file is written in either case.
#
# @param [Object, nil] entry document to save; nothing is done when it is nil
#
def save_doc(entry) # rubocop:disable Metrics/MethodLength
  return unless entry

  # Serialize before the file name is computed, as the original order does.
  content =
    if @format == "xml" then entry.to_xml(bibdata: true)
    elsif @format == "yaml" then entry.to_hash.to_yaml
    else entry.send("to_#{@format}")
    end
  path = file_name(entry)
  if @files.include?(path)
    warn "File #{path} already exists. Document: #{entry.docnumber}"
  else
    @files.push(path)
  end
  File.write(path, content, encoding: "UTF-8")
end