Module: Oddb2xml

Defined in:
lib/oddb2xml.rb,
lib/oddb2xml/cli.rb,
lib/oddb2xml/calc.rb,
lib/oddb2xml/util.rb,
lib/oddb2xml/builder.rb,
lib/oddb2xml/compare.rb,
lib/oddb2xml/options.rb,
lib/oddb2xml/version.rb,
lib/oddb2xml/extractor.rb,
lib/oddb2xml/compressor.rb,
lib/oddb2xml/downloader.rb,
lib/oddb2xml/semantic_check.rb,
lib/oddb2xml/chapter_70_hack.rb

Defined Under Namespace

Modules: DownloadMethod, Options, TxtExtractorMethods Classes: BagXmlDownloader, BagXmlExtractor, Builder, Calc, Chapter70xtractor, Cli, CompareV5, Compressor, Downloader, EphaDownloader, EphaExtractor, Extractor, GalenicForm, GalenicGroup, LppvDownloader, LppvExtractor, MedregbmDownloader, MedregbmExtractor, MigelDownloader, MigelExtractor, RefdataDownloader, RefdataExtractor, SemanticCheck, SemanticCheckXML, StammXML, SwissmedicDownloader, SwissmedicExtractor, SwissmedicInfoDownloader, SwissmedicInfoExtractor, ZurroseDownloader, ZurroseExtractor

Constant Summary collapse

FAKE_GTIN_START =
"999999"
WORK_DIR =
Dir.pwd
DOWNLOADS =
"#{Dir.pwd}/downloads"
COLUMNS_FEBRUARY_2019 =

Please keep this constant in sync between the swissmedic-diff gem (lib/swissmedic-diff.rb) and the oddb2xml gem (lib/oddb2xml/extractor.rb).

{
  iksnr: /Zulassungs-Nummer/i, # column-nr: 0
  seqnr: /Dosisstärke-nummer/i,
  name_base: /Bezeichnung des Arzneimittels/i,
  company: /Zulassungsinhaberin/i,
  production_science: /Heilmittelcode/i,
  index_therapeuticus: /IT-Nummer/i, # column-nr: 5
  atc_class: /ATC-Code/i,
  registration_date: /Erstzul.datum Arzneimittel/i,
  sequence_date: /Zul.datum Dosisstärke/i,
  expiry_date: /Gültigkeitsdauer der Zulassung/i,
  ikscd: /Packungscode/i, # column-nr: 10
  size: /Packungsgrösse/i,
  unit: /Einheit/i,
  ikscat: /Abgabekategorie Packung/i,
  ikscat_seq: /Abgabekategorie Dosisstärke/i,
  ikscat_preparation: /Abgabekategorie Arzneimittel/i, # column-nr: 15
  substances: /Wirkstoff/i,
  composition: /Zusammensetzung/i,
  composition_AMZV: /Volldeklaration rev. AMZV umgesetzt/i,
  indication_registration: /Anwendungsgebiet Arzneimittel/i,
  indication_sequence: /Anwendungsgebiet Dosisstärke/i, # column-nr 20
  gen_production: /Gentechnisch hergestellte Wirkstoffe/i,
  insulin_category: /Kategorie bei Insulinen/i,
  # In February 2018 Swissmedic corrected the typo "betäubunsmittel" to "betäubungsmittel-"
  drug_index: /Verz. bei betäubungsmittel-haltigen Arzneimittel/i
}
COLUMNS_JULY_2015 =
{
  iksnr: /Zulassungs-Nummer/i, # column-nr: 0
  seqnr: /Dosisstärke-nummer/i,
  name_base: /Präparatebezeichnung/i,
  company: /Zulassungsinhaberin/i,
  production_science: /Heilmittelcode/i,
  index_therapeuticus: /IT-Nummer/i, # column-nr: 5
  atc_class: /ATC-Code/i,
  registration_date: /Erstzulassungs-datum./i,
  sequence_date: /Zul.datum Dosisstärke/i,
  expiry_date: /Gültigkeitsdauer der Zulassung/i,
  ikscd: /Packungscode/i, # column-nr: 10
  size: /Packungsgrösse/i,
  unit: /Einheit/i,
  ikscat: /Abgabekategorie Arzneimittel/i,
  ikscat_seq: /Abgabekategorie Dosisstärke/i,
  ikscat_preparation: /Abgabekategorie Präparat/i, # column-nr: 15
  substances: /Wirkstoff/i,
  composition: /Zusammensetzung/i,
  indication_registration: /Anwendungsgebiet Präparat/i,
  indication_sequence: /Anwendungsgebiet Dosisstärke/i,
  gen_production: /Gentechnisch hergestellte Wirkstoffe/i, # column-nr 20
  insulin_category: /Kategorie bei Insulinen/i,
  # In February 2018 Swissmedic corrected the typo "betäubunsmittel" to "betäubungsmittel-"
  drug_index: /Verz. bei betäubun.*smittel-haltigen Präparaten/i
}
XML_OPTIONS =
{
  "xmlns:xsd" => "http://www.w3.org/2001/XMLSchema",
  "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
  "xmlns" => "http://wiki.oddb.org/wiki.php?pagename=Swissmedic.Datendeklaration",
  "CREATION_DATETIME" => Time.new.strftime("%FT%T%z"),
  "PROD_DATE" => Time.new.strftime("%FT%T%z"),
  "VALID_DATE" => Time.new.strftime("%FT%T%z"),
  "GENERATED_BY" => "oddb2xml #{VERSION}"
}
VERSION =
"2.7.2"
@@prodno_to_ean13 =

Needed for ensuring consistency for the Artikelstamm

{}
@@no8_to_ean13 =
{}
@@ean13_to_prodno =
{}
@@ean13_to_no8 =
{}

Class Method Summary collapse

Class Method Details

.add_epha_changes_for_ATC(iksnr, atc_code, force_run: false) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/oddb2xml/util.rb', line 68

# Looks up a replacement ATC code for the given registration number and ATC code.
#
# The lookup table is read once from the comma-separated source at
# @atc_csv_origin and cached in @atc_csv_content. Each line contributes one
# entry: the first two fields form the key, the third field is the value.
#
# @param iksnr [#to_s] registration number; compared as a String with the first field
# @param atc_code [String] ATC code; compared with the second field
# @param force_run [Boolean] when true, the cache is discarded and the source is read again
# @return [Object] the cached third field for [iksnr, atc_code], or +atc_code+ when there is none
def self.add_epha_changes_for_ATC(iksnr, atc_code, force_run: false)
  # A missing cache is treated like an empty one instead of failing on nil.
  @atc_csv_content = {} if force_run || @atc_csv_content.nil?
  if @atc_csv_content.empty?
    source = Oddb2xml.uri_open(@atc_csv_origin)
    begin
      source.readlines.each do |line|
        first, second, third = line.split(",")
        @atc_csv_content[[first, second]] = third
      end
    ensure
      # Everything has been read into the cache, so the handle can be released.
      source.close if source.respond_to?(:close)
    end
  end
  @atc_csv_content[[iksnr.to_s, atc_code]] || atc_code
end

.add_hash(string) ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/oddb2xml/util.rb', line 190

# Parses +string+ as XML and stamps every child element of the root, except
# those named "RESULT", with a "SHA256" attribute holding the SHA-256 hex
# digest of the element's text.
#
# @param string [String] XML document
# @return [String] the document serialised again via +to_xml+
def self.add_hash(string)
  # +huge+ is enabled on the parse options before parsing.
  document = Nokogiri::XML.parse(string) { |config| config.huge }
  document.root.elements.each do |element|
    unless element.name.eql?("RESULT")
      element["SHA256"] = Digest::SHA256.hexdigest(element.text)
    end
  end
  document.to_xml
end

.calc_checksum(str) ⇒ Object



19
20
21
22
23
24
25
26
27
28
# File 'lib/oddb2xml/util.rb', line 19

# Computes a check digit over the first 12 characters of +str+.
#
# Characters at even positions (0-based) are weighted 1, those at odd
# positions 3, as used for EAN-13/GTIN-13 check digits. Surrounding
# whitespace is stripped first; missing or non-numeric characters count as 0.
#
# @param str [String] the leading digits of the code
# @return [String] a single digit "0".."9"
def self.calc_checksum(str)
  digits = str.strip.split(//u)
  weighted_sum = (0...12).sum do |position|
    weight = position.odd? ? 3 : 1
    weight * digits[position].to_i
  end
  ((10 - (weighted_sum % 10)) % 10).to_s
end

.check_column_indices(sheet) ⇒ Object

please keep this constant in sync between (GEM) swissmedic-diff/lib/swissmedic-diff.rb and (GEM) oddb2xml/lib/oddb2xml/extractor.rb



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/oddb2xml/util.rb', line 118

# Checks that the header cells of +sheet+ match COLUMNS_FEBRUARY_2019.
#
# The n-th entry of COLUMNS_FEBRUARY_2019 is matched against the value of the
# n-th cell of the row at index 5. Checking stops at the first mismatch.
#
# @param sheet [#[]] sheet whose row 5 holds cells responding to +value+
# @return [nil] when every header matches
# @raise [RuntimeError] naming the first column whose header does not match
def self.check_column_indices(sheet)
  headers = sheet[5] # the header row is read from index 5

  if $VERBOSE
    COLUMNS_FEBRUARY_2019.size.times { |column| puts "#{column}: #{headers[column].value}" }
  end
  COLUMNS_FEBRUARY_2019.each_with_index do |(key, pattern), column|
    header_name = headers[column].value.to_s
    next if pattern.match(header_name)

    puts "#{__LINE__}: #{key} ->  #{column} #{pattern}\nbut was  #{header_name}" if $VERBOSE
    raise "Packungen.xlslx_has_unexpected_column_#{column}_#{key}_#{pattern}_but_was_#{header_name}"
  end
  nil
end

.convert_to_8859_1(line) ⇒ Object



60
61
62
63
64
65
66
# File 'lib/oddb2xml/util.rb', line 60

# Converts +line+ to ISO-8859-1 after passing it through Oddb2xml.patch_some_utf8.
#
# Lines that cannot be converted are not fatal: the error is printed together
# with the offending line and nil is returned, so callers can skip them.
#
# @param line [String] text to convert
# @return [String, nil] the ISO-8859-1 encoded text, or nil when conversion failed
def self.convert_to_8859_1(line)
  # One transcoding step is enough; re-encoding to the same encoding adds nothing.
  Oddb2xml.patch_some_utf8(line).encode("ISO-8859-1")
rescue => error
  puts "#{error}: in #{line}"
  nil
end

.download_finished(file, remove_file = true) ⇒ Object



106
107
108
109
110
111
112
113
114
115
# File 'lib/oddb2xml/util.rb', line 106

# Saves a copy of a downloaded file in the DOWNLOADS directory.
#
# The copy is taken from WORK_DIR using the base name of +file+ and written to
# DOWNLOADS under the same base name; the result is reported via Oddb2xml.log.
# Nothing is copied when +file+ is nil, does not exist, or already is the
# destination path.
#
# @param file [String, nil] path of the downloaded file
# @param remove_file [Boolean] accepted for compatibility; not used by this method
# @return [Object, nil] the result of Oddb2xml.log after copying, otherwise nil
def self.download_finished(file, remove_file = true)
  # Must come before File.basename, which raises a TypeError for nil.
  return unless file
  src = "#{WORK_DIR}/#{File.basename(file)}"
  dest = "#{DOWNLOADS}/#{File.basename(file)}"
  FileUtils.makedirs(DOWNLOADS)
  return unless File.exist?(file)
  return if File.expand_path(file).eql?(dest)
  FileUtils.cp(src, dest, verbose: false)
  Oddb2xml.log("download_finished saved as #{dest} #{File.size(dest)} bytes.")
end

.gen_prodno(iksnr, seqnr) ⇒ Object



6
7
8
# File 'lib/oddb2xml/util.rb', line 6

# Builds a product number from a registration number and a sequence number.
#
# @param iksnr [Integer, String] registration number, zero-padded to at least 5 digits
# @param seqnr [Integer, String] sequence number, zero-padded to at least 2 digits
# @return [String] both padded numbers concatenated
def self.gen_prodno(iksnr, seqnr)
  format("%05d%02d", iksnr, seqnr)
end

.getEan13forNo8(no8) ⇒ Object



264
265
266
# File 'lib/oddb2xml/util.rb', line 264

# Returns what was registered for +no8+ via setEan13forNo8.
#
# @param no8 [Object] key into @@no8_to_ean13
# @return [Object, Array] the stored value, or an empty Array when nothing
#   (or a falsy value) is stored for +no8+
def self.getEan13forNo8(no8)
  registered = @@no8_to_ean13[no8]
  return registered if registered

  []
end

.getEan13forProdno(prodno) ⇒ Object



260
261
262
# File 'lib/oddb2xml/util.rb', line 260

# Returns the EAN13 list collected for +prodno+ via setEan13forProdno.
#
# @param prodno [Object] key into @@prodno_to_ean13
# @return [Array] the stored list, or an empty Array when nothing is stored
def self.getEan13forProdno(prodno)
  collected = @@prodno_to_ean13[prodno]
  return collected if collected

  []
end

.getNo8ForEan13(ean13) ⇒ Object



272
273
274
# File 'lib/oddb2xml/util.rb', line 272

# Reverse lookup for the mapping maintained by setEan13forNo8.
#
# @param ean13 [Object] key into @@ean13_to_no8
# @return [Object, nil] the no8 stored for +ean13+, or nil when there is none
def self.getNo8ForEan13(ean13)
  @@ean13_to_no8[ean13]
end

.getProdnoForEan13(ean13) ⇒ Object



268
269
270
# File 'lib/oddb2xml/util.rb', line 268

# Reverse lookup for the mapping maintained by setEan13forProdno.
#
# @param ean13 [Object] key into @@ean13_to_prodno
# @return [Object, nil] the prodno stored for +ean13+, or nil when there is none
def self.getProdnoForEan13(ean13)
  @@ean13_to_prodno[ean13]
end

.html_decode(string) ⇒ Object



38
39
40
41
42
43
44
45
# File 'lib/oddb2xml/util.rb', line 38

# Decodes HTML entities in +string+ until decoding no longer changes the text,
# runs the result through Oddb2xml.patch_some_utf8 and turns every "<br>" into
# a newline.
#
# @param string [String] text that may contain (repeatedly escaped) HTML entities
# @return [String] the decoded text
def self.html_decode(string)
  # NOTE(review): Ruby names its 7-bit encoding "US-ASCII", so this comparison
  # with "ASCII" looks like it never matches - confirm before relying on it.
  text = if string.encoding.to_s.eql?("ASCII")
    string.force_encoding("ISO-8859-1").encode("UTF-8")
  else
    string
  end
  decoder = HTMLEntities.new
  # Input may be escaped more than once (e.g. "&amp;lt;"), so decode to a fixed point.
  loop do
    decoded = decoder.decode(text)
    break if text.eql?(decoded)
    text = decoded
  end
  Oddb2xml.patch_some_utf8(text).gsub("<br>", "\n")
end

.log(msg) ⇒ Object



81
82
83
84
85
86
# File 'lib/oddb2xml/util.rb', line 81

# Writes a timestamped message to $stdout when logging is enabled.
#
# Logging is enabled when the options stored in @options have a truthy :log
# entry. Only the first 251 characters of +msg+ are printed.
#
# @param msg [String] message to log
# @return [IO, nil] the flushed $stdout, or nil when logging is disabled
def self.log(msg)
  if @options[:log]
    # TODO:: require 'pry'; binding.pry if msg.size > 1000
    timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    $stdout.puts "#{timestamp}: #{msg[0..250]}"
    $stdout.flush
  end
end

.log_timestamp(msg) ⇒ Object



4
5
6
7
8
9
# File 'lib/oddb2xml/compare.rb', line 4

# Prints +msg+ prefixed with the current time (HH:MM:SS) and flushes $stdout.
#
# @param msg [#to_s] message to print
# @return [String] the line that was printed, without the trailing newline
def self.log_timestamp(msg)
  stamp = Time.now.strftime("%H:%M:%S")
  "#{stamp}: #{msg}".tap do |full_msg|
    puts full_msg
    $stdout.flush
  end
end

.patch_some_utf8(line) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/oddb2xml/util.rb', line 47

# Normalises a handful of problematic characters in +line+.
#
# The line is transcoded to UTF-8 when possible (otherwise left as it is).
# Then U+0089 and U+0092 are removed, U+0096 and U+2013 become "-", U+201D
# becomes a plain double quote, and a trailing line break is chomped. When
# the replacement fails, the error is printed and the line is returned as is.
#
# @param line [String] text to clean up
# @return [String] the cleaned text, or the (possibly transcoded) input on failure
def self.patch_some_utf8(line)
  line = begin
    line.encode("utf-8")
  rescue
    line
  end
  # An empty replacement makes String#tr delete the character.
  substitutions = [["\u0089", ""], ["\u0092", ""], ["\u0096", "-"], ["\u2013", "-"], ["\u201D", '"']]
  begin
    substitutions.reduce(line) { |text, (from, to)| text.tr(from, to) }.chomp
  rescue => error
    puts "#{error}: in #{line}"
    line
  end
end

.save_options(options) ⇒ Object



88
89
90
# File 'lib/oddb2xml/util.rb', line 88

# Stores +options+ in the module-level @options, which is what
# Oddb2xml.log (:log) and Oddb2xml.skip_download? (:skip_download) read.
#
# @param options [Hash] run options
# @return [Hash] the stored options
def self.save_options(options)
  @options = options
end

.setEan13forNo8(no8, ean13) ⇒ Object



248
249
250
251
252
253
254
255
256
257
258
# File 'lib/oddb2xml/util.rb', line 248

# Registers +ean13+ for +no8+, together with the reverse mapping.
#
# The first registration for a given +no8+ wins: a later call with a
# different +ean13+ changes nothing and is only logged. Two specific EAN13
# values are additionally traced through Oddb2xml.log on every call.
#
# @param no8 [Object] key of @@no8_to_ean13
# @param ean13 [#to_i] value to register
# @return [Object, nil] +no8+ when registered, the log result on a conflict, otherwise nil
def self.setEan13forNo8(no8, ean13)
  traced = [7680006660045, 7680006660014].include?(ean13.to_i)
  Oddb2xml.log "setEan13forNo8 #{no8} ean13 #{ean13}" if traced
  registered = @@no8_to_ean13[no8]
  if registered.nil?
    @@no8_to_ean13[no8] = ean13
    @@ean13_to_no8[ean13] = no8
  elsif !registered.eql?(ean13)
    Oddb2xml.log "@@no8_to_ean13[no8] #{registered} not overridden by #{ean13}"
  end
end

.setEan13forProdno(prodno, ean13) ⇒ Object



239
240
241
242
243
244
245
246
# File 'lib/oddb2xml/util.rb', line 239

# Appends +ean13+ to the list kept for +prodno+ and records the reverse mapping.
#
# Every call appends, so registering the same +ean13+ twice stores it twice.
# Two specific EAN13 values are additionally traced through Oddb2xml.log.
#
# @param prodno [Object] key of @@prodno_to_ean13
# @param ean13 [#to_i] value to append
# @return [Object] +prodno+
def self.setEan13forProdno(prodno, ean13)
  traced = [7680006660045, 7680006660014].include?(ean13.to_i)
  Oddb2xml.log "setEan13forProdno #{prodno} ean13 #{ean13}" if traced
  (@@prodno_to_ean13[prodno] ||= []) << ean13
  @@ean13_to_prodno[ean13] = prodno
end

.skip_download(file) ⇒ Object



96
97
98
99
100
101
102
103
104
# File 'lib/oddb2xml/util.rb', line 96

# Reuses a previously saved copy from the DOWNLOADS directory, if there is one.
#
# When a file with the same base name as +file+ exists in DOWNLOADS it is
# copied to +file+ (unless +file+ already is that path). Never reuses
# anything when the constant VCR is defined.
#
# @param file [String] path the download would be written to
# @return [Boolean] true when a saved copy was found, false otherwise
def self.skip_download(file)
  return false if defined?(VCR)

  saved_copy = "#{DOWNLOADS}/#{File.basename(file)}"
  return false unless File.exist?(saved_copy)

  unless File.expand_path(file).eql?(saved_copy)
    FileUtils.cp(saved_copy, file, verbose: false, preserve: true)
  end
  true
end

.skip_download? ⇒ Boolean

Returns:

  • (Boolean)


92
93
94
# File 'lib/oddb2xml/util.rb', line 92

# Reports the :skip_download entry of the options stored by save_options.
#
# @return [Object, nil] the value of @options[:skip_download]
def self.skip_download?
  @options[:skip_download]
end

.uri_open(url) ⇒ Object



10
11
12
13
14
15
16
17
# File 'lib/oddb2xml/util.rb', line 10

# Opens +url+ for reading through open-uri.
#
# The URL is always parsed and opened via URI#open. It is never handed to
# IO.popen, which would run the string as a shell command instead of
# fetching it.
#
# @param url [String] URL to open
# @return [Object] whatever URI#open returns for the parsed URL
# @raise [URI::InvalidURIError] when +url+ cannot be parsed
def self.uri_open(url)
  URI.parse(url).open
end

.validate_via_xsd(xsd_file, xml_file) ⇒ Object



219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/oddb2xml/util.rb', line 219

# Validates +xml_file+ against the XML schema in +xsd_file+.
#
# Every validation error is printed together with an xmllint command line for
# reproducing it, and is then turned into a failed expectation. The +expect+
# and +be_nil+ calls are not defined here, so this presumably only works
# where RSpec matchers are in scope - confirm against the callers.
#
# @param xsd_file [String] path of the XSD file
# @param xml_file [String] path of the XML file to validate
# @return [Object] the collection of validation errors (empty when the file is valid)
def self.validate_via_xsd(xsd_file, xml_file)
  # File.read takes a path; IO.open would expect an integer file descriptor.
  xsd = File.read(xsd_file)
  xsd_rtikelstamm_xml = Nokogiri::XML::Schema(xsd)
  doc = Nokogiri::XML(File.read(xml_file))
  xsd_rtikelstamm_xml.validate(doc).each do |error|
    if error.message
      puts "Failed validating #{xml_file} with #{File.size(xml_file)} bytes using XSD from #{xsd_file}"
      puts "CMD: xmllint --noout --schema #{xsd_file} #{xml_file}"
    end
    msg = "expected #{error.message} to be nil\nfor #{xml_file}"
    puts msg
    expect(error.message).to be_nil, msg
  end
end

.verify_sha256(file) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/oddb2xml/util.rb', line 203

# Checks the "SHA256" attribute of every child element of the root of +file+.
#
# For each element not named "RESULT", the attribute must equal the SHA-256
# hex digest of the element's text. On the first mismatch a message is
# printed and the process exits with status 3.
#
# @param file [String] path of the XML file to verify
# @return [true] when every checked element matches
def self.verify_sha256(file)
  # Block form closes the file handle as soon as the document has been parsed.
  doc = File.open(file) { |f| Nokogiri::XML(f) }
  doc.root.elements.each do |node|
    next if node.name.eql?("RESULT")
    sha256 = Digest::SHA256.hexdigest node.text
    unless node["SHA256"].eql?(sha256)
      puts "Verifiying #{node["SHA256"]} != expectd #{sha256} against node #{node.text} failed"
      exit(3)
    end
  end
  true
end