Class: Oddb2xml::RefdataExtractor

Inherits:
Extractor
  • Object
show all
Defined in:
lib/oddb2xml/extractor.rb

Instance Attribute Summary

Attributes inherited from Extractor

#xml

Instance Method Summary collapse

Constructor Details

#initialize(xml, type) ⇒ RefdataExtractor

Returns a new instance of RefdataExtractor.



204
205
206
207
# File 'lib/oddb2xml/extractor.rb', line 204

# Sets the feed label used in this extractor's messages, then defers to the
# parent constructor.
#
# @param xml [Object] forwarded unchanged to the superclass constructor
#   (presumably the raw Refdata XML text — confirm against Extractor)
# @param type [Symbol] :pharma selects the "PHARMA" label; every other value
#   selects "NONPHARMA"
def initialize(xml, type)
  @type =
    if type == :pharma
      "PHARMA"
    else
      "NONPHARMA"
    end
  super(xml)
end

Instance Method Details

#to_hash ⇒ Object



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/oddb2xml/extractor.rb', line 209

def to_hash
  data = {}
  result = SwissRegArticleEntry.parse(@xml.sub(STRIP_FOR_SAX_MACHINE, ""), lazy: true)
  items = result.ARTICLE.ITEM
  items.each do |pac|
    ean13 = (gtin = pac.GTIN.to_s) ? gtin : "0"
    if ean13.size < 13
      puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
      ean13 = ean13.rjust(13, "0")
    end
    if ean13.size == 14 && ean13[0] == "0"
      puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
      ean13 = ean13[1..-1]
    end
    # but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
    item = {}
    item[:ean13] = ean13
    item[:no8] = pac.SWMC_AUTHNR
    item[:data_origin] = "refdata"
    item[:refdata] = true
    item[:_type] = (typ = pac.ATYPE.downcase.to_sym) ? typ : ""
    item[:last_change] = (date = Time.parse(pac.DT).to_s) ? date : "" # Date and time of last data change
    item[:desc_de] = (dscr = pac.NAME_DE) ? dscr : ""
    item[:desc_fr] = (dscr = pac.NAME_FR) ? dscr : ""
    item[:desc_it] = item[:desc_de] # refdata has no italian name
    item[:atc_code] = (code = pac.ATC) ? code.to_s : ""
    item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam : ""
    item[:company_ean] = (gln = pac.AUTH_HOLDER_GLN) ? gln : ""
    data[item[:ean13]] = item
  end
  data
end