Class: Oddb2xml::RefdataExtractor

Inherits:
Extractor
  • Object
show all
Defined in:
lib/oddb2xml/extractor.rb

Instance Attribute Summary

Attributes inherited from Extractor

#xml

Instance Method Summary collapse

Constructor Details

#initialize(xml, type) ⇒ RefdataExtractor

Returns a new instance of RefdataExtractor.



198
199
200
201
# File 'lib/oddb2xml/extractor.rb', line 198

# Creates an extractor for one Refdata article category.
#
# @param xml [Object] handed on unchanged to the superclass initializer
# @param type [Symbol] :pharma selects the 'PHARMA' label; any other value
#   selects 'NONPHARMA'
def initialize(xml, type)
  # The label is stored before delegating to the superclass initializer.
  @type = if type == :pharma
            'PHARMA'
          else
            'NONPHARMA'
          end
  super(xml)
end

Instance Method Details

#to_hash ⇒ Object



202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/oddb2xml/extractor.rb', line 202

def to_hash
  data = {}
  result = SwissRegArticleEntry.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  items = result.ARTICLE.ITEM
  items.each do |pac|
    ean13 = (gtin = pac.GTIN.to_s) ? gtin: '0'
    if ean13.size < 13
      puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}" if $VERBOSE
      ean13 = ean13.rjust(13, '0')
    end
    if ean13.size == 14 && ean13[0] == '0'
      puts "Refdata #{@type} remove leading '0' for #{ean13}" if $VERBOSE
      ean13 = ean13[1..-1]
    end
    # but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
    item = {}
    item[:ean13]           = ean13
    item[:no8]             = pac.SWMC_AUTHNR
    item[:pharmacode]      = (phar = pac.PHAR.to_s)   ? phar: '0'
    item[:data_origin]     = 'refdata'
    item[:refdata]         = true
    item[:_type]           = (typ  = pac.ATYPE.downcase.to_sym)  ? typ: ''
    item[:last_change]     = (date = Time.parse(pac.DT).to_s)  ? date: ''  # Date and time of last data change
    item[:desc_de]         = (dscr = pac.NAME_DE)   ? dscr: ''
    item[:desc_fr]         = (dscr = pac.NAME_FR)   ? dscr: ''
    item[:atc_code]        = (code = pac.ATC)    ? code.to_s : ''
    item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam: ''
    item[:company_ean]  = (gln = pac.AUTH_HOLDER_GLN)  ? gln: ''
    unless item[:pharmacode]
      item[:pharmacode] = phar
      unless data[item[:pharmacode]] # pharmacode => GTINs
        data[item[:ean13]] = []
      end
    end
    data[item[:ean13]] = item
  end
  data
end