Class: Oddb2xml::RefdataExtractor

Inherits:
Extractor
  • Object
show all
Defined in:
lib/oddb2xml/extractor.rb

Instance Attribute Summary

Attributes inherited from Extractor

#xml

Instance Method Summary collapse

Constructor Details

#initialize(xml, type) ⇒ RefdataExtractor

Returns a new instance of RefdataExtractor.



179
180
181
182
# File 'lib/oddb2xml/extractor.rb', line 179

# Sets up an extractor for one of the two Refdata feeds.
#
# @param xml  raw document, forwarded unchanged to the parent constructor
# @param type [Symbol] +:pharma+ labels the feed 'PHARMA'; any other value
#   labels it 'NONPHARMA'. The label is kept in @type and is interpolated
#   into the diagnostics printed by #to_hash.
def initialize(xml, type)
  @type = if type == :pharma
            'PHARMA'
          else
            'NONPHARMA'
          end
  super(xml)
end

Instance Method Details

#to_hash ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/oddb2xml/extractor.rb', line 183

def to_hash
  data = {}
  result = SwissRegArticleEntry.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  items = result.ARTICLE.ITEM
  items.each do |pac|
    ean13 = (gtin = pac.GTIN.to_s) ? gtin: '0'
    if ean13.size < 13
      puts "Refdata #{@type} use 13 chars not #{ean13.size} for #{ean13}"
      ean13 = ean13.rjust(13, '0')
    end
    if ean13.size == 14 && ean13[0] == '0'
      puts "Refdata #{@type} remove leading '0' for #{ean13}"
      ean13 = ean13[1..-1]
    end
    # but in refdata_nonPharma we have a about 700 GTINs which are 14 characters and longer
    item = {}
    item[:data_origin]     = 'refdata'
    item[:refdata]         = true
    item[:_type]           = (typ  = pac.ATYPE.downcase.to_sym)  ? typ: ''
    item[:ean13]           = ean13
    item[:pharmacode]      = (phar = pac.PHAR.to_s)   ? phar: '0'
    item[:last_change]     = (date = Time.parse(pac.DT).to_s)  ? date: ''  # Date and time of last data change
    item[:desc_de]         = (dscr = pac.NAME_DE)   ? dscr: ''
    item[:desc_fr]         = (dscr = pac.NAME_FR)   ? dscr: ''
    item[:atc_code]        = (code = pac.ATC)    ? code.to_s : ''
    item[:company_name] = (nam = pac.AUTH_HOLDER_NAME) ? nam: ''
    item[:company_ean]  = (gln = pac.AUTH_HOLDER_GLN)  ? gln: ''
    unless item[:pharmacode]
      item[:pharmacode] = phar
      unless data[item[:pharmacode]] # pharmacode => GTINs
        data[item[:ean13]] = []
      end
    end
    data[item[:ean13]] = item
  end
  data
end