Class: Oddb2xml::SwissmedicInfoExtractor

Inherits:
Extractor
  • Object
show all
Defined in:
lib/oddb2xml/extractor.rb

Instance Attribute Summary

Attributes inherited from Extractor

#xml

Instance Method Summary collapse

Methods inherited from Extractor

#correct_code, #initialize

Constructor Details

This class inherits a constructor from Oddb2xml::Extractor

Instance Method Details

#to_hash ⇒ Object



346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/oddb2xml/extractor.rb', line 346

def to_hash
  data = Hash.new{|h,k| h[k] = [] }
  return data unless @xml.size > 0
  result = MedicalInformationsContent.parse(@xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.medicalInformation.each do |pac|
    lang = pac.lang.to_s
    next unless lang =~ /de|fr/
    item = {}
    item[:refdata] = true,
    item[:name]  = (name = pac.title) ? name : ''
    item[:owner] = (ownr = pac.authHolder) ? ownr : ''
    if content = /cdata/.match(pac.content)
      html = Nokogiri::HTML(content.to_s)
      # all HTML contents without MonTitle and ownerCompany
      item[:paragraph] =  "<title><p>#{item[:name]}</p></title>" +
         ((paragraph = html.xpath("///div[@class='paragraph']")) ? paragraph.to_s : '')
      if text = html.xpath("///div[@id='Section7750']/p").text
        # 1 ~ 3 swissmedic number
        if text =~ /(\d{5})[,\s]*(\d{5})?|(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/
          [$1, $2, $3].compact.each do |n| # plural
            item[:monid] = n
            data[lang] << item
          end
        end
      end
    end
  end
  data
end