Class: Oddb2xml::SwissmedicInfoExtractor

Inherits:
Extractor
  • Object
show all
Defined in:
lib/oddb2xml/extractor.rb

Instance Attribute Summary

Attributes inherited from Extractor

#xml

Instance Method Summary collapse

Methods inherited from Extractor

#correct_code, #initialize

Constructor Details

This class inherits a constructor from Oddb2xml::Extractor

Instance Method Details

#to_hash ⇒ Object



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
# File 'lib/oddb2xml/extractor.rb', line 318

# Extracts product information ("fi") records from the XML held in @xml.
#
# Only <medicalInformation type="fi"> elements whose lang attribute matches
# /de|fr/ are considered. For each of them the first CDATA child of <content>
# is parsed as HTML, and one entry is emitted per 5-digit registration number
# (up to three) found in the paragraphs of the div with id "Section7750".
# Records without a <content> element, without a CDATA child, or without any
# registration number contribute nothing.
#
# @return [Hash{String => Array<Hash>}] entries grouped by the lang attribute;
#   each entry has the keys :name (title text), :owner (authHolder text),
#   :paragraph (title wrapper followed by the serialized div.paragraph nodes)
#   and :monid (one registration number as a String)
def to_hash
  data = Hash.new { |h, k| h[k] = [] }
  doc = Nokogiri::XML(@xml)
  doc.xpath("//medicalInformations/medicalInformation[@type='fi']").each do |fi|
    lang = fi.attr('lang').to_s
    next unless lang =~ /de|fr/
    # at_xpath returns nil when the element is missing; skip instead of
    # calling #children on nil.
    content = fi.at_xpath('.//content')
    next unless content
    cdata = content.children.detect { |child| child.cdata? }
    next unless cdata
    title  = fi.at_xpath('.//title')
    holder = fi.at_xpath('.//authHolder')
    html   = Nokogiri::HTML(cdata.to_s)
    item = {
      :name  => title ? title.text : '',
      :owner => holder ? holder.text : '',
    }
    # all HTML contents without MonTitle and ownerCompany
    item[:paragraph] = "<title><p>#{item[:name]}</p></title>" +
      html.xpath("///div[@class='paragraph']").to_s
    text = html.xpath("///div[@id='Section7750']/p").text
    # 1 ~ 3 swissmedic numbers, separated by commas and/or whitespace.
    # A single pattern with two optional groups is required: with an
    # alternation the shorter left branch always wins and the third number
    # would never be captured.
    next unless text =~ /(\d{5})[,\s]*(\d{5})?[,\s]*(\d{5})?/
    [$1, $2, $3].compact.each do |n|
      # merge builds a fresh hash per number; appending the same mutated hash
      # would leave every entry carrying the last number.
      data[lang] << item.merge(:monid => n)
    end
  end
  data
end