Class: ODDB::Import::PharmNet::Importer

Inherits:
Importer
  • Object
show all
Defined in:
lib/oddb/import/pharmnet.rb

Constant Summary collapse

ERROR_EXPLANATIONS =
{
  "execution expired"                          => "the server stopped responding.",
  "503 => Net::HTTPServiceUnavailable"         => "the server is unavailable: http://en.wikipedia.org/wiki/HTTP_503#5xx_Server_Error",
  "Invalid RTF-File: Text before rtf-version"  => "the link pointed to a file that could not be parsed as RTF (probably a PDF)",
  "Multiple assignment of Registration-Number" => <<-EOS,
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Importer

#capitalize_all, #company_name, #postprocess, #utf8

Constructor Details

#initialize ⇒ Importer

Returns a new instance of Importer.



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/oddb/import/pharmnet.rb', line 253

# Prepares a fresh importer: caches, error log, statistics counters and the
# on-disk locations used for downloaded documents.
#
# Both the RTF archive directory and the directory that holds the latest
# HTML snapshot are created below ODDB.config.var if they do not exist yet.
# Finally control is handed to the superclass initializer.
def initialize
  # Name fragments (legal forms, "Pharma", "Arzneimittel", ...) that are
  # stripped with gsub before names are compared or combined.
  @stop = /(Pharma(ceuticals|zeutische\s*Fabrik)?|Arzneim(ittel|\.)|GmbH|[u&]\.?\s*Co\.?|Kg|Ltd\.?|')\s*/i
  @htmlentities = HTMLEntities.new

  # caches and bookkeeping
  @result_cache, @distance_cache, @errors, @sources = {}, {}, {}, {}

  # per-key (:fachinfo / :patinfo) statistics, defaulting to 0
  @assigned, @removed, @not_removed = Hash.new(0), Hash.new(0), Hash.new(0)

  # plain counters
  @repaired = @reparsed_fis = @reparsed_pis = 0
  @products_created = @sequences_created = @packages_created = 0

  # filesystem locations
  var_dir = ODDB.config.var
  @archive = File.join(var_dir, 'rtf', 'pharmnet')
  FileUtils.mkdir_p(@archive)
  @latest = File.join(var_dir, 'html', 'pharmnet', 'latest.html')
  FileUtils.mkdir_p(File.dirname(@latest))
  super
end

Instance Attribute Details

#errors ⇒ Object (readonly)

Returns the value of attribute errors.



252
253
254
# File 'lib/oddb/import/pharmnet.rb', line 252

# Read accessor for the error log collected while importing.
#
# Returns the Hash stored in @errors. Other methods of this class fill it
# with entries keyed by the first 42 characters of an exception message;
# each value is an Array of [sequence name, message, backtrace line, ...]
# rows.
def errors
  @errors
end

Instance Method Details

#_assign_info(key, doc, sequence, opts = {}) ⇒ Object



299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/oddb/import/pharmnet.rb', line 299

# Stores +doc+ as the German version of the sequence's info document.
#
# key      - name of the accessor on +sequence+ (e.g. :fachinfo, :patinfo)
# doc      - the document to assign
# sequence - the sequence receiving the document
# opts     - :replace => true overwrites an info that is not empty
#
# Nothing happens (and nil is returned) when the existing info is not empty
# and :replace is not set. Otherwise the per-key assignment counter is
# incremented, doc, info and sequence are saved in that order, and the
# result of sequence.save is returned.
def _assign_info(key, doc, sequence, opts={})
  info = sequence.send(key)
  if info.empty? || opts[:replace]
    ODDB.logger.debug('PharmNet') do
      sprintf("Assigning %s to %s", key, sequence_name(sequence))
    end
    info.de = doc
    @assigned[key] += 1
    [doc, info].each { |record| record.save }
    sequence.save
  end
end

#_composition_paired_relevance(agent, detail) ⇒ Object



361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
# File 'lib/oddb/import/pharmnet.rb', line 361

# Scores how well one active agent matches one composition detail.
#
# agent  - object responding to #dose and #substance (with name.de)
# detail - Hash with :dose and :substance (a String)
#
# The score is the sum of two parts:
# * dose score: 1 when the agent has no dose (0) or both doses are equal,
#   otherwise the ratio smaller / larger of the two doses (0 on error);
# * name score: ngram_similarity of the two substance names, ignoring the
#   fragment "hydrochlorid".
def _composition_paired_relevance(agent, detail)
  agent_qty = agent.dose.to_f
  detail_qty = detail[:dose].to_f
  dose_score = begin
    if agent_qty == 0 || agent_qty == detail_qty
      1
    else
      low, high = if agent_qty < detail_qty
                    [agent_qty, detail_qty]
                  else
                    [detail_qty, agent_qty]
                  end
      low / high
    end
  rescue
    0
  end
  salt = /hydrochlorid/
  agent_name = agent.substance.name.de.gsub(salt, '')
  detail_name = detail[:substance].gsub(salt, '')
  dose_score + ngram_similarity(agent_name, detail_name)
end

#_exclusive_permutation(left, right) ⇒ Object



428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
# File 'lib/oddb/import/pharmnet.rb', line 428

# Enumerates every one-to-one pairing between +left+ and +right+.
#
# left, right - Arrays of (distinct) values, normally of equal size
#
# Returns an Array of pairings; each pairing is an Array of [l, r] pairs in
# which every element of +left+ occurs exactly once. For two lists of n
# distinct elements this yields n! pairings, each exactly once.
#
# Implementation note: the first element of +right+ is paired with each
# element of +left+ in turn, and the remainders are paired recursively.
# Pairing further +right+ elements at the same level would only reproduce
# pairings already found in a different order. The previous implementation
# computed those via +inject+, but its block returned the receiver of
# +each+, so they were silently discarded after being computed. This
# version produces the identical result without the wasted recursion.
def _exclusive_permutation(left, right)
  return [[[left.first, right.first]]] if left.size == 1
  return [] if right.empty?

  partner = right.first
  rest_right = right.reject { |val| val == partner }
  left.inject([]) { |pairings, member|
    rest_left = left.reject { |val| val == member }
    _exclusive_permutation(rest_left, rest_right).each { |rest|
      pairings.push [[member, partner]].concat(rest)
    }
    pairings
  }
end

#_extract_details(span) ⇒ Object



472
473
474
# File 'lib/oddb/import/pharmnet.rb', line 472

# Returns the decoded text content of a single detail cell.
#
# span - element responding to #inner_html
#
# HTML entities are decoded via @htmlentities; afterwards tabs, newlines and
# the byte sequence \302\240 (a UTF-8 non-breaking space) are removed.
def _extract_details(span)
  decoded = @htmlentities.decode(span.inner_html)
  decoded.gsub(/[\t\n]|\302\240/, '')
end


475
476
477
478
479
480
481
482
483
484
485
486
487
# File 'lib/oddb/import/pharmnet.rb', line 475

# Finds the most recent dated link whose text matches +search+ and records
# it in +data+.
#
# data   - Hash that receives :"date_#{key}" (a Date) and +key+ (the href)
# key    - Symbol under which the href is stored (e.g. :fachinfo)
# search - text the link has to contain (followed by a word boundary)
# page   - object responding to #links; each link responds to #text, #href
#
# Link texts are expected to carry a date in dd.mm.yyyy notation. Links
# without such a date, or with an impossible calendar date, are skipped
# instead of raising. When several links share the newest date, the one
# with the greatest href wins (as before).
#
# Returns the updated +data+ Hash, or nil when no dated link was found.
def _extract_newest_link(data, key, search, page)
  pattern = /#{search}\b/i
  dated = page.links.inject([]) { |memo, link|
    text = link.text.to_s
    if(pattern.match(text) && (str = text[/(\d{2}\.){2}\d{4}/]))
      year, month, day = str.split('.').reverse.collect { |num| num.to_i }
      if Date.valid_date?(year, month, day)
        memo.push [Date.new(year, month, day), link.href]
      end
    end
    memo
  }
  newest = dated.sort_by { |date, href| [date, href.to_s] }.last
  if(newest)
    data.update :"date_#{key}" => newest.first, key => newest.last
  end
end

#_extract_result(node) ⇒ Object



505
506
507
508
509
510
511
512
513
# File 'lib/oddb/import/pharmnet.rb', line 505

# Turns one result table into a list of raw result rows.
#
# node - element supporting the "/" search operator
#
# The first two and the last three table rows are skipped. For every
# remaining row a Hash is returned with
#   :data - the title attributes of all "td//span[@title]" elements
#   :href - the href attribute of the row's first "a[@name]" element
def _extract_result node
  rows = (node/"tr")[2..-4] || []
  rows.collect do |row|
    titles = (row/"td//span[@title]").collect { |span| span["title"] }
    anchor = (row/"a[@name]").first
    { :data => titles, :href => anchor["href"] }
  end
end

#_import(agent, sequences, opts = { :replace => false, :reload => false, :remove => false, :repair => false, :reparse => false, :reparse_patinfo => false, :retries => 3, :retry_unit => 60 }) ⇒ Object



664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
# File 'lib/oddb/import/pharmnet.rb', line 664

# Runs the import over +sequences+ and returns the report.
#
# agent     - HTTP agent; it is wrapped in a RenewableAgent
# sequences - the sequences to check
# opts      - options passed on to #process; additionally :resume, if
#             given, restricts the run to sequences whose (downcased)
#             name sorts at or after the given value
#
# Sequences for which sequence_name returns nothing are ignored; the rest
# are processed in order of their sequence_name. Only odba ids are kept
# while iterating, each sequence is re-fetched from the ODBA cache right
# before it is processed. ODBA::OdbaError for an individual sequence is
# deliberately ignored so that one unavailable record does not abort the
# whole run.
#
# An empty selection no longer raises NoMethodError; the method goes
# straight to the report in that case.
def _import(agent, sequences, opts = { :replace => false, 
                                       :reload  => false, 
                                       :remove  => false, 
                                       :repair  => false,
                                       :reparse => false,
                                       :reparse_patinfo => false,
                                       :retries => 3,
                                       :retry_unit => 60 })
  agent = RenewableAgent.new agent
  if resume = opts[:resume]
    resume = resume.to_s.downcase
    sequences = sequences.select { |sequence| 
      (name = sequence_name(sequence)) && name.downcase >= resume
    }
  else
    sequences = sequences.select { |sequence|
      sequence_name(sequence)
    }
  end
  sequences = sequences.sort_by { |sequence|
    sequence_name(sequence)
  }
  count = 0
  # sequences may be empty after filtering - guard against nil
  head = (first = sequences.first) && first.name
  @checked = "Checked 0 Sequences"
  ## let odba cache release unneeded sequences ...
  sequences.collect! { |sequence| sequence.odba_id }
  while odba_id = sequences.shift
    begin
      ## ... and refetch them when necessary
      sequence = ODBA.cache.fetch(odba_id)
      count += 1
      @checked = sprintf "Checked %i Sequences from '%s' to '%s'",
                        count, head, sequence_name(sequence)
      process(agent, sequence, opts)
    rescue ODBA::OdbaError
      # best effort: skip sequences that cannot be fetched
    end
  end
  report
end

#_search_invalid?(page, term) ⇒ Boolean

Returns:

  • (Boolean)


1090
1091
1092
1093
1094
1095
1096
1097
# File 'lib/oddb/import/pharmnet.rb', line 1090

# Checks whether the result page belongs to a search other than +term+.
#
# page - element supporting the "/" search operator
# term - the search term that was submitted
#
# Looks at the last "div.wbsectionsubtitlebar" element:
# * no such element       -> '' (an empty, but truthy, String)
# * text contains "Arzneimittelname: <term>?" (case-insensitive) -> nil
# * otherwise             -> the "Arzneimittelname:..." part of the text up
#                            to (excluding) the first "?", or nil if the
#                            text has no such part
def _search_invalid?(page, term)
  bar = (page/"div.wbsectionsubtitlebar").last
  return '' if bar.nil?

  expected = /Arzneimittelname:\s#{Regexp.escape(term)}\?/i
  return nil if expected.match(bar.inner_text)

  bar.inner_text[/Arzneimittelname:[^?]+/]
end

#_suitable_data(data, comparison, opts) ⇒ Object



1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
# File 'lib/oddb/import/pharmnet.rb', line 1143

def _suitable_data(data, comparison, opts)
  opts[:cutoff] ||= 0.25
  idx = 0
  raw = data[:data].dup
  comp = comparison.dup
  
  unless(opts[:keep_dose])
    part = Regexp.escape(raw[1].to_s).gsub('\ ', ')|(')
    ptrn = /(#{part})|(\b\d+\s*m?g(\s*\/\s*\d+\s*h)?)[\-\s]*/i
    raw[0] = raw[0].gsub(ptrn, '')
    comp[0] = comp[0].gsub(ptrn, '')
  end

  tabl = /([a-z]{4,})tab.*/i
  raw[1] = raw[1].to_s.gsub(tabl, '\1')
  # Import::Csv::ProductInfos passes a comparison without Galenic Form if 
  #                           no suitable data is found on the first try
  if comp[1] 
    comp[1] = comp[1].to_s.gsub(tabl, '\1')
  end
  dists = raw.collect { |str|
    str = str.to_s
    othr = comparison[idx]
    other = othr ? othr.to_s : str
    idx += 1

    relevance = ngram_similarity str.gsub(@stop, ''), other.gsub(@stop, '')
    return if relevance < opts[:cutoff]
    relevance
  }
  if(subcount = opts[:subcount])
    cdist = (comp = data[:composition]) ? (subcount - comp.size).abs : subcount
    dists.push(cdist) unless cdist > 0
  else
    dists
  end
end

#assign_info(key, agent, data, sequence, opts) ⇒ Object



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/oddb/import/pharmnet.rb', line 275

# Downloads/parses the document referenced by data[key] and assigns it to
# +sequence+.
#
# key      - :fachinfo or :patinfo (used as accessor name and data key)
# agent    - HTTP agent handed on to import_rtf
# data     - result Hash; reads data[key] (url), data[:search_term] and
#            data[:"date_#{key}"]
# sequence - the sequence to update
# opts     - options handed on to import_rtf, _assign_info and remove_info
#
# Without a url the existing info is handed to remove_info. Any
# Timeout::Error or StandardError is logged and recorded in @errors
# (keyed by the first 42 characters of the message) instead of being
# raised; the sequence is saved in that case as well.
def assign_info(key, agent, data, sequence, opts)
  return(remove_info key, sequence, opts) unless(url = data[key])

  # remember where the document came from, even if parsing fails below
  sequence.send "#{key}_url=", "http://gripsdb.dimdi.de#{url}"
  term = data[:search_term]
  doc = import_rtf key, agent, url, term, opts
  doc.date = data[:"date_#{key}"]
  # arbitrary cutoff: fachinfos with less than 5 chapters can't be right...
  # NOTE(review): the check below requires MORE than 5 chapters, i.e. at
  # least 6 - confirm which of comment and code is intended.
  if doc.chapters.size > 5
    _assign_info key, doc, sequence, opts
  else
    ODDB.logger.debug('PharmNet') { 
      sprintf("Discarding %s for %s (%s)", key, sequence_name(sequence), term)
    }
    remove_info key, sequence, opts
  end
rescue Timeout::Error, StandardError => error
  # persist the url assigned above
  sequence.save
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # row format: [sequence name, message, first backtrace line from this file, url]
  (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '', 
    error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip, url ]
end

#assign_registration(sequence, registration) ⇒ Object



312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/oddb/import/pharmnet.rb', line 312

# Assigns the EU registration number +registration+ to +sequence+.
#
# Does nothing (returns nil) when +registration+ is missing or the sequence
# already carries it. If unique_registration? holds for the number and a
# different sequence already uses it, a RuntimeError ("Multiple assignment
# of Registration-Number ...") is raised. Otherwise the existing code is
# updated, or a new one is added, and the sequence is saved.
def assign_registration(sequence, registration)
  return unless registration && sequence.code(:registration, 'EU') != registration

  ODDB.logger.debug('PharmNet') do
    sprintf('Assigning Registration-Number %s to %s', 
            registration, sequence_name(sequence))
  end
  if unique_registration? registration
    holder = Drugs::Sequence.find_by_code(:value   => registration,
                                          :type    => 'registration',
                                          :country => 'EU')
    if holder && holder != sequence
      raise sprintf("Multiple assignment of Registration-Number %s (%s-%i/%s-%i)",
                    registration, sequence_name(sequence), sequence.odba_id,
                    holder.name.de, holder.odba_id)
    end
  end
  existing = sequence.code(:registration, 'EU')
  if existing
    existing.value = registration
  else
    sequence.add_code Util::Code.new(:registration, registration, 'EU')
  end
  sequence.save
end

#best_data(sequence, result) ⇒ Object



336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/oddb/import/pharmnet.rb', line 336

# Picks the search result that fits +sequence+ best.
#
# sequence - the sequence to find data for
# result   - Array of result Hashes as returned by the search
#
# The results are first narrowed down by suitable_data (name, galenic form
# and company of the sequence, plus the number of active agents). Each
# remaining candidate is scored with composition_relevance. Among ALL
# candidates sharing the highest score, the one with the most recent
# :date_fachinfo (or :date_patinfo) wins.
#
# Previously only the first candidate reaching the maximum survived, since
# the scoring block returned nil for ties, so the date tie-break never took
# effect. Candidates without any date are now only chosen when no dated
# contender exists, instead of raising while sorting.
#
# Returns the chosen Hash, or nil if there is no candidate with a score
# above 0.
def best_data(sequence, result)
  sname = sequence.name
  unless sname.de
    sname = sequence.product.name
  end
  comparison = [
    sname,
    (gf = sequence.galenic_forms.first) && gf.description,
    (comp = sequence.company) && comp.name,
  ].collect { |ml| ml ? ml.de : '' }
  suitable = suitable_data comparison, result, 
                           :subcount => sequence.active_agents.size
  relevances = suitable.collect { |data|
    composition_relevance(sequence.active_agents, data)
  }
  max = relevances.max
  # as before: candidates without any relevance are never selected
  return nil unless max && max > 0

  contenders = []
  relevances.each_with_index { |rel, idx|
    contenders.push suitable.at(idx) if rel == max
  }
  dated, undated = contenders.partition { |data|
    data[:date_fachinfo] || data[:date_patinfo]
  }
  newest = dated.sort_by { |data|
    data[:date_fachinfo] || data[:date_patinfo]
  }.last
  newest || undated.last
end

#composition_relevance(agents, data) ⇒ Object



378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# File 'lib/oddb/import/pharmnet.rb', line 378

# Scores how well the active agents of a sequence match the composition
# found in +data+.
#
# agents - Array of active agents
# data   - result Hash; reads :composition, writes :pairs and :relevance
#
# Every agent/detail combination is scored with
# _composition_paired_relevance. All one-to-one assignments between agents
# and details are then tried; the assignment with the highest sum is stored
# under :pairs (as [agent index, detail index] pairs) and the sum divided
# by the number of participants is stored under :relevance and returned.
# :pairs is only written when some assignment scores above 0.
#
# A missing composition is treated as empty, and when there are neither
# agents nor details the relevance is 0 (previously both cases raised).
def composition_relevance(agents, data)
  details = data[:composition] || []
  participants = [agents.size, details.size].max
  # nothing to compare - avoid dividing by zero below
  return data.store(:relevance, 0) if participants == 0

  relevances = {}
  agents.each_with_index { |agent, aidx|
    details.each_with_index { |detail, didx|
      relevances.store [aidx, didx], 
        _composition_paired_relevance(agent, detail)
    }
  }
  max = 0
  exclusive_permutation(participants).each { |pairs|
    # pairs without a counterpart on one side contribute nil.to_f == 0.0
    sum = pairs.inject(0) { |memo, pair|
      memo + relevances[pair].to_f
    }
    if sum > max
      data.store :pairs, pairs
      max = sum 
    end
  }
  data.store :relevance, max / participants
end

#create_sequence(term, data, company, product, galform) ⇒ Object



400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'lib/oddb/import/pharmnet.rb', line 400

# Creates and saves a new sequence (with one composition) from a search
# result.
#
# term    - the search term (not used by this method)
# data    - result Hash; reads :data (name, galenic form, company),
#           :composition (Array of Hashes with :substance and :dose) and
#           :marketable
# company - company whose cleaned German name is appended to the name
# product - product the sequence is attached to
# galform - galenic form of the composition
#
# Increments @sequences_created and returns the new sequence.
def create_sequence(term, data, company, product, galform)
  pname, gfname, cname = data[:data]
  # official name: everything before the first digit or "("
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  official_with_company = [ official, company_name ].join(' ')
  @sequences_created += 1
  sequence = Drugs::Sequence.new
  composition = Drugs::Composition.new
  composition.sequence = sequence
  composition.galenic_form = galform
  data[:composition].each do |act|
    substance = import_substance act[:substance]
    agent = Drugs::ActiveAgent.new substance, act[:dose]
    agent.composition = composition
    agent.save
  end
  composition.save
  sequence.name.de = official_with_company
  sequence.marketable = data[:marketable]
  sequence.product = product
  sequence.save
  sequence
end

#exclusive_permutation(participants) ⇒ Object



423
424
425
426
427
# File 'lib/oddb/import/pharmnet.rb', line 423

# Returns all one-to-one pairings of the indices 0...participants with
# themselves, as computed by _exclusive_permutation.
#
# participants - number of elements on each side
def exclusive_permutation(participants)
  indices = (0...participants).to_a
  _exclusive_permutation(indices, indices.dup)
end

#extract_details(page) ⇒ Object



445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
# File 'lib/oddb/import/pharmnet.rb', line 445

# Collects the details of one product from its detail page.
#
# page - detail page; supports #links and the "/" search operator
#
# Returns a Hash that may contain:
#   :fachinfo / :date_fachinfo - newest "Fachinformation" link and its date
#   :patinfo  / :date_patinfo  - newest "Gebrauchsinformation" link and date
#   :composition  - one Hash (:ask_nr, :substance, :dose) per table row
#                   (header row skipped) of the first table with border 1
#   :registration - text of the span following a "Reg.-Nr." label
#   :marketable   - whether the span following a "Verkehrsf..." label
#                   contains "ja"
# If the page has no such table only the link information is returned.
def extract_details(page)
  data = {}
  [[:fachinfo, "Fachinformation"],
   [:patinfo, "Gebrauchsinformation"]].each do |key, label|
    _extract_newest_link(data, key, label, page)
  end
  table = (page/"table[@border='1']").first
  return data unless table

  rows = (table/"tr")[1..-1] || []
  data.store :composition, rows.collect { |row|
    cells = row/"span"
    {
      :ask_nr    => _extract_details(cells[0]),
      :substance => _extract_details(cells[1]),
      :dose      => parse_dose(_extract_details(cells[2])),
    }
  }
  # label/value spans alternate: remember the text of the preceding span
  label = ''
  (page/"span[@class='wbtxt']").each do |span|
    if /Reg\.?-Nr\.?/ === label
      data.store :registration, span.inner_text
    elsif /Verkehrsf/ === label
      data.store :marketable, span.inner_text.include?('ja')
    end
    label = span.inner_text
  end
  data
end

#extract_result(agent, page) ⇒ Object



488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
# File 'lib/oddb/import/pharmnet.rb', line 488

# Collects the result rows of a search, following the paging links.
#
# agent - HTTP agent used to fetch further result pages
# page  - first result page; must contain the form "titlesForm"
#
# Paging links are the anchors whose text looks like "1", "11", "21-30"
# etc. (a number ending in 1, optionally followed by "-<number>"). Their
# hrefs are sorted and de-duplicated; the first one is skipped and every
# other one is fetched and its rows are appended.
#
# Returns the Array of rows as produced by _extract_result.
def extract_result(agent, page)
  node = page.form("titlesForm").form_node
  result = _extract_result node
  paging = (node/"a").select { |link| /^\d*1(-\d+)?$/.match link.inner_text }
  further = paging.collect { |link| link["href"] }.sort.uniq[1..-1]
  (further || []).each do |href|
    following = agent.get href
    result.concat _extract_result(following.form("titlesForm").form_node)
  end
  result
end

#fix_composition(agents, data) ⇒ Object



514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
# File 'lib/oddb/import/pharmnet.rb', line 514

def fix_composition(agents, data)
  details = data[:composition]
  data[:pairs].each { |aidx, didx|
    agent = agents[aidx]
    detail = details[didx]
    if(agent.dose.nil? || agent.dose.qty == 0)
      if(agent.substance == detail[:substance])
        agent.dose = detail[:dose]
        agent.save
        @repaired += 1
      elsif(!agent.chemical_equivalence)
        agent.chemical_equivalence = Drugs::ActiveAgent.new agent.substance, agent.dose
        agent.chemical_equivalence.save
        substance = import_substance detail[:substance]
        agent.substance = substance
        agent.dose = detail[:dose]
        agent.save
        @repaired += 1
      end
    end
  }
end

#get_details(agent, page, result) ⇒ Object



536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
# File 'lib/oddb/import/pharmnet.rb', line 536

# Requests the detail page for one search result.
#
# agent  - HTTP agent (not used by this method)
# page   - result page containing the form "titlesForm"
# result - result Hash; result[:href] is the link to the detail page
#
# The form is prepared (parinfo, docBaseName, a random magicrequestid), its
# action is set to the path of the href, and every query parameter of the
# href that has a form field of the same name is copied into that field.
#
# Returns whatever submitting the form returns.
def get_details(agent, page, result)
  form = page.form("titlesForm")
  form.field("parinfo").value = 'true'
  form.field("docBaseName").value = form.field('baseName').value
  form.field("magicrequestid").value = rand.to_s
  target = URI.parse result[:href]
  form.action = target.path
  target.query.split('&').each do |param|
    name, value = param.split('=', 2)
    field = form.field(name)
    field.value = value if field
  end
  form.submit
end

#get_search_form(agent) ⇒ Object



551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/oddb/import/pharmnet.rb', line 551

# Navigates from the public index page to the search form.
#
# agent - HTTP agent responding to #get
#
# Steps: fetch the index page, submit its form "pharmnet_amis_off_ppv",
# click the link whose text contains "akzeptieren" but not "nicht
# akzeptieren", then take the form "search_form" of the resulting page and
# point its action at the link with the id "goME".
#
# Returns the prepared search form.
def get_search_form(agent)
  index = "http://www.pharmnet-bund.de/dynamic/de/am-info-system/index.html"
  entry_page = agent.get index
  conditions_page = entry_page.form("pharmnet_amis_off_ppv").submit
  accept = conditions_page.links.find { |l| /(?<!nicht )akzeptieren/i.match l.text }
  search_page = accept.click
  form = search_page.form("search_form")
  target = search_page.links.find { |l| l.attributes["id"] == 'goME' }
  form.action = target.href
  form
end

#get_search_result(agent, term, sequence = nil, opts = {}) ⇒ Object



563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
# File 'lib/oddb/import/pharmnet.rb', line 563

# Searches for +term+, shortening it word by word until something is found.
#
# agent    - renewable HTTP agent (must respond to #renew!)
# term     - the search term; the caller's String is not modified
# sequence - optional sequence, passed on to #search and used for error
#            reporting
# opts     - merged over the defaults below; :retries and :retry_unit
#            control the retry behaviour
#
# Returns the non-empty Array of result Hashes, each with :search_term set
# to the term that was used in the last search attempt, or nil if the term
# got shorter than 3 characters or an error could not be recovered from.
def get_search_result(agent, term, sequence=nil, opts={})
  opts = { :info_unrestricted => false,
           :repair => false, :retries => 3,
           :retry_unit => 60 }.merge opts
  good = nil
  term = term.dup
  ODDB.logger.debug('PharmNet') { sprintf('Searching for %s', term) }
  result = []
  while result.empty?
    return if term.length < 3
    good = term.dup
    result.concat search(agent, term, sequence, opts)
    if(result.empty?)
      # second attempt: same term with whitespace replaced by hyphens
      good = term.gsub(/\s+/, '-')
      result.concat search(agent, good, sequence, opts)
    end
    # drop the last word for the next round
    term.gsub! /\s*[^\s]+$/, ''
  end
  result.each { |data| data.store(:search_term, good) }
  result
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # local variables survive `retry`, so this only initialises once
  retries ||= opts[:retries]
  if((error.is_a?(Timeout::Error) || /ServerError/.match(error.message)) \
     && retries > 0)
    # wait retry_unit, then 4x, 16x, ... as long as retries are left
    seconds = opts[:retry_unit] * 4 ** (opts[:retries] - retries)
    ODDB.logger.debug('PharmNet') {
      sprintf("Waiting %i seconds for the server to recover...", seconds)
    }
    sleep seconds
    retries -= 1
    ODDB.logger.debug('PharmNet') {
      "Renewing Mechanize-agent and starting a new Session" }
    agent.renew!
    @search_form = nil
    # NOTE(review): retry re-runs the method body with the current value of
    # term, which may already have been shortened - confirm this is wanted.
    retry
  else
    # row format: [sequence name, message, first backtrace line from this file]
    (@errors[error.message[0,42]] ||= []).push [ sequence ? sequence_name(sequence) : '',
      error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip ]
  end
  nil
end

#identify_details(agent, term, sequence = nil, opts = { :info_unrestricted => false, :repair => false, :retries => 3}) ⇒ Object



607
608
609
610
611
612
613
614
615
616
617
# File 'lib/oddb/import/pharmnet.rb', line 607

# Searches for +term+ and returns the single best matching result.
#
# agent    - HTTP agent
# term     - search term
# sequence - sequence the results are compared with when the search
#            returns more than one hit
# opts     - options passed on to get_search_result
#
# Returns nil when the search yields nothing, the only hit when there is
# exactly one, and otherwise whatever best_data selects.
def identify_details(agent, term, sequence=nil, 
                     opts = { :info_unrestricted => false, 
                              :repair => false, :retries => 3})
  hits = get_search_result(agent, term, sequence, opts)
  return nil unless hits

  hits.size == 1 ? hits.first : best_data(sequence, hits)
end

#identify_product(term, data, company) ⇒ Object



618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
# File 'lib/oddb/import/pharmnet.rb', line 618

# Finds the product of +company+ that a search result belongs to, creating
# a new one if necessary.
#
# term    - the search term
# data    - result Hash; data[:data] starts with the product name
# company - the company the product has to belong to
#
# Four candidate names are tried in this order: official name with company,
# official name, search term with company, search term. The "official" name
# is the part of the result's name before the first digit or "(". For each
# candidate an exact lookup is tried first, then a search; only products of
# +company+ are accepted.
#
# If nothing is found a new product named "<term> <company>" is created,
# saved and counted in @products_created.
#
# Always returns the product itself. Previously the result of #save was
# returned for newly created products, unlike the other import_*/create_*
# methods of this class.
def identify_product(term, data, company)
  pname, _gfname, _cname = data[:data]
  official = pname[/^[^\d(]+/].strip
  company_name = company.name.de.gsub(@stop, '').strip
  official_with_company = [ official, company_name ].join(' ')
  term_with_company = [ term, company_name ].join(' ')
  [official_with_company, official, term_with_company, term].each do |cnd|
    if (candidate = Drugs::Product.find_by_name(cnd)) \
      && candidate.company == company
      return candidate
    end
    found = Drugs::Product.search_by_name(cnd).find do |other|
      other.company == company
    end
    return found if found
  end
  ## if we can't find a product, we'll have to create a new one.
  @products_created += 1
  product = Drugs::Product.new
  product.name.de = term_with_company
  product.company = company
  product.save
  product
end

#identify_sequence(data, product, galform) ⇒ Object



643
644
645
646
647
648
649
650
651
652
# File 'lib/oddb/import/pharmnet.rb', line 643

# Finds the sequence of +product+ that corresponds to a search result.
#
# data    - result Hash; reads the :dose of every :composition entry
# product - product whose sequences are searched (may be nil)
# galform - the expected galenic form
#
# A sequence matches if it has exactly one composition, the same sorted
# doses (ignoring nil) and +galform+ as its only galenic form.
#
# Returns the first matching sequence, or nil (also when +product+ is nil).
def identify_sequence(data, product, galform)
  return nil unless product

  wanted = data[:composition].collect { |act| act[:dose] }.compact.sort
  product.sequences.find { |seq|
    seq.compositions.size == 1 &&
      seq.doses.compact.sort == wanted &&
      seq.galenic_forms == [galform]
  }
end

#import(agent, sequences, opts = { :replace => false, :reload => false, :remove => false, :repair => false, :reparse => false, :reparse_patinfo => false, :retries => 3, :retry_unit => 60 }) ⇒ Object



653
654
655
656
657
658
659
660
661
662
663
# File 'lib/oddb/import/pharmnet.rb', line 653

# Runs the import and mails the resulting report to the administrators.
#
# The mail subject consists of the current time (strftime '%c') and the
# class name; the body is whatever _import returns. All arguments are
# handed on to _import unchanged.
def import(agent, sequences, opts = { :replace => false, 
                                      :reload  => false, 
                                      :remove  => false, 
                                      :repair  => false,
                                      :reparse => false,
                                      :reparse_patinfo => false,
                                      :retries => 3,
                                      :retry_unit => 60 })
  subject = sprintf("%s: %s", Time.now.strftime('%c'), self.class)
  body = _import(agent, sequences, opts)
  Util::Mail.notify_admins subject, body
end

#import_company(name) ⇒ Object



704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
# File 'lib/oddb/import/pharmnet.rb', line 704

# Finds the company called +name+ or creates it.
#
# The name is stripped of the fragments in @stop before it is looked up.
# If there is no exact hit, a search is run with the cleaned name and then
# with ever shorter versions of it (the last word is dropped each round);
# the first search hit whose cleaned German name has an n-gram similarity
# above 0.8 to the cleaned name is accepted.
#
# A company that was found gets +name+ added as a synonym; otherwise a new
# company with the German name +name+ is created. Either way the company is
# saved and returned.
def import_company(name)
  cleaned = name.gsub(@stop, '').strip
  query = cleaned
  company = Business::Company.find_by_name(query)
  while company.nil? && !query.empty?
    company = Business::Company.search_by_name(query).find { |candidate|
      ngram_similarity(cleaned, candidate.name.de.gsub(@stop, '')) > 0.8
    }
    query = query.gsub(/(^|\s)+\S+\s*$/, '')
  end
  if company
    company.name.add_synonym name
  else
    company = Business::Company.new
    company.name.de = name
  end
  company.save
  company
end

#import_galenic_form(description) ⇒ Object



723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
# File 'lib/oddb/import/pharmnet.rb', line 723

# Finds the galenic form described by +description+ or creates it.
#
# Lookup order:
# 1. exact match on the description - returned as is;
# 2. first search hit whose German description has an n-gram similarity
#    above 0.75 - +description+ is added as a synonym and the form is saved;
# 3. otherwise a new galenic form with that German description is saved.
#
# Returns the galenic form.
def import_galenic_form(description)
  known = Drugs::GalenicForm.find_by_description(description)
  return known if known

  similar = Drugs::GalenicForm.search_by_description(description).find { |gf|
    ngram_similarity(description, gf.description.de) > 0.75
  }
  if similar
    similar.description.add_synonym description
    similar.save
    return similar
  end

  created = Drugs::GalenicForm.new
  created.description.de = description
  created.save
  created
end

#import_missing(agent, term, opts = {}) ⇒ Object



742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
# File 'lib/oddb/import/pharmnet.rb', line 742

# Imports everything a search for +term+ returns, creating products,
# sequences and packages that do not exist yet.
#
# agent - HTTP agent; it is wrapped in a RenewableAgent
# term  - the search term
# opts  - options; :skip_totals defaults to true here, :repair additionally
#         updates name, marketability and composition of existing sequences
#
# Returns the report (see #report).
def import_missing(agent, term, opts={})
  @checked = "Searched for FIs/GIs for '#{term}'"
  opts = { :skip_totals => true }.merge opts
  agent = RenewableAgent.new agent
  if result = get_search_result(agent, term, nil, opts)
    result.each do |data|
      company, product, galform = nil
      sequence = nil
      # first choice: identify the sequence by its registration number
      registration = data[:registration]
      if registration && unique_registration?(registration)
        # NOTE(review): unlike assign_registration this lookup passes neither
        # :type nor :country - confirm that is intended.
        sequence = Drugs::Sequence.find_by_code :value => registration
      end
      # second choice: identify it by company, product, doses and form
      unless sequence
        pname, gfname, cname = data[:data]
        galform = import_galenic_form gfname
        company = import_company cname
        product = identify_product term, data, company
        sequence = identify_sequence data, product, galform
      end
      if sequence
        if opts[:repair]
          pname, gfname, cname = data[:data]
          if product = sequence.product
            product.company ||= import_company cname
          end
          # NOTE(review): product is nil here when the sequence has no
          # product, which would raise NoMethodError - confirm whether that
          # can happen.
          company_name = product.company.name.de.gsub(@stop, '').strip
          official = pname[/^[^\d(]+/].strip
          sequence.marketable = data[:marketable]
          sequence.name.de = [ official, company_name ].join(' ')
          agents = sequence.active_agents
          # called for its side effect: stores :pairs in data, which
          # fix_composition reads; the return value is not used
          relevance = composition_relevance agents, data
          fix_composition agents, data
        end
      else
        sequence = create_sequence term, data, company, product, galform
      end
      assign_registration sequence, data[:registration]
      assign_info(:fachinfo, agent, data, sequence, opts)
      assign_info(:patinfo, agent, data, sequence, opts)
      import_package sequence, data, opts
    end
  end
  report opts
end

#import_package(sequence, data, opts = {}) ⇒ Object



786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
# File 'lib/oddb/import/pharmnet.rb', line 786

# Creates a package for +sequence+ if the result's name describes one.
#
# sequence - the sequence the package belongs to
# data     - result Hash; data[:data] holds the name and the galenic form
# opts     - not used by this method
#
# The name has to look like "<name> - OP<size>" or
# "<name> - OP(<multi>x<size>)", optionally followed by "(<unit>)". If the
# sequence already has a package of that size it is returned; otherwise a
# new package with a single part is created, saved and counted in
# @packages_created.
#
# Returns the package, or nil if the name does not match.
def import_package(sequence, data, opts={})
  pname, gfname, _ = data[:data]
  if match = /^(?<name>.*?)\s*-\s*OP((?<size>\d+)|\((?<multi>\d+)x(?<size>\d+)\))(\((?<unit>[^)]+)\))?$/i.match(pname)
    size = match[:size].to_i
    # NOTE(review): multi is computed but never used below
    multi = match[:multi] && match[:multi].to_i
    package = sequence.packages.find do |pac|
      pac.size == size
    end
    if package.nil?
      @packages_created += 1
      package = Drugs::Package.new
      package.add_code Util::Code.new(:cid, "oddb#{package.uid}", 'DE')
      package.name.de = match[:name]
      part = Drugs::Part.new
      part.size = size
      # the unit is derived from the galenic form name, not from the
      # "(<unit>)" suffix captured by the pattern
      part.unit = import_unit gfname
      part.package = package
      part.composition = sequence.compositions.first
      part.save
      package.sequence = sequence
      package.save
    end
    package
  end
end

#import_rtf(key, agent, url, term, opts = { :reparse => false, :reload => false}) ⇒ Object



811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
# File 'lib/oddb/import/pharmnet.rb', line 811

# Returns the parsed document for +url+, downloading and parsing the RTF
# file if necessary.
#
# key   - :fachinfo or :patinfo; selects FiParser or PiParser
# agent - HTTP agent used for the download
# url   - source of the document (path or full url)
# term  - product name; used by the parser and for logging
# opts  - :reparse parses again even if a document for +url+ exists (once
#                  per url and importer instance)
#         :reload  downloads again even if the file is in the archive
#
# The file is parsed with the downcased term (whitespace and hyphens
# replaced by "."); as long as fewer than 4 chapters are found, parsing is
# repeated with the last "."-separated part of the term removed.
#
# An existing document gets its chapters replaced and is saved; otherwise
# the freshly parsed document is returned (unsaved).
#
# The archive file (or download buffer) is now closed once parsing is done;
# previously the File handle was never closed.
def import_rtf(key, agent, url, term, opts = { :reparse => false, 
                                               :reload  => false})
  pklass = case key
           when :fachinfo
             FiParser
           when :patinfo
             PiParser
           end
  path = File.join @archive, File.basename(url)
  doc = Text::Document.find_by_source(url)
  ODDB.logger.debug('PharmNet') { 
    sprintf('Comparing %s-sources for %s', key, term) }
  if(doc.nil? || (opts[:reparse] && !@sources[url]))
    @sources.store url, true 
    io = nil
    chapters = []
    new = nil
    begin
      if(opts[:reload] || !File.exist?(path))
        uri = URI.parse url
        uri.scheme = 'http'
        if uri.host.to_s.empty?
          uri.host = 'gripsdb.dimdi.de'
        end
        ODDB.logger.debug('PharmNet') {
          sprintf('Downloading %s for %s from %s', key, term, uri.to_s) }
        file = agent.get uri.to_s
        file.save path
        ODDB.logger.debug('PharmNet') {
          sprintf('Saving %s for %s in %s', key, term, path) }
        io = StringIO.new(file.body)
      else 
        ODDB.logger.debug('PharmNet') {
          sprintf('Reading %s for %s from %s', key, term, path) }
        io = File.open(path)
      end
      term = term.downcase.gsub(/[\s-]/, '.')
      while !term.empty? && chapters.size < 4
        ODDB.logger.debug('PharmNet') {
          sprintf('Parsing %s with term: %s', key, term) }
        io.rewind
        new = pklass.new(term).import io
        chapters = new.chapters
        term = term.gsub /(\A|\.)[^.]*$/, ''
      end
    ensure
      # release the file handle / buffer, also when parsing fails
      io.close if io && !io.closed?
    end
    ## ensure that chapter-headings are bold
    new.chapters.each { |chapter|
      if((paragraph = chapter.paragraphs.first) \
         && (format = paragraph.formats.first))
        format.augment "b"
      end
    }
    new.source = url
    if doc
      doc.chapters.replace chapters
      doc.save
    else
      doc = new
    end
  end
  doc
end

#import_substance(name) ⇒ Object



872
873
874
875
876
877
878
879
880
# File 'lib/oddb/import/pharmnet.rb', line 872

# Returns the substance called +name+, creating and saving it when no
# substance of that name exists yet.
def import_substance(name)
  Drugs::Substance.find_by_name(name) || begin
    created = Drugs::Substance.new
    created.name.de = name
    created.save
    created
  end
end

#import_unit(name) ⇒ Object



881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
# File 'lib/oddb/import/pharmnet.rb', line 881

# Finds the unit called +name+ or creates it.
#
# Lookup order:
# 1. exact match on the name - returned as is;
# 2. first search hit whose German name has an n-gram similarity above
#    0.75 - +name+ is added as a synonym and the unit is saved;
# 3. otherwise a new unit with that German name is saved.
#
# Returns the unit.
def import_unit(name)
  known = Drugs::Unit.find_by_name name
  return known if known

  similar = Drugs::Unit.search_by_name(name).find { |unt|
    ngram_similarity(name, unt.name.de) > 0.75
  }
  if similar
    similar.name.add_synonym name
    similar.save
    return similar
  end

  created = Drugs::Unit.new
  created.name.de = name
  created.save
  created
end

#ngram_similarity(str1, str2, n = 5) ⇒ Object



900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
# File 'lib/oddb/import/pharmnet.rb', line 900

# Measures how similar two strings are, as a Float between 0.0 and 1.0.
#
# str1, str2 - the strings to compare (passed through #u first)
# n          - n-gram length (default 5)
#
# Both strings are downcased and stripped of whitespace, commas, dots,
# hyphens and slashes. The result is the share of n-character windows of
# the longer string (str1 if both are equally long) that occur anywhere in
# the other string. A string shorter than +n+ counts as a single window.
def ngram_similarity(str1, str2, n=5)
  first, second = [str1, str2].collect { |str|
    u(str).downcase.gsub(/[\s,.\-\/]+/, '')
  }
  source, target = first.length < second.length ? [second, first] : [first, second]
  windows = [ source.length - n, 0 ].max + 1
  hits = (0...windows).select { |idx| target.include? source[idx, n] }.size
  hits.to_f / windows
end

#parse_dose(str) ⇒ Object



915
916
917
# File 'lib/oddb/import/pharmnet.rb', line 915

# Builds a Drugs::Dose from a text such as "10.mg" or "2.5mg".
#
# str - the dose text: a leading number followed by the unit
#
# The quantity is the leading number (0.0 if there is none); the unit is
# the trailing run of characters that are neither digits nor dots (nil if
# there is none). Whole numbers without a decimal point ("500mg") are now
# recognised as well; they were previously read as 0.
def parse_dose(str)
  quantity = str[/^\d+(\.\d*)?|^\.\d*/].to_f
  Drugs::Dose.new(quantity, str[/[^\d\.]+$/])
end

#process(agent, sequence, opts = { :replace => false, :reload => false, :remove => false, :repair => false, :reparse => false, :reparse_patinfo => false, :retries => 3, :retry_unit => 60 }) ⇒ Object



918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
# File 'lib/oddb/import/pharmnet.rb', line 918

# Imports (or removes) the Fachinfo and Patinfo of a single sequence.
#
# agent    - HTTP agent
# sequence - the sequence to process
# opts     - :reparse / :reparse_patinfo only re-parse existing documents
#            :replace  overwrite existing infos
#            :remove   remove infos that can no longer be confirmed
#            :repair   additionally repair the composition
#            further options are handed on to the called methods
#
# Any Timeout::Error or StandardError is logged and recorded in @errors
# (keyed by the first 42 characters of the message) instead of being
# raised.
def process(agent, sequence, opts = { :replace => false,
                                      :reload  => false,
                                      :remove  => false, 
                                      :repair  => false,
                                      :reparse => false,
                                      :reparse_patinfo => false,
                                      :retries => 3,
                                      :retry_unit => 60 })

  # re-parsing works on the stored sources only - no search necessary
  return(reparse_fachinfo agent, sequence) if opts[:reparse] && !opts[:reparse_patinfo]
  return(reparse_patinfo agent, sequence) if opts[:reparse_patinfo]
  # nothing to do if both infos are present, unless told otherwise
  return unless sequence.fachinfo.empty? || sequence.patinfo.empty? \
                  || opts[:replace] || opts[:remove]
  data = identify_details(agent, sequence_name(sequence), sequence, opts)

  return(remove_infos sequence, opts) unless data

  # despite its name, cutoff holds the relevance of the match; as a side
  # effect composition_relevance stores :pairs in data for fix_composition
  cutoff = composition_relevance(sequence.active_agents, data)
  return(remove_infos sequence, opts) if(cutoff <= 1.25) # arbitrary value

  assign_info(:fachinfo, agent, data, sequence, opts)
  assign_info(:patinfo, agent, data, sequence, opts)

  fix_composition sequence.active_agents, data if(opts[:repair])

  # assign registration number if really good match
  return if(cutoff < 2) # arbitrary value
  assign_registration sequence, data[:registration]
rescue Timeout::Error, StandardError => error
  ODDB.logger.error('PharmNet') {
    sprintf("%s: %s", error.class, error.message) << "\n" << error.backtrace.join("\n")
  }
  # row format: [sequence name, message, first backtrace line from this file]
  (@errors[error.message[0,42]] ||= []).push [ sequence_name(sequence),
    error.message, error.backtrace.find { |ln| /pharmnet/.match ln }.to_s.strip ]
end

#remove_info(key, sequence, opts) ⇒ Object



953
954
955
956
957
958
959
960
961
962
963
964
965
# File 'lib/oddb/import/pharmnet.rb', line 953

# Drops the German document of one info type from +sequence+, or records
# that it was kept.
#
# key::      accessor name on the sequence, e.g. :fachinfo or :patinfo
# sequence:: the sequence whose document is inspected
# opts::     the document is only removed (and the sequence saved) when
#            opts[:remove] is set; otherwise an existing document is
#            counted in @not_removed
#
# Nothing happens when there is no German document.
def remove_info(key, sequence, opts)
  info = sequence.send(key)
  if opts[:remove] && info.de
    @removed[key] += 1
    ODDB.logger.debug('PharmNet') {
      # Name the document type that is actually removed; this used to say
      # "Fachinfo" for Patinfos as well.
      sprintf('Removing %s from %s', key.to_s.capitalize,
              sequence_name(sequence))
    }
    info.de = nil
    sequence.save
  elsif info.de
    @not_removed[key] += 1
  end
end

#remove_infos(sequence, opts) ⇒ Object



966
967
968
969
# File 'lib/oddb/import/pharmnet.rb', line 966

# Applies remove_info to both document types of +sequence+, Fachinfo first
# and Patinfo second. Returns whatever the Patinfo call returned.
def remove_infos(sequence, opts)
  [:fachinfo, :patinfo].map { |key| remove_info(key, sequence, opts) }.last
end

#reparse_fachinfo(agent, sequence) ⇒ Object



970
971
972
973
974
975
976
977
978
# File 'lib/oddb/import/pharmnet.rb', line 970

# Re-imports the stored German Fachinfo of +sequence+ from its recorded
# source and replaces its chapters with the freshly parsed ones.
#
# Does nothing (and returns nil) when there is no German Fachinfo, when it
# has no source, or when import_rtf yields no document. Otherwise bumps
# @reparsed_fis and returns the result of saving the document.
def reparse_fachinfo(agent, sequence)
  info = sequence.fachinfo.de
  return unless info

  source = info.source
  return unless source

  doc = import_rtf(:fachinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_fis += 1
  info.chapters.replace(doc.chapters)
  info.save
end

#reparse_patinfo(agent, sequence) ⇒ Object



979
980
981
982
983
984
985
986
987
# File 'lib/oddb/import/pharmnet.rb', line 979

# Re-imports the stored German Patinfo of +sequence+ from its recorded
# source and replaces its chapters with the freshly parsed ones.
#
# Does nothing (and returns nil) when there is no German Patinfo, when it
# has no source, or when import_rtf yields no document. Otherwise bumps
# @reparsed_pis and returns the result of saving the document.
def reparse_patinfo(agent, sequence)
  info = sequence.patinfo.de
  return unless info

  source = info.source
  return unless source

  doc = import_rtf(:patinfo, agent, source, sequence_name(sequence),
                   :reparse => true)
  return unless doc

  @reparsed_pis += 1
  info.chapters.replace(doc.chapters)
  info.save
end

#report(opts = {}) ⇒ Object



988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
# File 'lib/oddb/import/pharmnet.rb', line 988

# Builds the import report as an array of text lines.
#
# The report starts with @checked (if set), followed by the assignment,
# creation and reparse counters and the number of collected errors. One
# summary line per error group is appended, then a detail section per group
# listing every instance, preceded by the matching entry from
# ERROR_EXPLANATIONS when there is one.
#
# Unless opts[:skip_totals] is set, all sequences are walked to add the
# "Total: ..." lines (distinct document sources vs. linked sequences).
def report opts={}
  include_totals = !opts[:skip_totals]
  fi_sources = {}
  pi_sources = {}
  fi_count = 0
  pi_count = 0
  if include_totals
    Drugs::Sequence.all do |sequence|
      fachinfo = sequence.fachinfo.de
      if fachinfo
        fi_count += 1
        fi_sources[fachinfo.source] = true
      end
      patinfo = sequence.patinfo.de
      if patinfo
        pi_count += 1
        pi_sources[patinfo.source] = true
      end
    end
  end

  # These stay nil when totals are skipped and are dropped by compact below.
  fi_total = nil
  pi_total = nil
  if include_totals
    fi_total = "Total: #{fi_sources.size} Fachinfos linked to #{fi_count} Sequences"
    pi_total = "Total: #{pi_sources.size} Patinfos linked to #{pi_count} Sequences"
  end
  error_count = @errors.values.inject(0) { |sum, instances| sum + instances.size }

  lines = [
    @checked,
    "",
    "Assigned #{@assigned[:fachinfo]} Fachinfos",
    "Removed #{@removed[:fachinfo]} Fachinfos",
    "Kept #{@not_removed[:fachinfo]} unconfirmed Fachinfos",
    fi_total,
    "",
    "Assigned #{@assigned[:patinfo]} Patinfos",
    "Removed #{@removed[:patinfo]} Patinfos",
    "Kept #{@not_removed[:patinfo]} unconfirmed Patinfos",
    pi_total,
    "",
    "Created #{@products_created} Products",
    "Created #{@sequences_created} Sequences",
    "Created #{@packages_created} Packages",
    "",
    "Reparsed #{@reparsed_fis} Fachinfos",
    "Reparsed #{@reparsed_pis} Patinfos",
    "Repaired #{@repaired} Active Agents",
    "",
    "Errors: #{error_count}",
  ].compact

  details = []
  @errors.sort.each do |key, instances|
    heading = "#{instances.size} x #{key}"
    lines << " - #{heading}"
    details << "" << "#{heading}:"
    explanation = ERROR_EXPLANATIONS[key]
    details << "This means that #{explanation}" if explanation
    details << ''
    instances.each do |name, message, line, link|
      details << sprintf("%s: %s (%s) -> http://gripsdb.dimdi.de%s",
                         name, message, line, link)
    end
  end
  lines.concat details
end

#result_page(form, term) ⇒ Object



1044
1045
1046
1047
# File 'lib/oddb/import/pharmnet.rb', line 1044

# Fills +term+ into the 'term' field of the search +form+ and submits it.
# Returns whatever the form submission returns.
def result_page(form, term)
  term_field = form.field('term')
  term_field.value = term
  form.submit
end

#search(agent, term, sequence = nil, opts = {}) ⇒ Object



1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
# File 'lib/oddb/import/pharmnet.rb', line 1048

# Searches PharmNet for +term+ and returns an array of detail hashes, one
# per result. Results are cached in @result_cache under the downcased term.
#
# agent::    the browsing agent; must respond to transact and renew!
# term::     the search term
# sequence:: optional; only consulted together with opts[:repair]
# opts::     :info_unrestricted or :repair (see below) lift the restriction
#            to results that have a document
#
# Returns [] (without caching it) when even a freshly renewed session
# answers with the results of a different term.
def search(agent, term, sequence=nil, opts={})
  term = term.downcase
  # On a cache hit the stored details are returned and the block is skipped.
  @result_cache.fetch(term) do
    # Drop cached terms that sort before the first three characters of the
    # current term.
    # NOTE(review): presumably callers search in alphabetical order, so
    # those entries cannot be hit again - confirm.
    if(minimal = term[0,3])
      @result_cache.delete_if { |key, _|
        key < minimal
      }
    end
    @search_form ||= get_search_form agent
    ## if we need to repair the active agents, we want all results, otherwise only
    #  those that have a Fach- or PatInfo to parse.
    fi_only = opts[:info_unrestricted] \
      || (opts[:repair] && sequence && sequence.active_agents.any? { |act|
      act.dose.qty == 0 }) ? 'NO_RESTRICTION' : 'YES'
    set_fi_only(@search_form, fi_only)
    details = agent.transact {
      page = result_page @search_form, term
      # _search_invalid? yields the term the page actually answers when it
      # is not the one we asked for.
      if(found = _search_invalid? page, term)
        ODDB.logger.error('PharmNet') { 
          sprintf "Searched for '%s' but got result for '%s' - creating new session",
            term, found
        }
        # Retry exactly once with a fresh session and a fresh search form.
        agent.renew!
        @search_form = get_search_form agent
        set_fi_only(@search_form, fi_only)
        page = result_page @search_form, term
        if(_search_invalid? page, term)
          # Leaves the whole method, so nothing is stored in the cache.
          return []
        end
      end
      # Keep a copy of the most recent result page on disk.
      page.save @latest
      result = extract_result agent, page
      # Enrich every result row with the data of its details page; the
      # :href entry is dropped from the returned hash.
      result.collect do |data|
        dpg = get_details agent, page, data
        detail = data.merge extract_details(dpg)
        detail.delete :href
        detail
      end
    }
    @result_cache.store term, details
  end
end

#sequence_name(sequence) ⇒ Object



1098
1099
1100
1101
1102
1103
1104
1105
1106
# File 'lib/oddb/import/pharmnet.rb', line 1098

# Returns the German name of +sequence+, falling back to the German name of
# its product. Returns nil when +sequence+ is nil or when neither a name
# nor a product is available.
def sequence_name(sequence)
  return unless sequence

  own_name = sequence.name.de
  return own_name if own_name

  parent = sequence.product
  parent.name.de if parent
end

#set_fi_only(form, status = "YES") ⇒ Object



1107
1108
1109
1110
1111
1112
1113
# File 'lib/oddb/import/pharmnet.rb', line 1107

# Checks every radio button of +form+ that is named "WFTYP" and whose value
# equals +status+. Returns the result of iterating form.radiobuttons.
def set_fi_only(form, status="YES")
  form.radiobuttons.each do |button|
    next unless button.name == "WFTYP"
    next unless button.value == status
    button.check
  end
end

#suitable_data(comparison, selection, opts = {}) ⇒ Object



1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
# File 'lib/oddb/import/pharmnet.rb', line 1114

# Picks the best matching entries out of +selection+.
#
# Every entry is rated by _suitable_data against +comparison+; entries for
# which it returns nothing are discarded. The ratings of an entry are added
# up, and all entries whose total equals the highest total are returned, in
# their original order. The highest total starts out at 0, so entries with
# a negative total are never returned.
def suitable_data(comparison, selection, opts = {})
  ODDB.logger.debug('PharmNet') {
    "Checking for suitable data in #{selection.size} results"
  }
  best = 0
  candidates = []
  selection.each do |data|
    dists = _suitable_data(data, comparison, opts)
    next unless dists

    total = dists.inject { |acc, dist| acc + dist }
    best = total if total > best
    candidates.push [total, data]
  end
  ODDB.logger.debug('PharmNet') {
    "Found a preselection of #{candidates.size} results"
  }
  winners = candidates.select { |total, _| total == best }.map { |_, data| data }
  ODDB.logger.debug('PharmNet') {
    "Returning the best #{winners.size} results"
  }
  winners
end

#unique_registration?(code) ⇒ Boolean

Returns:

  • (Boolean)


1180
1181
1182
# File 'lib/oddb/import/pharmnet.rb', line 1180

# True unless the string form of +code+ has a line that starts with "EU".
# NOTE(review): presumably EU-wide registration numbers are shared between
# several entries and therefore not unique - confirm.
def unique_registration?(code)
  code.to_s !~ /^EU/
end