Module: Annotations::Genes::SENT

Defined in:
lib/MARQ/annotations.rb

Defined Under Namespace

Classes: SENTError

Constant Summary collapse

WSDL =
"http://sent.dacya.ucm.es/wsdl/SentWS.wsdl"
@@jobs =
{}

Class Method Summary collapse

Class Method Details

.analyze(organism, genes, factors) ⇒ Object

Raises:

- SENT::SENTError — raised when the service reports an error for the job

466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
# File 'lib/MARQ/annotations.rb', line 466

# Submits a gene list to the SENT service (or re-factors a previously cached
# job for the same organism and genes), polls until the job is done and
# returns its processed results.
#
# Jobs are cached in @@jobs under an MD5 digest of the organism and the
# sorted gene list, so the order of +genes+ does not affect cache lookups.
#
# @param organism [Object] organism identifier, passed through to the service
# @param genes [Array] gene identifiers; must respond to #sort
# @param factors [Object] number of factors, passed through to the service
# @return [Array] the [summary, ccc] pair returned by process_results
# @raise [SENT::SENTError] if the service reports an error for the job
def self.analyze(organism, genes, factors)
  hash = Digest::MD5.hexdigest([organism, genes.sort].inspect)

  # Every call to `driver` builds a new SOAP client from the WSDL, so build it
  # once here instead of once per polling iteration.
  sent = driver

  orig_job = @@jobs[hash]
  job = if orig_job
          sent.refactor(orig_job, factors, 'MARQ')
        else
          sent.analyze(organism, genes, factors, 'MARQ')
        end
  orig_job ||= job

  puts "#{ job }: #{ factors }"

  # Poll every 5 seconds; there is no upper bound on the wait.
  sleep 5 until sent.done(job)

  raise SENT::SENTError, "Job failed with error #{sent.messages(job).last}" if sent.error(job)

  # NOTE(review): the cache entry is replaced by the newest job, while results
  # are read from orig_job (the job that was cached before this call) rather
  # than from the job just run. Confirm this is what the service expects.
  @@jobs[hash] = job

  process_results(orig_job)
end

.driver ⇒ Object



445
446
447
448
449
# File 'lib/MARQ/annotations.rb', line 445

# Creates an RPC client for the SENT web service described by WSDL.
# The SOAP library is required on demand and a new client is built on each call.
#
# @return [Object] the driver returned by SOAP::WSDLDriverFactory#create_rpc_driver
def self.driver
  require 'soap/wsdlDriver'

  factory = SOAP::WSDLDriverFactory.new(WSDL)
  factory.create_rpc_driver
end

.process_results(job) ⇒ Object



451
452
453
454
455
456
457
458
459
460
461
462
463
# File 'lib/MARQ/annotations.rb', line 451

# Fetches and decodes the results of a finished job.
#
# The first result is Base64-decoded and parsed as YAML to obtain the summary;
# the second is Base64-decoded and converted to a Float (ccc). Each summary
# group is then given an :articles entry: the total number of associations
# across the group's :genes.
#
# @param job [Object] job identifier understood by the service
# @return [Array] a two-element array: [summary, ccc]
def self.process_results(job)
  # Build the SOAP client once; each call to `driver` creates a new one.
  sent = driver

  result_ids = sent.results(job)

  # NOTE(review): YAML::load is applied to data returned by a remote service;
  # consider whether a restricted/safe load is appropriate here.
  summary = YAML::load(Base64.decode64(sent.result(result_ids[0])))
  ccc     = Base64.decode64(sent.result(result_ids[1])).to_f
  associations = Open.to_hash(StringIO.new(sent.associations(job)), :flatten => true)

  summary.each do |group|
    # A gene with no entry in the associations hash counts as zero articles
    # instead of raising NoMethodError on nil.
    group[:articles] = group[:genes].inject(0) do |count, gene|
      count + (associations[gene] || []).length
    end
  end

  [summary, ccc]
end

.terms(organism, genes, num = 20) ⇒ Object



489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
# File 'lib/MARQ/annotations.rb', line 489

# Runs the analysis with 2, 4, 8 and 10 factors and returns the words selected
# for one of those factorizations.
#
# For each factorization, roughly +num+ words are shared out among the summary
# groups in proportion to each group's :articles count. The share is rounded
# down; a group with at least one article contributes at least one word, and a
# group with no articles contributes none.
#
# @param organism [Object] organism identifier, passed through to analyze
# @param genes [Array] gene identifiers, passed through to analyze
# @param num [Numeric] approximate number of words to select per factorization
# @return [Array] the words selected for the chosen number of factors
def self.terms(organism, genes, num = 20)
  factor_list = [2, 4, 8, 10]

  terms_by_factor = {}
  cccs = {}
  factor_list.each do |factors|
    summary, ccc = analyze(organism, genes, factors)
    total_articles = summary.inject(0) { |acc, group| acc + group[:articles] }

    terms_by_factor[factors] = summary.inject([]) do |selected, group|
      # Skipping empty groups also avoids dividing by a zero article total.
      next selected if group[:articles].zero?

      share = (num.to_f * group[:articles] / total_articles).floor
      count = [[share, 1].max, group[:words].length].min
      selected + group[:words].first(count)
    end

    cccs[factors] = ccc
  end

  # NOTE(review): this picks the factorization with the LOWEST ccc, following
  # the ascending sort in the previous implementation (which then failed with a
  # NameError before returning). Confirm whether the highest ccc was intended.
  best_factors, _best_ccc = cccs.min_by { |_factors, ccc| ccc }

  terms_by_factor[best_factors]
end