Class: Contig

Inherits:
Object
  • Object
show all
Defined in:
lib/gene_assembler/contig.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name) ⇒ Contig

Returns a new instance of Contig.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/gene_assembler/contig.rb', line 8

# Builds an empty contig identified by +name+.
#
# All collections start empty; the sequence, length and completion status
# start as empty strings and the type starts as nil.
#
# @param name [Object] identifier stored in @name
def initialize(name)
  @name = name
  @seq = ''
  @type = nil
  @length = ''
  @hits = []
  @snps = []
  @gos = []
  @completed = ''
  @localization = []
  @q_frameshift = []
  @s_frameshift = []
  @stops = []
  # Tells whether the contig coordinates have already been shifted.
  # Uses the lowercase literal: the TRUE/FALSE constants were deprecated in
  # Ruby 2.4 and are no longer defined in current Ruby releases.
  @mod_coord = false
end

Instance Attribute Details

#completedObject

Returns the value of attribute completed.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +completed+ attribute.
#
# @return [Object] the current content of @completed
def completed
  return @completed
end

#hitsObject

Returns the value of attribute hits.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +hits+ attribute.
#
# @return [Object] the current content of @hits (the hit list itself, not a copy)
def hits
  return @hits
end

#lengthObject

Returns the value of attribute length.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +length+ attribute.
#
# @return [Object] the current content of @length
def length
  return @length
end

#mod_coordObject

Returns the value of attribute mod_coord.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +mod_coord+ attribute.
#
# @return [Object] the current content of @mod_coord
def mod_coord
  return @mod_coord
end

#nameObject

Returns the value of attribute name.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +name+ attribute.
#
# @return [Object] the current content of @name
def name
  return @name
end

#q_frameshiftObject

Returns the value of attribute q_frameshift.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +q_frameshift+ attribute.
#
# @return [Object] the current content of @q_frameshift
def q_frameshift
  return @q_frameshift
end

#s_frameshiftObject

Returns the value of attribute s_frameshift.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +s_frameshift+ attribute.
#
# @return [Object] the current content of @s_frameshift
def s_frameshift
  return @s_frameshift
end

#seqObject

Returns the value of attribute seq.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +seq+ attribute.
#
# @return [Object] the current content of @seq
def seq
  return @seq
end

#stopsObject

Returns the value of attribute stops.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +stops+ attribute.
#
# @return [Object] the current content of @stops
def stops
  return @stops
end

#typeObject

Returns the value of attribute type.



7
8
9
# File 'lib/gene_assembler/contig.rb', line 7

# Reader for the +type+ attribute.
#
# @return [Object] the current content of @type
def type
  return @type
end

Instance Method Details

#add_go(go, name, obsolete) ⇒ Object



85
86
87
88
89
# File 'lib/gene_assembler/contig.rb', line 85

# Creates a GO object from the given arguments and appends it to @gos.
#
# @param go [Object] first argument forwarded to GO.new
# @param name [Object] second argument forwarded to GO.new
# @param obsolete [Object] third argument forwarded to GO.new
# @return [GO] the newly created object
def add_go(go, name, obsolete)
  term = GO.new(go, name, obsolete)
  @gos.push(term)
  term
end

#add_hit(hit_name, s_length, reversed, type) ⇒ Object



103
104
105
106
107
# File 'lib/gene_assembler/contig.rb', line 103

# Creates a Hit object from the given arguments and appends it to @hits.
#
# @param hit_name [Object] first argument forwarded to Hit.new
# @param s_length [Object] second argument forwarded to Hit.new
# @param reversed [Object] third argument forwarded to Hit.new
# @param type [Object] fourth argument forwarded to Hit.new
# @return [Hit] the newly created object
def add_hit(hit_name, s_length, reversed, type)
  new_hit = Hit.new(hit_name, s_length, reversed, type)
  @hits.push(new_hit)
  new_hit
end

#add_localization(localization) ⇒ Object



57
58
59
# File 'lib/gene_assembler/contig.rb', line 57

# Appends +localization+ to the @localization list.
#
# @param localization [Object] value to store
# @return [Array] the @localization list after the append
def add_localization(localization)
  @localization.push(localization)
end

#add_snp(position) ⇒ Object



97
98
99
100
101
# File 'lib/gene_assembler/contig.rb', line 97

# Creates a SNP object for +position+ and appends it to @snps.
#
# @param position [Object] argument forwarded to SNP.new
# @return [SNP] the newly created object
def add_snp(position)
  variant = SNP.new(position)
  @snps.push(variant)
  variant
end

#compare(contig) ⇒ Object




357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/gene_assembler/contig.rb', line 357

# Aligns this contig with another one based on subject coordinates.
#
# Every HSP of this contig is scored against the HSPs of +contig+ through
# HSP#compare. The scan stops as soon as a match has been recorded and a
# later HSP of +contig+ yields a coverage of exactly 0.
#
# @param contig [#each_hit] contig whose hits and HSPs are scored against this one
# @return [Array] two values: the running index of the best-scoring HSP of
#   +contig+ (-1 when nothing scored above 0) and the number of comparisons
#   whose coverage exceeded 0.2
def compare(contig)
  exon_match = -1
  exones = 0
  # Lowercase boolean literals: the TRUE/FALSE constants are no longer
  # defined in current Ruby releases.
  match_found = false
  each_hit do |self_hit|
    break if match_found # leave the loop once a definitive match exists

    self_hit.each_hsp do |self_hsp|
      break if match_found

      # Best coverage and HSP counter restart for every HSP of this contig.
      last = 0
      contig_hsp_count = 0
      contig.each_hit do |contig_hit|
        contig_hit.each_hsp do |contig_hsp|
          coverage = self_hsp.compare(contig_hsp)
          if coverage > last # keep this match while waiting for a better one
            exon_match = contig_hsp_count
            last = coverage
          end
          exones += 1 if coverage > 0.2 # exon counting
          # A match exists and the following exons no longer overlap: stop.
          if coverage == 0 && exon_match > -1
            match_found = true
            break
          end
          contig_hsp_count += 1
        end
        break if match_found
      end
    end
  end
  return exon_match, exones
end

#coor_intronesObject



507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
# File 'lib/gene_assembler/contig.rb', line 507

# Computes the intron boundaries of the first hit.
#
# For every pair of consecutive HSPs the pair
# [q_end of the previous HSP, q_beg of the current HSP] is collected.
#
# @return [Array<Array>] one [q_end, q_beg] pair per intron; empty when the
#   first hit has a single HSP or none
def coor_intrones
  boundaries = []
  hit = first_hit
  return boundaries unless hit.hsp_count > 1

  previous = nil
  hit.each_hsp_with_index do |hsp, idx|
    boundaries << [previous.q_end, hsp.q_beg] unless idx.zero?
    previous = hsp
  end
  boundaries
end

#correct_hsps(blast_coor_type) ⇒ Object



350
351
352
353
354
# File 'lib/gene_assembler/contig.rb', line 350

# Forwards +blast_coor_type+ to Hit#correct_hsps on every hit of the contig.
#
# @param blast_coor_type [Object] value handed to each hit unchanged
# @return [Object] whatever #each_hit returns
def correct_hsps(blast_coor_type)
  each_hit do |current_hit|
    current_hit.correct_hsps(blast_coor_type)
  end
end

#drawObject

Realiza una representacion del contig a nivel del subject



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/gene_assembler/contig.rb', line 180

# Prints a one-line text representation of the contig at subject level.
#
# For each HSP of the first hit a separator is printed according to the
# distance between its s_beg and the previous s_end:
#   '/'  distance 0..2 (one HSP right after the other, the normal exon layout)
#   '-' repeated distance times followed by '|' when the distance is above 2
#   '&'  negative distance (overlapping HSPs)
# followed by the 1-based HSP number centred in a field derived from the HSP
# subject span. The line ends with '|-' plus one '-' per remaining subject
# position up to the hit's s_length.
#
# NOTE(review): +overlap+ is never reset after an overlapping HSP, so the
# negative correction also narrows the fields of later HSPs - confirm intent.
#
# @return [nil]
def draw
  previous_end = 0
  overlap = 0
  first_hit.each_hsp_with_index do |hsp, idx|
    gap = hsp.s_beg - previous_end
    if gap < 0
      print '&'
      overlap = gap
    elsif gap <= 2
      print '/'
    else
      print '-' * gap
      print '|'
    end
    label = (idx + 1).to_s # HSP number
    print label.center(hsp.s_end - hsp.s_beg + overlap - 1)
    previous_end = hsp.s_end
  end
  print '|-'
  tail = first_hit.s_length - previous_end
  tail = 0 if tail < 0
  print '-' * tail
  puts "\n"
end

#each_goObject



91
92
93
94
95
# File 'lib/gene_assembler/contig.rb', line 91

# Yields every element of @gos in order.
#
# @yieldparam go [Object] one stored element
# @return [Array] the @gos list
def each_go
  @gos.each { |term| yield term }
end

#each_hitObject



117
118
119
120
121
# File 'lib/gene_assembler/contig.rb', line 117

# Yields every element of @hits in order.
#
# @yieldparam hit [Object] one stored hit
# @return [Array] the @hits list
def each_hit
  @hits.each { |entry| yield entry }
end

#each_hit_with_indexObject



123
124
125
126
127
# File 'lib/gene_assembler/contig.rb', line 123

# Yields every element of @hits together with its 0-based position.
#
# @yieldparam hit [Object] one stored hit
# @yieldparam i [Integer] position of the hit in @hits
# @return [Array] the @hits list
def each_hit_with_index
  @hits.each_with_index { |entry, position| yield entry, position }
end

#each_localizationObject



61
62
63
64
65
# File 'lib/gene_assembler/contig.rb', line 61

# Yields every element of @localization in order.
#
# @yieldparam localization [Object] one stored element
# @return [Array] the @localization list
def each_localization
  @localization.each { |place| yield place }
end

#each_localization_with_indexObject



67
68
69
70
71
# File 'lib/gene_assembler/contig.rb', line 67

# Yields every element of @localization together with its 0-based position.
#
# @yieldparam localization [Object] one stored element
# @yieldparam i [Integer] position of the element in @localization
# @return [Array] the @localization list
def each_localization_with_index
  @localization.each_with_index { |place, position| yield place, position }
end

#each_q_frameshiftObject



174
175
176
177
178
# File 'lib/gene_assembler/contig.rb', line 174

# Yields every element of @q_frameshift in order.
#
# @yieldparam qfs [Object] one stored element
# @return [Array] the @q_frameshift list
def each_q_frameshift
  @q_frameshift.each { |shift| yield shift }
end

#each_snpObject



134
135
136
137
138
# File 'lib/gene_assembler/contig.rb', line 134

# Yields every element of @snps in order.
#
# @yieldparam snp [Object] one stored element
# @return [Array] the @snps list
def each_snp
  @snps.each { |variant| yield variant }
end

#each_snp_with_indexObject



140
141
142
143
144
# File 'lib/gene_assembler/contig.rb', line 140

# Yields every element of @snps together with its 0-based position.
#
# @yieldparam snp [Object] one stored element
# @yieldparam i [Integer] position of the element in @snps
# @return [Array] the @snps list
def each_snp_with_index
  @snps.each_with_index { |variant, position| yield variant, position }
end

#each_stopObject



73
74
75
76
77
# File 'lib/gene_assembler/contig.rb', line 73

# Yields every element of @stops in order.
#
# @yieldparam stop [Object] one stored element
# @return [Array] the @stops list
def each_stop
  @stops.each { |stop_pos| yield stop_pos }
end

#each_stop_with_indexObject



79
80
81
82
83
# File 'lib/gene_assembler/contig.rb', line 79

# Yields every element of @stops together with its 0-based position.
#
# @yieldparam stop [Object] one stored element
# @yieldparam i [Integer] position of the element in @stops
# @return [Array] the @stops list
def each_stop_with_index
  @stops.each_with_index { |stop_pos, position| yield stop_pos, position }
end

#exon_acumulativeObject

Suma la longitud de todos los exones



263
264
265
266
267
268
269
270
# File 'lib/gene_assembler/contig.rb', line 263

# Adds up the lengths of all exons as reported by #exones_q.
#
# @return [Numeric] total of the values returned by #exones_q (0 when empty)
def exon_acumulative
  exones_q.inject(0) { |total, exon_length| total + exon_length }
end

#exones_qObject

Devuelve un array con el tamaño de cada hsp/exon a nivel del query



229
230
231
232
233
234
235
236
237
238
# File 'lib/gene_assembler/contig.rb', line 229

# Returns the size of every HSP/exon at query level.
#
# The size is the absolute difference between q_end and q_beg of each HSP,
# collected across all hits in iteration order.
#
# @return [Array<Numeric>] one size per HSP
def exones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp do |hsp|
      sizes.push((hsp.q_end - hsp.q_beg).abs)
    end
  end
  sizes
end

#exones_sObject

Devuelve un array con el tamaño de cada hsp/exon a nivel del subject



218
219
220
221
222
223
224
225
226
227
# File 'lib/gene_assembler/contig.rb', line 218

# Returns the size of every HSP/exon at subject level.
#
# The size is the absolute difference between s_end and s_beg of each HSP,
# collected across all hits in iteration order.
#
# @return [Array<Numeric>] one size per HSP
def exones_s
  sizes = []
  each_hit do |hit|
    hit.each_hsp do |hsp|
      sizes.push((hsp.s_end - hsp.s_beg).abs)
    end
  end
  sizes
end

#first_hitObject



32
33
34
35
36
37
38
39
# File 'lib/gene_assembler/contig.rb', line 32

# Returns the first hit yielded by #each_hit.
#
# @return [Object, nil] the first hit, or nil when there are no hits
def first_hit
  each_hit { |candidate| return candidate }
  nil
end

#frameshift_modified_coordenates(add) ⇒ Object



168
169
170
171
172
# File 'lib/gene_assembler/contig.rb', line 168

# Shifts every position stored in @q_frameshift by +add+, in place.
#
# @param add [Numeric] offset added to each stored position
# @return [Integer] number of positions that were stored
def frameshift_modified_coordenates(add)
  total = @q_frameshift.length
  @q_frameshift.map! { |pos| pos + add }
  total
end

#gff(id, parent, add) ⇒ Object

Devuelve en estrutura gff los exones en genomico



571
572
573
574
575
576
577
578
579
580
581
582
583
# File 'lib/gene_assembler/contig.rb', line 571

# Renders the HSPs of the first hit as GFF "exon" lines in genomic coordinates.
#
# Each line uses q_beg and q_end shifted by +add+ as start and end; the
# strand column is always '+'.
#
# @param id [Object] value written to the first GFF column
# @param parent [Object] used to build the ID, Parent and Name attributes
# @param add [Numeric] offset added to the query coordinates
# @return [Array<String>] one tab-separated line per HSP
def gff(id, parent, add)
  lines = []
  first_hit.each_hsp do |hsp|
    lines.push("#{id}\t.\texon\t#{hsp.q_beg+add}\t#{hsp.q_end+add}\t.\t+\t.\tID=#{parent}_exon;Parent=#{parent};Name=#{parent}_exon")
  end
  lines
end

#gff_prot(id, prot_name) ⇒ Object

Devuelve en estrutura gff los exones en proteina



585
586
587
588
589
590
591
592
593
594
# File 'lib/gene_assembler/contig.rb', line 585

# Renders the HSPs of the first hit as GFF "protein_match" lines in protein
# (subject) coordinates.
#
# Each line uses s_beg and s_end as start and end; the strand column is
# always '+'.
#
# @param id [Object] value written to the first GFF column and to Name
# @param prot_name [Object] used to build the ID and Parent attributes
# @return [Array<String>] one tab-separated line per HSP
def gff_prot(id, prot_name)
  lines = []
  first_hit.each_hsp do |hsp|
    lines.push("#{id}\t.\tprotein_match\t#{hsp.s_beg}\t#{hsp.s_end}\t.\t+\t.\tID=#{prot_name}_prot;Parent=#{prot_name};Name=#{id}_prot")
  end
  lines
end

#has_hit?Boolean

Returns:

  • (Boolean)


109
110
111
112
113
114
115
# File 'lib/gene_assembler/contig.rb', line 109

# Tells whether the contig holds at least one hit.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @return [Boolean] true when @hits is not empty
def has_hit?
  !@hits.empty?
end

#hit_countObject



49
50
51
52
53
54
55
# File 'lib/gene_assembler/contig.rb', line 49

# Counts the hits yielded by #each_hit.
#
# @return [Integer] number of hits
def hit_count
  total = 0
  each_hit { |_hit| total += 1 }
  total
end

#hits_sort!Object



146
147
148
149
150
# File 'lib/gene_assembler/contig.rb', line 146

# Calls Hit#hsps_sort! on every hit of the contig.
#
# @return [Object] whatever #each_hit returns
def hits_sort!
  each_hit do |current_hit|
    current_hit.hsps_sort!
  end
end

#hsp_at(position) ⇒ Object



623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
# File 'lib/gene_assembler/contig.rb', line 623

# Returns the HSP found at +position+ when the HSPs of all hits are counted
# consecutively, starting at 0.
#
# Returns directly from the nested iteration instead of tracking a flag built
# on the TRUE/FALSE constants, which are no longer defined in current Ruby
# releases.
#
# @param position [Integer] 0-based index across the HSPs of every hit
# @return [Object, nil] the matching HSP, or nil when +position+ is out of range
def hsp_at(position)
  running_index = 0
  each_hit do |hit|
    hit.each_hsp do |hsp|
      return hsp if position == running_index

      running_index += 1
    end
  end
  nil
end

#hsp_minor_than?(hsp_length) ⇒ Boolean

En nt

Returns:

  • (Boolean)


337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/gene_assembler/contig.rb', line 337

# Tells whether any hit reports an HSP shorter than +hsp_length+ (in nt),
# as decided by Hit#hsp_minor_than?.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# NOTE(review): the guard compares the hit index with the hit's HSP count and
# only rejects a first hit that has no HSPs; it is kept as found - confirm
# the intended condition.
#
# @param hsp_length [Numeric] threshold forwarded to each hit
# @return [Boolean] true as soon as one hit answers true
def hsp_minor_than?(hsp_length)
  each_hit_with_index do |hit, i|
    next unless i > 0 || i < hit.hsp_count
    return true if hit.hsp_minor_than?(hsp_length)
  end
  false
end

#indicesObject

Muestra los indices de subject y query del contig



210
211
212
213
214
215
216
# File 'lib/gene_assembler/contig.rb', line 210

# Prints the query and subject coordinates of every HSP of the contig.
#
# One tab-separated line is written per HSP: "<hit number>.<hsp number>)",
# q_beg, q_end, s_beg, s_end, then the contig's @name, @length and @mod_coord.
# Hit and HSP numbers are 1-based.
#
# @return [Object] whatever #each_hit_with_index returns
def indices
  each_hit_with_index do |hit, ind|
    hit.each_hsp_with_index do |hsp, i|
      puts "#{ind+1}.#{i+1})\t#{hsp.q_beg}\t#{hsp.q_end}\t#{hsp.s_beg}\t#{hsp.s_end}\t#{@name}\t#{@length}\t#{@mod_coord}"
    end
  end
end

#intrones_qObject

Devuelve un array con el tamaño de cada intron a nivel del query



240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/gene_assembler/contig.rb', line 240

# Returns the size of every intron at query level.
#
# For each HSP the following HSP is looked up in the first hit through
# Hit#hsp_at(index + 1); the intron size is the absolute difference between
# that HSP's q_beg and the current HSP's q_end. Iteration over a hit stops
# when no following HSP exists.
#
# The former unused local that read first_hit.hsp_count was removed: it made
# the method raise on a contig without hits instead of returning an empty
# list.
#
# @return [Array<Numeric>] one size per intron; empty when there are no hits
def intrones_q
  sizes = []
  each_hit do |hit|
    hit.each_hsp_with_index do |hsp, ind|
      next_hsp = first_hit.hsp_at(ind + 1)
      break unless next_hsp

      sizes << (next_hsp.q_beg - hsp.q_end).abs
    end
  end
  sizes
end

#is_gapped?Boolean

Examina si hay gaps internos en la estructura del gen q se mapea sobre la proteina q pudieran señalar la falta parcial o completa de un exon

Returns:

  • (Boolean)


304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# File 'lib/gene_assembler/contig.rb', line 304

# Checks for internal gaps in the gene structure mapped onto the protein that
# could point to a partially or completely missing exon.
#
# Consecutive HSPs of the first hit are compared on subject coordinates: a gap
# is reported when an HSP starts more than +gap+ positions after the previous
# HSP's s_end. Overlapping coordinates give a negative difference and never
# count as a gap. A previous s_end that is not positive is skipped.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @param gap [Numeric] largest tolerated distance, measured in aa; should be
#   at least 1 (defaults to 3, the value that used to be hard-coded)
# @return [Boolean] true when a gap larger than +gap+ is found
def is_gapped?(gap = 3)
  s_end_last = 0
  @hits.first.hsps.each do |hsp|
    return true if s_end_last > 0 && hsp.s_beg - s_end_last > gap

    s_end_last = hsp.s_end
  end
  false
end

#is_one_hsp?Boolean

Examina si el hit esta compuesto por un solo hsp

Returns:

  • (Boolean)


296
297
298
299
300
301
302
# File 'lib/gene_assembler/contig.rb', line 296

# Tells whether the first hit consists of exactly one HSP.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @return [Boolean] true when first_hit.hsp_count equals 1
def is_one_hsp?
  first_hit.hsp_count == 1
end

#is_truncated?Boolean

Examina si los exones en el borde del contig estan truncados/interrumpidos

Returns:

  • (Boolean)


321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# File 'lib/gene_assembler/contig.rb', line 321

# Checks whether the exons at the contig borders are truncated/interrupted.
#
# Truncated at the start: the first HSP of the first hit begins at query
# position 1 while its subject start is above 1.
# Truncated at the end: the last HSP of the first hit ends exactly at @length.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @return [Boolean] true when either border looks truncated
def is_truncated?
  hit = first_hit
  leading = hit.first_hsp
  start_truncated = leading.s_beg > 1 && leading.q_beg == 1
  end_truncated = hit.last_hsp.q_end == @length
  start_truncated || end_truncated
end

#last_hitObject



41
42
43
44
45
46
47
# File 'lib/gene_assembler/contig.rb', line 41

# Returns the last hit yielded by #each_hit.
#
# @return [Object, nil] the last hit, or nil when there are no hits
def last_hit
  final = nil
  each_hit { |candidate| final = candidate }
  final
end

#mixed?Boolean

Funciones para comprobar validez de los contig

Returns:

  • (Boolean)


273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/gene_assembler/contig.rb', line 273

# Checks whether the HSPs of the first hit are out of order.
#
# The direction between consecutive q_beg values (1 when not decreasing,
# 0 when decreasing) is tracked; a change of direction from the third HSP
# onwards marks the hit as mixed.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @return [Boolean] true when the direction of q_beg changes along the HSPs
def mixed?
  previous_beg = nil
  previous_direction = 0
  direction = 0
  @hits.first.hsps.each_with_index do |hsp, idx|
    unless previous_beg.nil?
      direction = hsp.q_beg - previous_beg >= 0 ? 1 : 0
      # idx 1 only establishes the direction; changes count from idx 2 on.
      return true if direction != previous_direction && idx > 1
    end
    previous_direction = direction
    previous_beg = hsp.q_beg
  end
  false
end

#modified_coordenates(add) ⇒ Object



152
153
154
155
156
157
158
159
160
# File 'lib/gene_assembler/contig.rb', line 152

# Shifts all coordinates of the contig by +add+ and flags the contig as
# modified.
#
# Every hit is shifted through Hit#modified_coordenates; the stored stop and
# frameshift positions are shifted exactly once. They used to be shifted
# inside the per-hit loop, which applied the offset once per hit.
#
# Uses the lowercase boolean literal: the TRUE constant is no longer defined
# in current Ruby releases.
#
# @param add [Numeric] offset added to every coordinate
# @return [Numeric] q_end of the last HSP of the last hit after the shift
def modified_coordenates(add)
  @mod_coord = true
  each_hit do |hit|
    hit.modified_coordenates(add)
  end
  stop_modified_coordenates(add)
  frameshift_modified_coordenates(add)
  return last_hit.last_hsp.q_end
end

#n_hits?Boolean

Returns:

  • (Boolean)


129
130
131
132
# File 'lib/gene_assembler/contig.rb', line 129

# Returns the number of hits stored in @hits.
#
# Despite the trailing "?", the result is a count, not a boolean.
#
# @return [Integer] size of @hits
def n_hits?
  @hits.length
end

#n_intronObject



255
256
257
258
259
260
261
# File 'lib/gene_assembler/contig.rb', line 255

# Returns a count obtained by adding one per hit yielded by #each_hit.
#
# NOTE(review): the name suggests a number of introns, but the code counts
# hits - confirm the intended meaning with the callers.
#
# @return [Integer] number of hits
def n_intron
  total = 0
  each_hit { |_hit| total += 1 }
  total
end

#overlapObject



606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
# File 'lib/gene_assembler/contig.rb', line 606

# Collects the overlaps reported by the hits of the contig.
#
# For every hit the value of Hit#hsp_overlap is collected; from the second
# hit on, the result of Hit#overlap_with(previous hit) is added as well when
# it is negative. The collected values are flattened into a single list.
#
# @return [Array] flattened list of overlap values
def overlap
  collected = []
  previous = nil
  each_hit_with_index do |hit, position|
    collected << hit.hsp_overlap
    if position > 0
      between_hits = hit.overlap_with(previous)
      collected << between_hits if between_hits < 0
    end
    previous = hit
  end
  collected.flatten
end

#rev_compObject



407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/gene_assembler/contig.rb', line 407

# Replaces @seq with its reverse complement.
#
# The sequence is upper-cased, A/T and G/C are swapped, every other character
# is kept as is, and the result is reversed.
#
# @return [String] the new content of @seq
def rev_comp
  @seq = @seq.upcase.tr('ATGC', 'TACG').reverse
end

#rev_comp_if_hitObject

Devuelve la secuencia reversocomplementaria del contig



400
401
402
403
404
405
# File 'lib/gene_assembler/contig.rb', line 400

# Reverse-complements the contig sequence when the first hit is flagged as
# reversed, then clears that flag.
#
# Uses the lowercase boolean literal: the FALSE constant is no longer defined
# in current Ruby releases.
#
# @return [false, nil] false after a reversal took place, nil otherwise
def rev_comp_if_hit
  hit = first_hit
  return unless hit.reversed

  rev_comp
  hit.reversed = false
end

#rev_coordObject



428
429
430
431
432
433
# File 'lib/gene_assembler/contig.rb', line 428

# Asks every hit to reverse its coordinates against the contig length and
# then to re-sort its HSPs.
#
# @return [Object] whatever #each_hit returns
def rev_coord
  each_hit do |current_hit|
    current_hit.rev_coord(@length)
    current_hit.hsps_sort!
  end
end

#start_codon_searchObject

Busqueda codon inicio, busca si existe y una vez encontrado modifica coordenadas para acomodar el codon



435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/gene_assembler/contig.rb', line 435

# Searches for a start codon and, when one is found, adjusts the coordinates
# of the first HSP to accommodate it.
#
# The search only runs when the first HSP starts within the first 10
# positions of the subject. Every 'ATG' of @seq is visited from left to right
# (positions are 1-based):
# * an ATG exactly at q_beg is accepted and ends the search;
# * an ATG more than 2 positions past q_beg ends the search;
# * any other ATG is remembered when its distance to q_end is a multiple
#   of 3, and the search goes on.
# When a codon was accepted, the first HSP gets s_beg = 1 and q_beg set to the
# codon position.
#
# Fixes over the previous version: running out of ATG occurrences ends the
# search instead of raising NoMethodError (the 1-based conversion was applied
# before the nil check), and the TRUE/FALSE constants, no longer defined in
# current Ruby releases, are not used.
#
# @return [Boolean] true when a start codon was found and the HSP was updated
def start_codon_search
  first_hsp = @hits.first.hsps.first
  return false unless first_hsp.s_beg <= 10

  q_beg = first_hsp.q_beg
  q_end = first_hsp.q_end
  index = 0
  search_from = 0 # 0-based offset handed to String#index
  loop do
    offset = @seq.index('ATG', search_from)
    break if offset.nil?

    find = offset + 1 # 1-based position of the codon
    if find == q_beg
      index = find
      break
    end
    break if find > q_beg + 2

    index = find if (find - q_end).modulo(3) == 0
    # Another ATG cannot start inside the one just found, so resuming right
    # after its first base visits the same occurrences as before.
    search_from = offset + 1
  end
  return false unless index > 0

  first_hsp.s_beg = 1
  first_hsp.q_beg = index
  true
end

#stop_codon(codon, ends, *beg) ⇒ Object

Busqueda posibles codones de parada



481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
# File 'lib/gene_assembler/contig.rb', line 481

# Searches @seq for the next occurrence of +codon+ that is in frame with a
# reference position.
#
# With two arguments the search starts at offset +ends+ and the frame
# reference is ends - 1. With a third argument the search starts at that
# offset and the frame reference is +ends+ itself. An occurrence is accepted
# when its distance to the reference is a multiple of 3.
#
# Written without the TRUE/FALSE constants, which are no longer defined in
# current Ruby releases.
#
# @param codon [String] text to look for
# @param ends [Integer] search start (two-argument form) or frame reference
# @param beg [Array<Integer>] optional explicit search start (first element used)
# @return [Integer, nil] offset of the accepted occurrence as reported by
#   String#index, or nil when none is in frame
def stop_codon(codon, ends, *beg)
  reference = ends - 1
  unless beg.empty?
    reference = ends
    ends = beg.first
  end
  loop do
    pos = @seq.index(codon, ends)
    return nil if pos.nil?
    return pos if (pos - reference).abs.modulo(3) == 0

    ends = pos + 1 # resume right after the rejected occurrence
  end
end

#stop_codon_searchObject

Busqueda codon de parada, busca si existe



523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
# File 'lib/gene_assembler/contig.rb', line 523

# Searches for stop codons inside the region covered by the first hit.
#
# @seq is read base by base (1-based positions) from q_beg of the first HSP to
# q_end of the last HSP of the first hit. Positions strictly inside an intron,
# as reported by #coor_intrones, are skipped; exon borders are kept. The
# remaining bases are grouped in threes and every TAG, TAA or TGA codon has
# the position of its first base appended to @stops.
#
# Iterates with String#each_char (the block form of String#chars is
# deprecated) and uses lowercase boolean literals, since the TRUE/FALSE
# constants are no longer defined in current Ruby releases.
#
# @return [Boolean] true when at least one stop codon was recorded
def stop_codon_search
  exists = false
  homology_start = first_hit.first_hsp.q_beg
  homology_end = first_hit.last_hsp.q_end
  intrones = coor_intrones
  codon = ''
  position = nil
  @seq.each_char.with_index(1) do |c, n|
    next if n < homology_start # start comparing at the first exon
    break if n > homology_end  # end of the compared region
    # Skip introns; the exon border itself is not skipped.
    next if intrones.any? { |int| n > int[0] && n < int[1] }

    codon += c
    if codon.length == 3
      if %w[TAG TAA TGA].include?(codon)
        @stops << position
        exists = true
      end
      codon = ''
    elsif codon.length == 1
      position = n # remember where the codon starts
    end
  end
  exists
end

#stop_modified_coordenates(add) ⇒ Object



162
163
164
165
166
# File 'lib/gene_assembler/contig.rb', line 162

# Shifts every position stored in @stops by +add+, in place.
#
# @param add [Numeric] offset added to each stored position
# @return [Integer] number of positions that were stored
def stop_modified_coordenates(add)
  total = @stops.length
  @stops.map! { |pos| pos + add }
  total
end

#transfer_contig_hits(contig) ⇒ Object



596
597
598
599
600
# File 'lib/gene_assembler/contig.rb', line 596

# Hands every hit of +contig+ to #transfer_hit of this contig.
#
# @param contig [#each_hit] contig whose hits are taken over
# @return [Object] whatever contig.each_hit returns
def transfer_contig_hits(contig)
  contig.each_hit do |incoming|
    transfer_hit(incoming)
  end
end

#transfer_hit(hit) ⇒ Object



602
603
604
# File 'lib/gene_assembler/contig.rb', line 602

# Appends an existing hit object to @hits.
#
# @param hit [Object] hit to store; the object itself is stored, not a copy
# @return [Array] the @hits list after the append
def transfer_hit(hit)
  @hits.push(hit)
end