Class: Bio::MAF::Tiler

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/tiler.rb

Overview

Tiles a given genomic interval. Inspired by: lib/bx/align/tools/tile.py in bx-python

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Tiler

Returns a new instance of Tiler.



18
19
20
# File 'lib/bio/maf/tiler.rb', line 18

# Creates a new Tiler whose species_map starts out as an empty Hash.
# No other instance variable is set here.
def initialize
  @species_map = {}
end

Instance Attribute Details

#index ⇒ Object

Returns the value of attribute index.



10
11
12
# File 'lib/bio/maf/tiler.rb', line 10

# Reader for the @index instance variable.
# NOTE(review): #tile calls index.find([interval], parser) on this value, so
# it is presumably an alignment index object — confirm against the caller.
#
# @return [Object] the current value of @index
def index
  @index
end

#interval ⇒ Object

GenomicInterval



14
15
16
# File 'lib/bio/maf/tiler.rb', line 14

# Reader for the @interval instance variable: the interval to be tiled.
# #tile reads +length+, +zero_start+ and +zero_end+ from this object.
# NOTE(review): the generated docs describe this as a GenomicInterval — confirm.
#
# @return [Object] the current value of @interval
def interval
  @interval
end

#parser ⇒ Object

Returns the value of attribute parser.



11
12
13
# File 'lib/bio/maf/tiler.rb', line 11

# Reader for the @parser instance variable.
# #tile sets parser.sequence_filter[:only_species] and passes the parser to
# index.find, so the object must expose a Hash-like +sequence_filter+.
#
# @return [Object] the current value of @parser
def parser
  @parser
end

#reference ⇒ Object

Returns the value of attribute reference.



12
13
14
# File 'lib/bio/maf/tiler.rb', line 12

# Reader for the @reference instance variable: the optional reference
# sequence source. #ref_data accepts an object responding to
# +read_interval+ or a String; nil/false means no reference is available.
#
# @return [Object, nil] the current value of @reference
def reference
  @reference
end

#species ⇒ Object

Returns the value of attribute species.



15
16
17
# File 'lib/bio/maf/tiler.rb', line 15

# Reader for the @species instance variable: the ordered collection of
# species identifiers to tile. #tile emits one text row per entry, and the
# first entry's row is the one filled from the reference sequence (or 'N')
# where no alignment block covers the interval.
#
# @return [Object] the current value of @species
def species
  @species
end

#species_map ⇒ Object

Returns the value of attribute species_map.



16
17
18
# File 'lib/bio/maf/tiler.rb', line 16

# Reader for the @species_map instance variable (initialised to an empty
# Hash by the constructor). #write_fasta looks each species up here to pick
# the FASTA header name, falling back to the species identifier itself.
#
# @return [Hash] the current value of @species_map
def species_map
  @species_map
end

Instance Method Details

#ref_data(range) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/bio/maf/tiler.rb', line 22

# Reads the portion of the reference sequence described by +range+.
#
# The reference source is taken from #reference and may be either an object
# responding to +read_interval+ (called with the range's begin and end) or a
# String (sliced with the range itself).
#
# @param range [Range] the span to fetch
# @return [Object, nil] the data returned by the reference source, or nil
#   when no reference is configured
# @raise [RuntimeError] if the reference is set but is neither a String nor
#   an object responding to +read_interval+
def ref_data(range)
  source = reference
  return nil unless source
  return source.read_interval(range.begin, range.end) if source.respond_to?(:read_interval)
  return source.slice(range) if source.is_a?(String)

  raise "Unhandled reference data source: #{source}"
end

#runs(mask) {|cur_start...mask.size, cur| ... } ⇒ Object

Yields:

  • (cur_start...mask.size, cur)


96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/bio/maf/tiler.rb', line 96

# Splits +mask+ into maximal runs of consecutive, identical elements and
# yields each run as a half-open index range together with its element.
#
# Elements are compared by object identity (+equal?+), not by value, so two
# distinct but equal objects start separate runs.
#
# The run start index is used as the "no run seen yet" marker rather than the
# element itself, so that:
# * an empty mask yields nothing (instead of a bogus +nil...0+ / nil run), and
# * runs whose element is nil or false are reported like any other run.
#
# @param mask [Array] the sequence to scan; must respond to
#   +each_with_index+ and +size+
# @yieldparam range [Range] half-open index range (start...end) of the run
# @yieldparam obj [Object] the element shared by every position in the run
# @return [Enumerator] when no block is given
# @return [Object, nil] otherwise the value of the last block call, or nil
#   for an empty mask
def runs(mask)
  return enum_for(:runs, mask) unless block_given?

  run_start = nil
  run_obj = nil
  mask.each_with_index do |obj, i|
    # Still inside the current run: nothing to report yet.
    next if run_start && run_obj.equal?(obj)

    # Element changed: close the previous run (if any) and open a new one.
    yield(run_start...i, run_obj) if run_start
    run_obj = obj
    run_start = i
  end
  # Close the trailing run; skipped entirely when the mask was empty.
  yield(run_start...mask.size, run_obj) if run_start
end

#tile ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/bio/maf/tiler.rb', line 36

# Builds one text row per entry of #species covering #interval.
#
# Steps:
# 1. Restricts the parser to the configured species and fetches the blocks
#    overlapping the interval from the index.
# 2. Builds a per-position coverage mask, initially :ref everywhere, and
#    paints each block over the positions its reference sequence covers.
#    Blocks are painted in ascending :score order, so a higher-scoring block
#    overwrites a lower-scoring one where they overlap.
# 3. Walks the runs of the mask. For uncovered (:ref) runs the first row gets
#    the reference sequence when one is available, otherwise 'N'; every other
#    row gets '*'. For runs covered by a block each row gets that species'
#    alignment text, or '*' when the block has no sequence for it.
#
# Side effect: sets parser.sequence_filter[:only_species].
#
# @return [Array<String>] one row per species, in the order of #species
def tile
  parser.sequence_filter[:only_species] = @species
  # TODO: remove gaps
  by_score = index.find([interval], parser).sort_by { |blk| blk.vars[:score] }
  coverage = Array.new(interval.length, :ref)
  first = interval.zero_start
  last = interval.zero_end
  ref_region = reference ? ref_data(first...last) : nil

  # Later (higher-scoring) blocks overwrite earlier ones in the mask.
  by_score.each do |blk|
    ref_seq = blk.ref_seq
    lo = [first, ref_seq.start].max - first
    hi = [last, ref_seq.end].min - first
    coverage.fill(blk, lo...hi)
  end

  rows = species.map { '' }
  other_rows = rows.drop(1)
  runs(coverage) do |span, owner|
    if owner == :ref
      # not covered by an alignment block:
      # use the reference sequence if given, otherwise 'N'
      width = span.end - span.begin
      rows[0] << (ref_region ? ref_region.slice(span) : 'N' * width)
      filler = '*' * width
      other_rows.each { |row| row << filler }
    else
      # covered by an alignment block: translate mask offsets back to
      # interval coordinates, then to columns of the block's text
      genomic = (span.begin + first)...(span.end + first)
      columns = owner.ref_seq.text_range(genomic)
      species.each_with_index do |name, idx|
        row = rows[idx]
        hit = owner.sequences.find { |s| s.source == name || s.species == name }
        # no alignment for this species here => pad with '*'
        row << (hit ? hit.text.slice(columns) : '*' * (columns.end - columns.begin))
      end
    end
  end
  rows
end

#write_fasta(f) ⇒ Object



88
89
90
91
92
93
94
# File 'lib/bio/maf/tiler.rb', line 88

# Tiles the interval and writes the result to +f+ as FASTA records.
#
# For each species, in order, a ">name" header line is written followed by
# that species' tiled text. The header name is the species_map entry for the
# species when it has a truthy value, otherwise the species identifier itself.
#
# @param f [#puts] output stream to write to
# @return [nil]
def write_fasta(f)
  species.zip(tile) do |name, sequence|
    header = species_map[name] || name
    f.puts(">#{header}")
    f.puts(sequence)
  end
end