Module: Wrnap::Etl::Stockholm

Defined in:
lib/wrnap/etl/stockholm.rb

Class Method Summary collapse

Class Method Details

.balanced_consensus_from_sequence(sequence, structure) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/wrnap/etl/stockholm.rb', line 32

# Builds an RNA from +sequence+ whose structure keeps only the base pairs of
# +structure+ that this particular sequence can support.
#
# A base pair (i, j) from +structure+ is kept when:
# * the set of the two characters at sequence[i] and sequence[j] is a member
#   of Wrnap::Rna::CANONICAL_BASES, and
# * neither i nor j is a position where the sequence has "." while the
#   structure has a non-"." symbol.
#
# @param sequence [String] sequence, indexed in step with +structure+
# @param structure [String] dot-bracket structure string
# @return [Object] whatever Wrnap::Rna.init_from_string returns for the
#   sequence and the rebuilt structure
def balanced_consensus_from_sequence(sequence, structure)
  # Start from the pairs whose two characters form a canonical pairing.
  canonical_pairs = Wrnap::Rna.base_pairs(structure).map(&:to_a).select do |left, right|
    Wrnap::Rna::CANONICAL_BASES.include?(Set.new([sequence[left], sequence[right]]))
  end

  # Walk the (sequence character, structure symbol) columns and drop every
  # pair that touches a column where the sequence is "." but the structure
  # is not.
  columns = sequence.split(//).zip(structure.split(//))
  surviving_pairs = columns.each_with_index.reduce(canonical_pairs) do |pairs, ((nucleotide, symbol), index)|
    if nucleotide == "." && symbol != "."
      pairs.reject { |pair| pair.include?(index) }
    else
      pairs
    end
  end

  Wrnap::Rna.init_from_string(
    sequence,
    Wrnap::Rna.structure_from_bp_list(sequence.length, surviving_pairs)
  )
end

.consensus_structure_from_file(file) ⇒ Object



20
21
22
# File 'lib/wrnap/etl/stockholm.rb', line 20

# Reads the "SS_cons" annotation of the first entry parsed from a Stockholm
# file and converts it with dot_bracket_from_stockholm.
#
# @param file [String] passed straight to Bio::Stockholm::Reader.parse_from_file
# @return [String] the consensus structure in dot-bracket notation
def consensus_structure_from_file(file)
  alignment = Bio::Stockholm::Reader.parse_from_file(file)[0]
  ss_cons   = alignment.gc_features["SS_cons"]
  dot_bracket_from_stockholm(ss_cons)
end

.dot_bracket_from_stockholm(structure) ⇒ Object



24
25
26
# File 'lib/wrnap/etl/stockholm.rb', line 24

# Converts angle-bracket pairing symbols to dot-bracket parentheses.
#
# Every "<" becomes "(" and every ">" becomes ")"; all other characters are
# left untouched.
#
# @param structure [String] structure line, e.g. an "SS_cons" annotation
# @return [String] a new string with the angle brackets replaced
def dot_bracket_from_stockholm(structure)
  # One-pass character translation; same output as chaining two regex gsubs.
  structure.tr("<>", "()")
end

.fit_structure_to_sequence(sequence, consensus_structure) ⇒ Object



28
29
30
# File 'lib/wrnap/etl/stockholm.rb', line 28

# Fits a consensus structure to a single sequence by running three steps in
# order: balanced_consensus_from_sequence, then prune_gaps, then theta_filter.
#
# @param sequence [String] sequence to fit
# @param consensus_structure [String] consensus structure for that sequence
# @return [Object] the result of theta_filter
def fit_structure_to_sequence(sequence, consensus_structure)
  balanced = balanced_consensus_from_sequence(sequence, consensus_structure)
  ungapped = prune_gaps(balanced)
  theta_filter(ungapped)
end

.load_all(file) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/wrnap/etl/stockholm.rb', line 5

# Loads every record of the first entry in a Stockholm file, fitting the
# entry's "SS_cons" consensus structure to each record's sequence.
#
# Each resulting object gets a comment: the record's "AC" feature when
# gs_features is a Hash that has one, otherwise the record label. In both
# cases "/" and "-" are replaced by spaces.
#
# @param file [String] passed to Bio::Stockholm::Reader.parse_from_file
# @return [Object] the mapped records after calling +wrnap+ on the array
#   (+wrnap+ is defined elsewhere in the project)
def load_all(file)
  alignment = Bio::Stockholm::Reader.parse_from_file(file)[0]
  # Reuse the alignment parsed above instead of reading the file a second
  # time just to fetch its consensus structure.
  structure = dot_bracket_from_stockholm(alignment.gc_features["SS_cons"])

  alignment.records.map do |label, record|
    fit_structure_to_sequence(record.sequence, structure).tap do |rna|
      features  = record.gs_features
      accession = features["AC"] if features.is_a?(Hash)
      # Fall back to the label when there is no "AC" feature, rather than
      # calling split on nil.
      rna.comment = (accession || label).split(/[\/-]/).join(" ")
    end
  end.wrnap
end

.prune_gaps(rna) ⇒ Object



46
47
48
# File 'lib/wrnap/etl/stockholm.rb', line 46

# Removes every position whose sequence character is "." from both the
# sequence and the structure of +rna+, then rebuilds an RNA from what is left.
#
# @param rna [#seq, #str] object exposing sequence and structure strings
# @return [Object] whatever Wrnap::Rna.init_from_array returns for the
#   [sequence, structure] pair that remains
def prune_gaps(rna)
  # Pair each sequence character with the structure symbol at the same index.
  columns  = rna.seq.split(//).zip(rna.str.split(//))
  ungapped = columns.select { |base, _symbol| base != "." }

  # Transposing turns the kept columns back into one row of sequence
  # characters and one row of structure symbols.
  Wrnap::Rna.init_from_array(ungapped.transpose.map(&:join))
end

.theta_filter(rna) ⇒ Object



50
51
52
53
54
55
56
# File 'lib/wrnap/etl/stockholm.rb', line 50

# Rebuilds +rna+ keeping only the base pairs whose two indices are more than
# 3 positions apart.
#
# Must run after gap pruning.
#
# @param rna [#seq, #len, #base_pairs] RNA-like object
# @return [Object] whatever Wrnap::Rna.init_from_string returns for the
#   sequence and the filtered structure
def theta_filter(rna)
  distant_pairs = rna.base_pairs.map(&:to_a).reject { |left, right| (right - left).abs <= 3 }
  filtered_structure = Wrnap::Rna.structure_from_bp_list(rna.len, distant_pairs)

  Wrnap::Rna.init_from_string(rna.seq, filtered_structure)
end