Module: Wrnap::Etl::Stockholm
- Defined in:
- lib/wrnap/etl/stockholm.rb
Class Method Summary
- .balanced_consensus_from_sequence(sequence, structure) ⇒ Object
- .consensus_structure_from_file(file) ⇒ Object
- .dot_bracket_from_stockholm(structure) ⇒ Object
- .fit_structure_to_sequence(sequence, consensus_structure) ⇒ Object
- .load_all(file) ⇒ Object
- .prune_gaps(rna) ⇒ Object
- .theta_filter(rna) ⇒ Object
Class Method Details
.balanced_consensus_from_sequence(sequence, structure) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/wrnap/etl/stockholm.rb', line 32
#
# Builds an RNA from +sequence+ and a filtered copy of +structure+.
#
# Base pairs are read from +structure+ with Wrnap::Rna.base_pairs and kept
# only when the set of the two paired sequence characters is a member of
# Wrnap::Rna::CANONICAL_BASES. Any surviving pair is then discarded if one of
# its ends lies on a position where the sequence holds "." while the
# structure holds something other than ".".
#
# @param sequence [String] aligned sequence; "." is treated as a gap here
# @param structure [String] structure string for the same alignment
#   (presumably dot-bracket notation -- confirm against Wrnap::Rna.base_pairs)
# @return [Object] the value returned by Wrnap::Rna.init_from_string
def balanced_consensus_from_sequence(sequence, structure)
  structure_symbols = structure.split(//)

  # Positions where the sequence is gapped but the structure is not.
  gapped_positions = []
  sequence.split(//).each_with_index do |nucleotide, position|
    gapped_positions << position if nucleotide == "." && structure_symbols[position] != "."
  end

  canonical_pairs = Wrnap::Rna.base_pairs(structure).map(&:to_a).select do |left, right|
    Wrnap::Rna::CANONICAL_BASES.include?(Set.new([sequence[left], sequence[right]]))
  end

  # Drop every pair that touches one of the gapped positions.
  balanced_pairs = canonical_pairs.reject do |pair|
    pair.any? { |index| gapped_positions.include?(index) }
  end

  Wrnap::Rna.init_from_string(
    sequence,
    Wrnap::Rna.structure_from_bp_list(sequence.length, balanced_pairs)
  )
end
.consensus_structure_from_file(file) ⇒ Object
20 21 22 |
# File 'lib/wrnap/etl/stockholm.rb', line 20
#
# Reads the "SS_cons" per-column feature from the first entry that
# Bio::Stockholm::Reader parses out of +file+ and converts it with
# dot_bracket_from_stockholm.
#
# @param file [Object] passed straight to Bio::Stockholm::Reader.parse_from_file
#   (presumably a path -- confirm against that API)
# @return [String] the converted consensus structure
def consensus_structure_from_file(file)
  first_entry = Bio::Stockholm::Reader.parse_from_file(file)[0]
  stockholm_structure = first_entry.gc_features["SS_cons"]
  dot_bracket_from_stockholm(stockholm_structure)
end
.dot_bracket_from_stockholm(structure) ⇒ Object
24 25 26 |
# File 'lib/wrnap/etl/stockholm.rb', line 24
#
# Rewrites angle-bracket pairing symbols as parentheses: every "<" becomes
# "(" and every ">" becomes ")". All other characters are left untouched.
#
# Uses a single String#tr pass instead of two regex substitutions; the
# result is the same and a new string is returned (the argument is not
# mutated).
#
# @param structure [String] structure string that may contain "<" and ">"
# @return [String] copy of +structure+ with "<"/">" replaced by "("/")"
def dot_bracket_from_stockholm(structure)
  structure.tr("<>", "()")
end
.fit_structure_to_sequence(sequence, consensus_structure) ⇒ Object
28 29 30 |
# File 'lib/wrnap/etl/stockholm.rb', line 28
#
# Runs the three fitting steps in order: balanced_consensus_from_sequence,
# then prune_gaps, then theta_filter.
#
# @param sequence [String] aligned sequence handed to balanced_consensus_from_sequence
# @param consensus_structure [String] structure handed to balanced_consensus_from_sequence
# @return [Object] the value returned by theta_filter
def fit_structure_to_sequence(sequence, consensus_structure)
  balanced = balanced_consensus_from_sequence(sequence, consensus_structure)
  ungapped = prune_gaps(balanced)
  theta_filter(ungapped)
end
.load_all(file) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/wrnap/etl/stockholm.rb', line 5
#
# Loads every record of the first entry parsed from +file+, fits the entry's
# "SS_cons" consensus structure to each record's sequence, and tags each
# resulting RNA with a comment.
#
# The comment is the record's "AC" per-sequence feature when gs_features is
# a Hash that contains one; otherwise it is the record label. Either way the
# text is split on "/" and "-" and re-joined with single spaces.
#
# The file is parsed once and the parsed entry is reused for both the
# consensus structure and the records.
#
# @param file [Object] passed straight to Bio::Stockholm::Reader.parse_from_file
#   (presumably a path -- confirm against that API)
# @return [Object] the result of calling +wrnap+ on the array of fitted RNAs
def load_all(file)
  alignment = Bio::Stockholm::Reader.parse_from_file(file)[0]
  structure = dot_bracket_from_stockholm(alignment.gc_features["SS_cons"])

  alignment.records.map do |label, record|
    fit_structure_to_sequence(record.sequence, structure).tap do |rna|
      features = record.gs_features
      # Fall back to the label when there is no feature Hash or no "AC" entry,
      # rather than calling split on nil.
      accession = features["AC"] if features.is_a?(Hash)
      rna.comment = (accession || label).split(/[\/-]/).join(" ")
    end
  end.wrnap
end
.prune_gaps(rna) ⇒ Object
46 47 48 |
# File 'lib/wrnap/etl/stockholm.rb', line 46
#
# Removes every column whose sequence character is "." from both the
# sequence and the structure of +rna+, then builds a new RNA from the two
# remaining strings via Wrnap::Rna.init_from_array.
#
# @param rna [Object] object responding to +seq+ and +str+ (both Strings)
# @return [Object] the value returned by Wrnap::Rna.init_from_array
def prune_gaps(rna)
  columns = rna.seq.split(//).zip(rna.str.split(//))
  ungapped_columns = columns.reject { |nucleotide, _symbol| nucleotide == "." }

  # Transposing turns [nucleotide, symbol] columns back into two rows.
  sequence_and_structure = ungapped_columns.transpose.map { |row| row.join }
  Wrnap::Rna.init_from_array(sequence_and_structure)
end
.theta_filter(rna) ⇒ Object
50 51 52 53 54 55 56 |
# File 'lib/wrnap/etl/stockholm.rb', line 50
#
# Rebuilds +rna+ keeping only the base pairs whose two indices differ by
# more than 3.
#
# @param rna [Object] object responding to +seq+, +len+ and +base_pairs+
# @return [Object] the value returned by Wrnap::Rna.init_from_string
def theta_filter(rna)
  # Needs to happen after gap pruning.
  pairs = rna.base_pairs.map(&:to_a)
  distant_pairs = pairs.reject { |i, j| (j - i).abs <= 3 }
  filtered_structure = Wrnap::Rna.structure_from_bp_list(rna.len, distant_pairs)

  Wrnap::Rna.init_from_string(rna.seq, filtered_structure)
end