Module: Wrnap::Etl::Infernal
- Defined in:
- lib/wrnap/etl/infernal.rb
Constant Summary
- NAME_REGEX =
/>>\s+(\S+)(.*\n){3}.*\s(\d+)\s+(\d+)\s+[\+-].*\n/
- HIT_SEQUENCE =
/^.*\d+\s+(.*)\s+\d+\s*$/
- HIT_STRUCTURE =
/([()<>{}\[\]\-_~,.:]+)\s+CS/
- LOCAL_END =
/\*\[\s*\d+\s*\]\*/
Class Method Summary
- .convert_infernal_to_dot_bracket(structure) ⇒ Object
- .load_all(file) ⇒ Object
- .parse_hit(output) ⇒ Object
- .pull_infernal_hit_sequence(output) ⇒ Object
- .pull_infernal_hit_structure(output) ⇒ Object
Class Method Details
.convert_infernal_to_dot_bracket(structure) ⇒ Object
44 45 46 47 |
# File 'lib/wrnap/etl/infernal.rb', line 44
# Translates an Infernal consensus-structure (WUSS) string into plain
# dot-bracket notation.
#
# Every unpaired-position symbol (`_ ~ , . : -`) becomes `.`, every opening
# bracket flavour (`( < { [`) becomes `(` and every closing bracket flavour
# (`) > } ]`) becomes `)`. Any other character is passed through untouched.
#
# WUSS reference:
# http://jalview-rnasupport.blogspot.com/2010/06/parsing-wuss-notation-of-rna-secondary.html
#
# @param structure [String] structure line in WUSS notation
# @return [String] a new string of the same length in dot-bracket notation
def convert_infernal_to_dot_bracket(structure)
  # The trailing "-" in the first set is a literal hyphen (bulge / interior
  # loop residues in WUSS); it must be mapped as well, otherwise dashes leak
  # into the supposedly dot-bracket result.
  structure.tr("_~,.:-", ".").tr("<{[", "(").tr(">}]", ")")
end
.load_all(file) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/wrnap/etl/infernal.rb', line 10
# Parses every hit alignment found in a block of Infernal output.
#
# @param file [String] either a path to a file holding the output or the
#   output text itself; when a file exists at that path it is read, otherwise
#   the argument is used verbatim as the output
# @return [Object] whatever `#wrnap` yields for the array of parsed hits
#   (an empty array when the output reports no hits)
def load_all(file)
  output = File.exist?(file) ? File.read(file) : file

  hits =
    if output =~ /No hits detected that satisfy reporting thresholds/
      []
    else
      # Drop everything up to and including the "Hit alignments:" header and
      # everything from the statistics summary onwards, leaving only the
      # non-blank lines of the alignment section.
      alignment_lines = output
        .gsub(/^(.*\n)*Hit alignments:\n/, "")
        .gsub(/Internal CM pipeline statistics summary:\n(.*\n)*$/, "")
        .strip
        .split(?\n)
        .reject(&:empty?)

      # Hits are consumed as fixed groups of ten non-empty lines; parse_hit
      # may return nil for a group, and those are discarded.
      alignment_lines.each_slice(10).map { |hit_lines| parse_hit(hit_lines.join(?\n)) }.compact
    end

  hits.wrnap
end
.parse_hit(output) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/wrnap/etl/infernal.rb', line 23
# Builds a structure-annotated RNA from the text of a single hit.
#
# @param output [String] the lines of one hit alignment, newline-joined
# @return [Object, nil] the result of Stockholm.fit_structure_to_sequence
#   with its comment set to "<id> <seq_from> <seq_to>" when NAME_REGEX
#   matched, or nil when the hit sequence matches LOCAL_END
def parse_hit(output)
  name = nil
  if (name_match = output.match(NAME_REGEX))
    # This is a pretty fancy regex, and there's no guarantee that the data has this info, so let's just test the waters here.
    id, _, seq_from, seq_to = name_match.captures
    # Only the last "|"-delimited field of the identifier is kept.
    name = format("%s %d %d", id.split(?|).last, seq_from, seq_to)
  end

  hit_sequence = pull_infernal_hit_sequence(output)
  # Hits whose sequence line contains a local-end marker are skipped.
  return if hit_sequence =~ LOCAL_END

  rna = Stockholm.fit_structure_to_sequence(hit_sequence, pull_infernal_hit_structure(output))
  rna.comment = name if name
  rna
end
.pull_infernal_hit_sequence(output) ⇒ Object
35 36 37 38 |
# File 'lib/wrnap/etl/infernal.rb', line 35
# Extracts the hit's sequence from one hit's alignment text.
#
# The sequence is the captured group of the last line matching HIT_SEQUENCE,
# upper-cased, with "-" replaced by ".".
#
# @param output [String] the text of a single hit alignment
# @return [String] the upper-cased sequence with "." for gaps
def pull_infernal_hit_sequence(output)
  final_match = output.scan(HIT_SEQUENCE).last
  # Dots are gaps in Stockholm format, and this uses the Stockholm parser underneath.
  final_match.last.upcase.tr("-", ".")
end
.pull_infernal_hit_structure(output) ⇒ Object
40 41 42 |
# File 'lib/wrnap/etl/infernal.rb', line 40
# Extracts the structure annotation from one hit's alignment text and
# converts it with convert_infernal_to_dot_bracket.
#
# @param output [String] the text of a single hit alignment
# @return [String] the converted structure string
def pull_infernal_hit_structure(output)
  # First capture group of HIT_STRUCTURE: the run of structure symbols
  # immediately preceding the "CS" label.
  raw_structure = output.match(HIT_STRUCTURE)[1]
  convert_infernal_to_dot_bracket(raw_structure)
end