Class: BlastStringParser

Inherits:
Object
  • Object
show all
Defined in:
lib/blast_string_parser.rb

Constant Summary collapse

SPECIES_REGEXP2 =

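Set up Regexps. Previous definition, kept for reference: SPECIES_REGEXP2 = /^.*\[(\w* \w*).*\].*$/ (captured only the first two words inside the square brackets). The current pattern below captures everything inside the last pair of square brackets.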

/^.*\[(.*)\].*$/
SGI_REGEXP =
/^gi\|(\d+)\|.*$/
QUERY_SEQ_REGEXP =

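Previous definition, kept for reference: QUERY_SEQ_REGEXP = /^([a-zA-Z0-9]+).*$/ (captured everything up to the first underscore). The current pattern below captures everything up to the first whitespace character.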

/^(\S+)\s.*$/
SUBJ_ANNOTATION_REGEXP =

For robustness, this pattern does not expect whitespace after the last `|`; the captured annotation is stripped of surrounding whitespace later.

/(?:.*\|)*(.*)\[.*/

Instance Method Summary collapse

Constructor Details

#initialize ⇒ BlastStringParser

Returns a new instance of BlastStringParser.



2
3
4
# File 'lib/blast_string_parser.rb', line 2

# Creates a new parser. The constructor takes no arguments and performs no
# setup; the body is intentionally empty.
def initialize
  
end

Instance Method Details

#get_query_seq(a_query) ⇒ Object



41
42
43
44
45
46
47
# File 'lib/blast_string_parser.rb', line 41

# Extracts the leading token of a query string using QUERY_SEQ_REGEXP.
#
# @param a_query [String] the query text to parse
# @return [Object] capture group 1 of QUERY_SEQ_REGEXP when the pattern
#   matches; otherwise +a_query+ itself, returned unchanged
def get_query_seq(a_query)
  matched = QUERY_SEQ_REGEXP.match(a_query)
  # Fall back to the raw input when the pattern does not apply.
  matched ? matched[1] : a_query
end

#get_sgi_info(a_hit_id) ⇒ Object



16
17
18
19
20
21
22
# File 'lib/blast_string_parser.rb', line 16

# Extracts the numeric GI from a hit id using SGI_REGEXP.
#
# @param a_hit_id [String] the hit id to parse
# @return [String] capture group 1 of SGI_REGEXP (the digits matched by the
#   pattern)
# @raise [RuntimeError] with message "Wrong hit id <id>" when the hit id does
#   not match SGI_REGEXP
def get_sgi_info(a_hit_id)
  gi_match = SGI_REGEXP.match(a_hit_id)
  # Interpolate instead of concatenating: Regexp#match returns nil for a nil
  # (or non-matching Symbol) argument, and "..." + nil would then raise a
  # TypeError that hides the intended "Wrong hit id" error.
  raise "Wrong hit id #{a_hit_id}" unless gi_match

  gi_match[1]
end

#get_species_name(a_hit_def) ⇒ Object



24
25
26
27
28
29
30
# File 'lib/blast_string_parser.rb', line 24

# Extracts the species name from a hit definition using SPECIES_REGEXP2.
#
# @param a_hit_def [String] the hit definition line to parse
# @return [String] capture group 1 of SPECIES_REGEXP2
# @raise [RuntimeError] "No species info found!" when the pattern does not
#   match
def get_species_name(a_hit_def)
  species = SPECIES_REGEXP2.match(a_hit_def)
  raise "No species info found!" if species.nil?

  species[1]
end

#get_subject_annotation(a_hit_def) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/blast_string_parser.rb', line 32

# Extracts the subject annotation from a hit definition using
# SUBJ_ANNOTATION_REGEXP.
#
# @param a_hit_def [String] the hit definition line to parse
# @return [Object] capture group 1 of SUBJ_ANNOTATION_REGEXP with surrounding
#   whitespace stripped when the pattern matches; otherwise +a_hit_def+
#   itself, returned unchanged after a notice is printed to standard output
def get_subject_annotation(a_hit_def)
  parsed = SUBJ_ANNOTATION_REGEXP.match(a_hit_def)
  return parsed[1].strip if parsed

  # Only the first 21 characters are echoed to keep the notice short.
  preview = a_hit_def[0..20]
  puts "Can not parse subject annotation " + preview + "...\n"
  a_hit_def
end