5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
# File 'lib/bio-stockholm/stockholm.rb', line 5
# Parses a Stockholm 1.0 alignment file into one Store per alignment.
#
# The first line must be exactly "# STOCKHOLM 1.0" followed by a newline.
# After the header, blank lines are skipped and every other line is handled
# as follows:
# * "#=GF <feature> <text>" is collected into Store#gf_features; a repeated
#   feature is joined to the previous text with a single space.
# * "#=GC <feature> <text>" is collected into Store#gc_features; a repeated
#   feature is concatenated with no separator.
# * "#=GS <seq> <feature> <text>" and "#=GR <seq> <feature> <text>" are
#   collected into gs_features / gr_features of the Record stored under
#   <seq>; repeated features are concatenated with no separator.
# * "//" finishes the current alignment and starts a new Store.
# * A repeated "# STOCKHOLM 1.0" line (the header of a following alignment
#   in the same file) is stored as the header of the current Store.
# * Anything else must look like "<seq> <residues>"; the residues are
#   appended to the sequence of the Record stored under <seq>.
#
# An alignment is only added to the result when its "//" terminator is seen;
# content after the last "//" is not returned.
#
# @param filename [String] path of the Stockholm file to read
# @return [Array<Bio::Stockholm::Store>] the terminated alignments, in file order
# @raise [FormatException] if the first line is not the version 1.0 header,
#   or if a later line matches none of the forms above
def self.parse_from_file(filename)
  header_line = "\# STOCKHOLM 1.0\n"

  # Appends value to hash[key] (joined by separator) or stores it when the
  # key has not been seen yet.
  accumulate = lambda do |hash, key, value, separator|
    hash[key] = hash.key?(key) ? hash[key] + separator + value : value
  end

  alignments = []
  current = Bio::Stockholm::Store.new
  awaiting_header = true

  # File.foreach closes the file when iteration finishes or an error is raised.
  File.foreach(filename) do |line|
    if awaiting_header
      unless line == header_line
        raise FormatException, "Currently unable to parse stockholm format files unless they are version 1.0"
      end
      current.header = line.strip
      awaiting_header = false
      next
    end

    next if line.strip.empty?

    if line == header_line
      # Header of a following alignment; without this branch it would be
      # mistaken for a sequence line whose identifier is "#".
      current.header = line.strip
    elsif (markup = line.match(/^\#=(..) (\S+)\s+(.*)/))
      kind, name, text = markup.captures
      case kind
      when 'GF'
        current.gf_features ||= {}
        accumulate.call(current.gf_features, name, text, ' ')
      when 'GC'
        current.gc_features ||= {}
        accumulate.call(current.gc_features, name, text, '')
      else
        # Per-sequence markup: `name` is the sequence identifier and `text`
        # holds "<feature> <value>".
        unless (per_sequence = text.match(/(.*?)\s+(.*)/))
          raise FormatException, "Unable to parse stockholm GS or GR format line: #{line}"
        end
        unless %w[GS GR].include?(kind)
          raise FormatException, "Unable to parse stockholm format line: #{line}"
        end

        feature, value = per_sequence.captures
        record = (current.records[name] ||= Record.new)
        if kind == 'GS'
          record.gs_features ||= {}
          accumulate.call(record.gs_features, feature, value, '')
        else
          record.gr_features ||= {}
          accumulate.call(record.gr_features, feature, value, '')
        end
      end
    elsif line.start_with?('//')
      alignments.push(current)
      current = Bio::Stockholm::Store.new
    else
      unless (row = line.match(/^(\S+)\s+(.+)$/))
        raise FormatException, "Unable to parse stockholm format line: #{line}"
      end
      record = (current.records[row[1]] ||= Record.new)
      record.sequence ||= ''
      record.sequence += row[2].rstrip
    end
  end

  alignments
end
|