Class: SequenceServer::Sequence

Inherits:
Struct
  • Object
show all
Defined in:
lib/sequenceserver/sequence.rb,
lib/sequenceserver/sequence.rb

Overview

Utility methods.

Defined Under Namespace

Classes: Retriever

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*args) ⇒ Sequence

Returns a new instance of Sequence.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/sequenceserver/sequence.rb', line 35

# Normalises the positional arguments before handing them to the
# superclass constructor. Positions used here: args[0] is the gi,
# args[1] the seqid and args[3] the title/defline.
def initialize(*args)
  # A gi reported as 'N/A' is stored as nil.
  args[0] = nil if args[0] == 'N/A'

  # Drop the 'lcl|' prefix from the seqid, if there is one.
  args[1] = args[1].gsub(/^lcl\|/, '')

  # Hits from a database that was not built with -parse_seqids carry
  # a placeholder seqid; take the real seqid (first word) and the
  # title (everything after it) from the defline instead.
  if args[1] =~ /^gnl\|BL_ORD_ID\|\d+/
    args[1], *title_words = args[3].split
    args[3] = title_words.join(' ')
  end

  # Bare super forwards the (now modified) args array.
  super
end

Instance Attribute Details

#accessionObject

Returns the value of attribute accession

Returns:

  • (Object)

    the current value of accession



34
35
36
# File 'lib/sequenceserver/sequence.rb', line 34

# Reader for the accession attribute.
#
# @return [Object] the current value of @accession
def accession
  @accession
end

#giObject

Returns the value of attribute gi

Returns:

  • (Object)

    the current value of gi



34
35
36
# File 'lib/sequenceserver/sequence.rb', line 34

# Reader for the gi attribute.
#
# @return [Object] the current value of @gi
def gi
  @gi
end

#seqidObject

Returns the value of attribute seqid

Returns:

  • (Object)

    the current value of seqid



34
35
36
# File 'lib/sequenceserver/sequence.rb', line 34

# Reader for the seqid attribute.
#
# @return [Object] the current value of @seqid
def seqid
  @seqid
end

#titleObject

Returns the value of attribute title

Returns:

  • (Object)

    the current value of title



34
35
36
# File 'lib/sequenceserver/sequence.rb', line 34

# Reader for the title attribute.
#
# @return [Object] the current value of @title
def title
  @title
end

#valueObject

Returns the value of attribute value

Returns:

  • (Object)

    the current value of value



34
35
36
# File 'lib/sequenceserver/sequence.rb', line 34

# Reader for the value attribute.
#
# @return [Object] the current value of @value
def value
  @value
end

Class Method Details

.composition(sequence_string) ⇒ Object

Copied from BioRuby's `Bio::Sequence` class.

> composition("asdfasdfffffasdf")

> {"a"=>3, "s"=>3, "d"=>3, "f"=>7}



110
111
112
113
114
115
116
# File 'lib/sequenceserver/sequence.rb', line 110

# Tallies how many times each character occurs in +sequence_string+.
#
#   composition("asdfasdfffffasdf")
#   # => {"a"=>3, "s"=>3, "d"=>3, "f"=>7}
#
# Characters are picked out with /./, so newlines are not counted.
# The returned hash answers 0 for characters that never occurred.
def composition(sequence_string)
  sequence_string.scan(/./).each_with_object(Hash.new(0)) do |char, tally|
    tally[char] += 1
  end
end

.guess_type(sequence) ⇒ Object

Strips all non-letter characters and the ambiguity codes N and X. If fewer than 10 usable characters remain, returns `nil`. If more than 90% of them are ACGTU, returns `:nucleotide`, else `:protein`.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/sequenceserver/sequence.rb', line 89

# Guesses whether +sequence+ is a nucleotide or a protein sequence.
#
# Non-letter characters and the ambiguity codes N and X (either case)
# are ignored when making the guess.
#
# @param sequence [String] sequence text; must respond to +gsub+
# @return [Symbol, nil] +:nucleotide+ when more than 90% of the
#   remaining letters are A, C, G, T or U (either case), +:protein+
#   otherwise, or +nil+ when fewer than 10 letters remain.
def guess_type(sequence)
  # Clean the sequence: first remove non-letter characters, then
  # ambiguous characters.
  cleaned_sequence = sequence.gsub(/[^A-Z]/i, '').gsub(/[NX]/i, '')

  return if cleaned_sequence.length < 10 # conservative

  # Count putative NA in the sequence. String#count tallies every
  # character belonging to the given set, so no intermediate
  # per-character hash is needed.
  na_count = cleaned_sequence.count('ACGTUacgtu')

  # Strictly greater than 90%: exactly 9 out of 10 is still :protein.
  na_count > (0.9 * cleaned_sequence.length) ? :nucleotide : :protein
end

Instance Method Details

#fastaObject

Returns FASTA formatted sequence.



74
75
76
77
78
79
80
# File 'lib/sequenceserver/sequence.rb', line 74

# Returns the sequence in FASTA format: a ">id title" defline followed
# by the sequence wrapped at 60 characters per line, joined with "\n".
def fasta
  width = 60
  line_count = (length / width.to_f).ceil
  header = ">#{id} #{title}"
  residues = to_s
  # Slice out one fixed-width window per output line.
  wrapped = Array.new(line_count) { |n| residues[width * n, width] }
  [header, *wrapped].join("\n")
end

#idObject

Returns FASTA sequence id.



55
56
57
# File 'lib/sequenceserver/sequence.rb', line 55

# Returns the FASTA sequence id: just the seqid when there is no gi,
# otherwise "gi|<gi>|<seqid>".
def id
  return [seqid].join('|') unless gi

  ['gi', gi, seqid].join('|')
end

#infoObject



69
70
71
# File 'lib/sequenceserver/sequence.rb', line 69

# Returns a hash summarising the sequence, with the keys :value, :id
# and :title (in that order).
def info
  summary = {}
  summary[:value] = value
  summary[:id] = id
  summary[:title] = title
  summary
end

#lengthObject

Returns length of the sequence.



60
61
62
# File 'lib/sequenceserver/sequence.rb', line 60

# Returns length of the sequence, i.e. whatever `value.length` reports.
def length
  value.length
end

#to_sObject

Returns sequence value.



65
66
67
# File 'lib/sequenceserver/sequence.rb', line 65

# Returns sequence value: the `value` attribute is handed back as is,
# without any conversion.
def to_s
  value
end