Class: Bio::DB::Vcf

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/vcf.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(line = nil, sample_names = nil) ⇒ Vcf

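Creates the Vcf object. If an ordered list of sample names is provided (e.g. ['A', 'B', 'C']) it is used to label the samples; otherwise the samples are labelled "1", "2", "3", etc. The line must be tab-separated. Example: vcf = Bio::DB::Vcf.new("19\t111\t.\tA\tC\t9.6\t.\t.\tGT:HQ\t0|0:10,10\t0|0:10,10\t0/1:3,3")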



15
16
17
18
19
# File 'lib/bio/db/vcf.rb', line 15

# Builds a Vcf record, optionally filling it from one VCF data line.
#
# line         - raw VCF line handed to parse_line; when nil the record
#                starts with empty info and samples hashes.
# sample_names - ordered sample labels, passed through to parse_line.
def initialize(line=nil, sample_names=nil)
  @samples = {}
  @info = {}
  return if line.nil?

  parse_line(line, sample_names)
end

Instance Attribute Details

#altObject

Returns the value of attribute alt.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @alt, which parse_line sets to the raw fifth tab-separated
# column of the line (no splitting or conversion is applied).
def alt
  @alt
end

#chromObject

Returns the value of attribute chrom.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @chrom, which parse_line sets to the raw first tab-separated
# column of the line.
def chrom
  @chrom
end

#filterObject

Returns the value of attribute filter.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @filter, which parse_line sets from the seventh column:
# nil when that column is '.', otherwise the raw column string.
def filter
  @filter
end

#formatObject

Returns the value of attribute format.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @format, which parse_line sets to the raw ninth column
# (colon-separated sample keys) when the line carries sample columns.
def format
  @format
end

#idObject

Returns the value of attribute id.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @id, which parse_line sets from the third column:
# nil when that column is '.', otherwise the raw column string.
def id
  @id
end

#infoObject

Returns the value of attribute info.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @info. initialize starts it as an empty Hash; parse_line
# replaces it with nil when the eighth column is '.', otherwise with a Hash
# of key => value strings in which flag entries (no '=') map to nil.
def info
  @info
end

#posObject

Returns the value of attribute pos.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @pos, which parse_line sets to the second column converted
# with String#to_i.
def pos
  @pos
end

#qualObject

Returns the value of attribute qual.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @qual, which parse_line sets to the sixth column converted
# with String#to_f (so a non-numeric column such as '.' reads back as 0.0).
def qual
  @qual
end

#refObject

Returns the value of attribute ref.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @ref, which parse_line sets to the raw fourth tab-separated
# column of the line.
def ref
  @ref
end

#samplesObject

Returns the value of attribute samples.



11
12
13
# File 'lib/bio/db/vcf.rb', line 11

# Reader for @samples: a Hash of sample name => { format key => value string }.
# It is empty after initialize and whenever the parsed line has no sample
# columns.
def samples
  @samples
end

Instance Method Details

#int_or_raw(x) ⇒ Object



21
22
23
# File 'lib/bio/db/vcf.rb', line 21

# Converts x to an Integer when it is a base-10 integer string, otherwise
# returns x untouched.
#
# The previous body called Integer.new, which does not exist; the resulting
# NoMethodError was swallowed by the inline rescue, so no value was ever
# converted. Kernel#Integer is the conversion function. Base 10 is forced so
# that strings like "010" or "0x1A" are not read as octal/hex.
#
# x - value to convert (typically a String field).
#
# Returns an Integer on success, or x itself when it cannot be converted
# (non-numeric strings, nil, and non-String values are returned as-is).
def int_or_raw(x)
  Integer(x, 10)
rescue ArgumentError, TypeError
  x
end

#parse_line(line, sample_names = nil) ⇒ Object

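Parses a single tab-separated VCF data line and sets the object's attributes from its columns. Returns false (without changing anything) for header lines starting with '#', and true otherwise; raises if the line is malformed.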



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/bio/db/vcf.rb', line 26

# Parses one tab-separated VCF data line and stores its columns on this
# object, replacing whatever an earlier call stored.
#
# line         - String holding a single VCF line (trailing newline allowed).
# sample_names - optional ordered Array of labels for the sample columns;
#                when nil the samples are labelled "1", "2", ... in order.
#
# Returns false without touching any state when the line starts with '#',
# true otherwise.
#
# Raises RuntimeError when the line has fewer than 8 columns, has a FORMAT
# column but no sample columns, when sample_names does not match the number
# of sample columns, or when a sample has more values than FORMAT has keys.
def parse_line(line, sample_names=nil)
  return false if line[0,1] == '#'

  # Limit -1 keeps trailing empty columns so column counts stay accurate.
  f = line.chomp.split("\t", -1)
  raise "VCF lines must have at least 8 fields" if f.size < 8

  @chrom = f[0]
  @pos = f[1].to_i
  @id = f[2] == '.' ? nil : f[2]
  @ref = f[3]
  @alt = f[4]
  @qual = f[5].to_f # note: a missing ('.') QUAL reads back as 0.0
  @filter = f[6] == '.' ? nil : f[6]

  @info = f[7] == '.' ? nil : {}
  if @info
    f[7].split(";").each do |entry|
      # Split on the first '=' only, so a value that itself contains '='
      # stays attached to its key instead of the whole entry being stored
      # as a flag. Entries without '=' are flags and map to nil.
      key, value = entry.split("=", 2)
      @info[key || entry] = value
    end
  end

  # Clear per-line sample state up front so a line without FORMAT/sample
  # columns does not inherit the format of a previously parsed line.
  @samples = {}
  @format = nil
  return true if f.size == 8 # Has just up to info
  raise "Can't have format with no samples" if f.size == 9

  @format = f[8]
  sample_keys = @format.split(":")
  num_samples = f.size - 9 # How many fields are past the format

  if sample_names.nil? # Make the sample names just ["1", "2", ..., "num_samples"]
    sample_names = (1..num_samples).map { |i| i.to_s }
  elsif sample_names.size != num_samples
    raise "Unexpected number of samples (#{num_samples}) based on the provided sample names (#{sample_names.inspect})"
  end

  sample_names.each_with_index do |sample_name, sample_index|
    i = sample_index + 9 # index into columns (f)
    sample_values = f[i].split(":")
    raise "Expected number of sample values to be <= number of sample keys in FORMAT column Format=#{@format} but sample=#{f[i]}" if sample_values.size > sample_keys.size

    # Trailing keys the sample omits are filled with "" rather than nil.
    values = {}
    sample_keys.each_with_index { |key, value_index| values[key] = sample_values[value_index] || "" }
    @samples[sample_name] = values
  end

  true
end