Class: Reference_parser

Inherits:
Object
  • Object
show all
Defined in:
lib/pets/parsers/reference_parser.rb

Class Method Summary collapse

Class Method Details

.load(file_path, file_format: nil, feature_type: nil) ⇒ Object



4
5
6
7
8
9
10
11
# File 'lib/pets/parsers/reference_parser.rb', line 4

# Loads a genomic reference file and wraps the result in a Genomic_Feature.
#
# When +file_format+ is nil it is inferred from the extension of +file_path+.
# Only the 'gtf' format triggers a parser here; for any other format no
# parser runs and Genomic_Feature.new receives nil regions and nil
# annotations.
#
# @param file_path [String] path of the reference file
# @param file_format [String, nil] format name; inferred from the file
#   extension when nil
# @param feature_type [String, nil] forwarded unchanged to parse_gtf
# @return [Genomic_Feature] built from the parsed regions and attributes
def self.load(file_path, file_format: nil, feature_type: nil)
  # File.extname looks only at the last extension of the basename, so dots in
  # directory names or earlier in the file name (e.g. 'refs.v2/genes.v1.gtf')
  # do not hide the 'gtf' suffix.
  file_format = File.extname(file_path).sub(/\A\./, '') if file_format.nil?
  if file_format == 'gtf'
    regions, all_attrs = parse_gtf(file_path, feature_type: feature_type)
  end

  Genomic_Feature.new(regions, annotations: all_attrs)
end

.parse_gtf(file_path, feature_type: nil) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/pets/parsers/reference_parser.rb', line 13

# Parses a tab-separated GTF file into region tuples and per-gene attributes.
# Format reference: https://www.ensembl.org/info/website/upload/gff.html
#
# Lines starting with '#' and blank lines are skipped. Each remaining line is
# split on tabs into the nine GTF columns; the attribute column is handed to
# process_attrs with ';' and ' ' as separators, and the record's source and
# feature columns are added to the resulting hash under 'source' and
# 'feature'.
#
# @param file_path [String] path of the GTF file
# @param feature_type [String, nil] when given, only records whose feature
#   column equals this value are kept; nil keeps every record
# @return [Array(Array, Hash)] a pair of:
#   - features: one [seqname, start, stop, gene_id] entry per kept record,
#     with a leading 'chr' removed from seqname and start/stop as Integers
#   - all_attrs: gene_id => attribute hash; when several records share a
#     gene_id the last one read wins
def self.parse_gtf(file_path, feature_type: nil)
  features = []
  all_attrs = {}
  # File.foreach closes the file handle once iteration ends.
  File.foreach(file_path) do |line|
    record = line.chomp
    # Blank lines carry no columns and would otherwise yield nil fields.
    next if record.start_with?('#') || record.strip.empty?

    seqname, source, feature, start, stop, _score, _strand, _frame, attribute = record.split("\t")
    next unless feature_type.nil? || feature_type == feature

    attrs = process_attrs(attribute, ';', ' ')
    attrs['source'] = source
    attrs['feature'] = feature
    id = attrs['gene_id']
    # Strip only the leading 'chr' prefix; a 'chr' elsewhere in the name is kept.
    features << [seqname.sub(/\Achr/, ''), start.to_i, stop.to_i, id]
    all_attrs[id] = attrs
  end
  [features, all_attrs]
end