Class: Dphil::Csv2NexConverter

Inherits:
Object
  • Object
show all
Includes:
Converter
Defined in:
lib/dphil/converters/csv2nex.rb

Overview

CSV to NEXUS file converter class

Instance Method Summary collapse

Constructor Details

#initialize(csv_file, opts = {}) ⇒ Csv2NexConverter

Returns a new instance of Csv2NexConverter.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/dphil/converters/csv2nex.rb', line 10

# Build a converter from a CSV file and a file of PAUP commands.
#
# The CSV is read through +load_csv+ and the PAUP commands through
# +load_file+. The PAUP text is newline-terminated (unless blank), indented
# by two spaces and frozen so it can be embedded in the NEXUS output as-is.
#
# @param csv_file path of the CSV input, handed unchanged to +load_csv+
# @param opts [#to_h] conversion options; never modified by this method
# @option opts [Boolean] :transpose transpose the loaded CSV when truthy
# @option opts [String, nil] :paup_data path of the PAUP command file; when
#   nil, "vendor/default_commands.paup" under GEM_ROOT is used
def initialize(csv_file, opts = {})
  opts = opts.to_h

  # Load csv file
  @csv = load_csv(csv_file, "r:bom|utf-8")
  @csv = @csv.transpose if opts[:transpose]

  # Resolve the PAUP path in a local: Hash#to_h returns the receiver itself,
  # so assigning into opts would write the default into the caller's hash
  # (and raise FrozenError if that hash is frozen).
  paup_path = opts[:paup_data]
  paup_path = File.join(GEM_ROOT, "vendor", "default_commands.paup") if paup_path.nil?

  # Load paup file
  @paup = load_file(paup_path)
  @paup << "\n" unless @paup.blank? || @paup[-1] == "\n"
  @paup.indent!(2)
  @paup.freeze
end

Instance Method Details

#convert ⇒ Object

Perform the conversion and return a string result



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/dphil/converters/csv2nex.rb', line 28

# Perform the conversion and return a string result.
#
# The first row of @csv supplies the taxon names; each following row is one
# character (one column of the NEXUS matrix). Every row is passed to
# +tokenize+, whose result is looked up by sanitized cell value; the first
# element of the matching token is written into the matrix, and "-" (the
# declared GAP symbol) is written when no token matches.
# NOTE(review): +tokenize+, +sanitize_char+ and ALPHABET are defined outside
# this method; the token shape (symbol at index 0) is inferred from usage.
#
# @return [String] the NEXUS document (TAXA, CHARACTERS, ASSUMPTIONS and PAUP blocks)
def convert
  # Setup taxa information and orientation
  taxa_count = @csv.first.count
  character_count = @csv.count - 1
  # Anything outside A-Z, a-z, 0-9 in a taxon name becomes an underscore.
  taxa_labels = @csv.first.map { |name| name.to_s.strip.scrub.gsub(/[^A-Za-z0-9]/, "_") }

  # Generate labels and matrix
  character_labels = []
  # One array per taxon, seeded with its label; states are appended per row.
  character_matrix = taxa_labels.map { |t| [t] }
  (1..character_count).each do |r|
    row = @csv[r]
    token_hash = tokenize(row)
    character_label = token_hash.map { |k, _| "'#{sanitize_char(k)}'" }.join(" ")
    character_labels << %(#{r} /#{character_label})
    row.each_with_index do |charstate, i|
      token = token_hash[sanitize_char(charstate)]
      character_matrix[i] << (token.nil? ? "-" : token[0])
    end
  end
  # Collapse each taxon array into "<label> <states>" for the MATRIX section.
  character_matrix.map! { |label, *states| "#{label} #{states.join}" }

  # Return NEXUS output
  # Matrix rows are joined with "\n    " so continuation rows line up with
  # the first row once the squiggly heredoc has stripped the common indent.
  <<~NEXUS_EOF
    #NEXUS

    BEGIN TAXA;
      TITLE Manuscripts;
      DIMENSIONS NTAX=#{taxa_count};
      TAXLABELS #{taxa_labels.join(' ')};
    END;

    BEGIN CHARACTERS;
      TITLE  Variant_Matrix;
      DIMENSIONS  NCHAR=#{character_count};
      FORMAT DATATYPE = STANDARD RESPECTCASE GAP = - MISSING = ? SYMBOLS = "#{ALPHABET.join(' ')}";
      CHARSTATELABELS #{character_labels.join(', ')};
      MATRIX
        #{character_matrix.join("\n    ")}
    ;

    END;

    BEGIN ASSUMPTIONS;
      OPTIONS DEFTYPE = UNORD;
    END;

    BEGIN PAUP;
    #{@paup}END;
  NEXUS_EOF
end