Module: SVMKit::Dataset

Defined in:
lib/svmkit/dataset.rb

Overview

Module for loading and saving a dataset file.

Class Method Summary collapse

Class Method Details

.dump_libsvm_file(data, labels, filename, zero_based: false) ⇒ Object

Dump the dataset to a file in the libsvm file format.



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/svmkit/dataset.rb', line 34

# Write a dataset to +filename+, one line per sample, using the line
# produced by dump_libsvm_line for each sample.
#
# Only the first min(data.shape[0], labels.shape[0]) samples are written,
# so a length mismatch between data and labels truncates to the shorter one.
# The file is opened in 'w' mode, i.e. an existing file is overwritten.
#
# @param data [Object] sample matrix; must respond to +shape+ and
#   +[row, true]+ (presumably a 2-D Numo::NArray — confirm against callers).
# @param labels [Object] per-sample labels; must respond to +shape+ and +[]+.
# @param filename [String] path of the file to write.
# @param zero_based [Boolean] forwarded unchanged to dump_libsvm_line
#   (presumably selects 0- vs 1-based feature indices — confirm there).
# @return [Object]
def dump_libsvm_file(data, labels, filename, zero_based: false)
  n_rows = [data.shape[0], labels.shape[0]].min
  label_dtype = detect_dtype(labels)
  value_dtype = detect_dtype(data)
  File.open(filename, 'w') do |out|
    n_rows.times do |row|
      label = labels[row]
      features = data[row, true]
      line = dump_libsvm_line(label, features, label_dtype, value_dtype, zero_based)
      out.puts(line)
    end
  end
end

.load_libsvm_file(filename, zero_based: false) ⇒ Array<Numo::NArray>

Load a dataset in the libsvm file format into Numo::NArray objects (samples and labels).



15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/svmkit/dataset.rb', line 15

# Read +filename+ and parse it line by line with parse_libsvm_line.
#
# The whole file is read into memory and split on "\n". Each line yields a
# label, a feature vector and a maximum index; the largest maximum index
# seen (starting from 0) is passed to convert_to_matrix as the feature count.
#
# @param filename [String] path of the file to read.
# @param zero_based [Boolean] forwarded unchanged to parse_libsvm_line
#   (presumably selects 0- vs 1-based feature indices — confirm there).
# @return [Array] two elements: the result of convert_to_matrix for the
#   collected feature vectors, and Numo::NArray.asarray of the labels.
def load_libsvm_file(filename, zero_based: false)
  labels = []
  ftvecs = []
  # Single pass: collect labels/vectors while folding the running maximum index.
  n_features = File.read(filename).split("\n").reduce(0) do |widest, line|
    label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
    labels << label
    ftvecs << ftvec
    [widest, max_idx].max
  end
  samples = convert_to_matrix(ftvecs, n_features)
  [samples, Numo::NArray.asarray(labels)]
end