Module: TorchAudio
- Defined in:
- lib/torchaudio.rb,
lib/torchaudio/version.rb,
lib/torchaudio/functional.rb,
lib/torchaudio/datasets/utils.rb,
lib/torchaudio/datasets/yesno.rb,
lib/torchaudio/transforms/mel_scale.rb,
lib/torchaudio/transforms/spectrogram.rb,
lib/torchaudio/transforms/amplitude_to_db.rb,
lib/torchaudio/transforms/mel_spectrogram.rb,
lib/torchaudio/transforms/mu_law_decoding.rb,
lib/torchaudio/transforms/mu_law_encoding.rb
Defined Under Namespace
Modules: Datasets, Functional, Transforms
Classes: Error
Constant Summary collapse
- VERSION =
"0.2.0"
- F =
Functional
Class Method Summary collapse
- .load(filepath, out: nil, normalization: true, channels_first: true, num_frames: 0, offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
- .load_wav(filepath, **kwargs) ⇒ Object
- .save(filepath, src, sample_rate, precision: 16, channels_first: true) ⇒ Object
- .save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
Class Method Details
.load(filepath, out: nil, normalization: true, channels_first: true, num_frames: 0, offset: 0, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/torchaudio.rb', line 30
#
# Reads an audio file into a tensor via Ext.read_audio_file and returns the
# tensor together with the value that call reports (the sample rate).
#
# All cheap argument validation happens before the output tensor is allocated
# or any native code is invoked.
#
# @param filepath [#to_s] path of the file to read; must be a regular file
# @param out [Object, nil] optional pre-allocated output tensor; when given it
#   is passed through check_input, otherwise a new Torch::FloatTensor is used
# @param normalization [Object] forwarded unchanged to normalize_audio
# @param channels_first [Boolean] forwarded to Ext.read_audio_file
# @param num_frames [Integer] number of frames to read; -1 means the entire
#   file, values below -1 are rejected
# @param offset [Integer] non-negative start offset forwarded to the reader
# @param signalinfo [Object, nil] forwarded to Ext.read_audio_file
# @param encodinginfo [Object, nil] forwarded to Ext.read_audio_file
# @param filetype [String, nil] explicit file type; defaults to the file
#   extension without its leading dot (nil when there is no extension)
# @return [Array] two-element array: [out, sample_rate]
# @raise [ArgumentError] if filepath is missing or not a regular file, if
#   num_frames < -1, or if offset < 0
def load(
  filepath,
  out: nil,
  normalization: true,
  channels_first: true,
  num_frames: 0,
  offset: 0,
  signalinfo: nil,
  encodinginfo: nil,
  filetype: nil
)
  filepath = filepath.to_s

  # File.file? (not File.exist?) so that directories are rejected too,
  # which is what the error message promises.
  unless File.file?(filepath)
    raise ArgumentError, "#{filepath} not found or is a directory"
  end

  if num_frames < -1
    raise ArgumentError, "Expected value for num_frames -1 (entire file) or >=0"
  end

  if offset < 0
    raise ArgumentError, "Expected non-negative offset value"
  end

  # initialize output tensor
  if out.nil?
    out = Torch::FloatTensor.new
  else
    check_input(out)
  end

  # same logic as C++
  # could also make read_audio_file work with nil
  filetype ||= File.extname(filepath)[1..-1]

  sample_rate = Ext.read_audio_file(
    filepath,
    out,
    channels_first,
    num_frames,
    offset,
    signalinfo,
    encodinginfo,
    filetype
  )

  # normalize if needed
  normalize_audio(out, normalization)

  [out, sample_rate]
end
.load_wav(filepath, **kwargs) ⇒ Object
78 79 80 81 |
# File 'lib/torchaudio.rb', line 78
#
# Delegates to load with the :normalization option forced to 1 << 16 (65536).
# A caller-supplied :normalization is overwritten; every other keyword is
# passed through untouched.
#
# @param filepath [Object] forwarded to load
# @param kwargs [Hash] extra keyword arguments forwarded to load
# @return [Object] whatever load returns
def load_wav(filepath, **kwargs)
  forced = kwargs.merge(normalization: 1 << 16)
  load(filepath, **forced)
end
.save(filepath, src, sample_rate, precision: 16, channels_first: true) ⇒ Object
83 84 85 86 87 88 89 90 91 |
# File 'lib/torchaudio.rb', line 83
#
# Builds an Ext::SignalInfo describing +src+ and hands the actual write off to
# save_encinfo.
#
# @param filepath [Object] destination, forwarded to save_encinfo
# @param src [Object] tensor-like object responding to dim, size and numel
# @param sample_rate [Numeric] stored as the signal info's rate
# @param precision [Integer] stored as the signal info's precision
# @param channels_first [Boolean] selects which axis of a multi-dimensional
#   +src+ holds the channel count (axis 0 when true, axis 1 otherwise)
# @return [Object] whatever save_encinfo returns
def save(filepath, src, sample_rate, precision: 16, channels_first: true)
  channel_axis = channels_first ? 0 : 1

  info = Ext::SignalInfo.new
  info.rate = sample_rate
  # a one-dimensional tensor is treated as a single channel
  info.channels =
    if src.dim == 1
      1
    else
      src.size(channel_axis)
    end
  info.length = src.numel
  info.precision = precision

  save_encinfo(filepath, src, channels_first: channels_first, signalinfo: info)
end
.save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil) ⇒ Object
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/torchaudio.rb', line 93
#
# Validates the destination directory, the source tensor and the optional
# signal metadata, then writes the data through Ext.write_audio_file.
#
# @param filepath [String] destination path; its parent directory must exist
# @param src [Object] tensor-like audio data (presumably a Torch tensor -
#   confirm against callers). A one-dimensional +src+ is unsqueezed IN PLACE,
#   so the caller's object gains a channel axis.
# @param channels_first [Boolean] whether +src+ is laid out channels x length;
#   when true the data is transposed before writing
# @param signalinfo [Object, nil] optional signal metadata; an integral rate is
#   converted to Float and an integral-valued precision to Integer, in place
# @param encodinginfo [Object, nil] forwarded to Ext.write_audio_file
# @param filetype [String, nil] used only when +filepath+ has no extension;
#   otherwise the extension (without the dot) wins
# @return [Object] whatever Ext.write_audio_file returns
# @raise [RuntimeError] if the parent directory of +filepath+ does not exist
# @raise [ArgumentError] if +src+ has more than 2 dimensions or more than 16
#   entries along the channel axis, or if the rate/precision in +signalinfo+
#   cannot be represented as a Float/Integer respectively
def save_encinfo(filepath, src, channels_first: true, signalinfo: nil, encodinginfo: nil, filetype: nil)
  ch_idx = channels_first ? 0 : 1

  # check if save directory exists
  abs_dirpath = File.dirname(File.expand_path(filepath))
  unless Dir.exist?(abs_dirpath)
    raise "Directory does not exist: #{abs_dirpath}"
  end

  # check that src is a CPU tensor (check_input is defined elsewhere)
  check_input(src)

  # Check/Fix shape of source data
  if src.dim == 1
    # 1d tensors are assumed to be mono signals
    src.unsqueeze!(ch_idx)
  elsif src.dim > 2 || src.size(ch_idx) > 16
    # NOTE(review): the check permits exactly 16 channels while the message
    # says "C < 16" - confirm which one is intended.
    raise ArgumentError, "Expected format where C < 16, but found #{src.size}"
  end

  if signalinfo
    # sox stores the sample rate as a float, though practically sample rates
    # are almost always integers; convert integers to floats
    rate = signalinfo.rate
    if rate && !rate.is_a?(Float)
      unless rate.to_f == rate
        raise ArgumentError, "Sample rate should be a float or int"
      end
      signalinfo.rate = rate.to_f
    end

    # check if the bit precision (i.e. bits per sample) is an integer
    precision = signalinfo.precision
    if precision && !precision.is_a?(Integer)
      unless precision.to_i == precision
        raise ArgumentError, "Bit precision should be an integer"
      end
      signalinfo.precision = precision.to_i
    end
  end

  # programs such as librosa normalize the signal, unnormalize if detected
  if src.min >= -1.0 && src.max <= 1.0
    src = src * (1 << 31)
    src = src.long
  end

  # set filetype and allow for files with no extensions
  extension = File.extname(filepath)
  filetype = extension.length > 0 ? extension[1..-1] : filetype

  # transpose from C x L -> L x C
  src = src.transpose(1, 0) if channels_first

  # save data to file
  src = src.contiguous
  Ext.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
end