Module: TextDataTools
- Defined in:
- lib/text-data-tools.rb
Defined Under Namespace
Classes: NotFoundError, TextDataFile
Class Method Summary collapse
- .column_index_from_headers(line, column_header, header_match) ⇒ Object
-
.get_1d_array(filename, has_header_line, column_header, match = /\S+/, header_match = /\S+/) ⇒ Object
Return a one-dimensional array containing data from the file filename, which may or may not have a line of column headers, in the column column_header, where column_header may be either a string or a regex which matches the title of the column, or an integer giving the zero-based column number.
-
.get_1d_array_float(*args) ⇒ Object
Calls get_1d_array and converts all data elements to floats.
- .get_1d_array_integer(*args) ⇒ Object
-
.get_2d_array(filename, has_header_line, column_header, index_header = nil, match = /\S+/, header_match = /\S+/) ⇒ Object
Return a two-dimensional array containing data from the file filename, which may or may not have a line of column headers, in the column column_header, where column_header may be either a string or a regex which matches the title of the column, or an integer giving the zero-based column number.
-
.get_2d_array_float(*args) ⇒ Object
Calls get_2d_array and converts all data elements to floats.
- .get_2d_array_integer(*args) ⇒ Object
-
.get_variable_value(filename, name, sep = '=') ⇒ Object
Extract a variable value from the given file where the variable is defined in this form: name sep value (e.g. heat = 4.0).
Class Method Details
.column_index_from_headers(line, column_header, header_match) ⇒ Object
147 148 149 150 151 152 153 154 155 |
# File 'lib/text-data-tools.rb', line 147
# Return the zero-based index of the single header in +line+ that matches
# +column_header+.
#
# line::          the raw header line of the file
# column_header:: a String (escaped and matched literally) or a Regexp
# header_match::  Regexp used to split +line+ into individual headers
#
# A String that is exactly equal to one header selects that header even if
# it is also a substring of other headers (e.g. "t" next to "temp");
# otherwise the String/Regexp is matched anywhere inside each header.
#
# Raises ArgumentError if there is no header line, if nothing matches, or
# if more than one header matches.
def self.column_index_from_headers(line, column_header, header_match)
  if line.nil?
    raise ArgumentError, "column_header: #{column_header.inspect} cannot be looked up because there is no header line"
  end

  headers = line.scan(header_match)

  # An exact header name is unambiguous, so prefer it over substring matching.
  if column_header.kind_of?(String)
    exact = headers.each_index.select { |i| headers[i] == column_header }
    return exact.first if exact.size == 1
  end

  # Build the pattern once instead of once per header.
  pattern = column_header.kind_of?(Regexp) ? column_header : Regexp.new(Regexp.escape(column_header))
  matching = headers.each_index.select { |i| headers[i] =~ pattern }

  if matching.empty?
    raise ArgumentError, "column_header: #{column_header.inspect} does not match any columns in #{headers.inspect}"
  end
  if matching.size > 1
    raise ArgumentError, "column_header: #{column_header.inspect} matches more than 1 column in #{headers.inspect}"
  end
  matching.first
end
.get_1d_array(filename, has_header_line, column_header, match = /\S+/, header_match = /\S+/) ⇒ Object
Return a one-dimensional array containing data from the file filename,
which may or may not have a line of column headers,
in the column column_header, where column_header may be either a string
or a regex which matches the title of the column, or an integer
giving the zero-based column number.
Match is a regexp that matches data items, and header_match is a regexp that
matches items in the headers.
All data is returned as strings
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/text-data-tools.rb', line 14
# Return a one-dimensional array holding one column of the file +filename+.
#
# filename::        path of the text file to read
# has_header_line:: true if the first line of the file is a header line
# column_header::   Integer (zero-based column number), or a String/Regexp
#                   that is looked up in the header line
# match::           Regexp that matches a single data item on a line
# header_match::    Regexp that matches a single item in the header line
#
# Blank (whitespace-only) lines carry no data and are skipped, matching
# what get_2d_array does; previously each one added a nil entry.
# A line that has fewer items than the requested column still yields nil.
# All data is returned as strings.
#
# Raises ArgumentError if column_header has an unsupported type, and
# RuntimeError if a header search is requested without a header line.
def self.get_1d_array(filename, has_header_line, column_header, match=/\S+/, header_match=/\S+/)
  unless [String, Regexp, Integer].any? { |cls| column_header.kind_of?(cls) }
    raise ArgumentError, "column_header header should be a string, regex or integer"
  end

  array = []
  File.open(filename) do |file|
    # Consume the header line so it is never treated as data.
    headers = file.gets if has_header_line
    if [String, Regexp].any? { |cls| column_header.kind_of?(cls) }
      raise "Header search given but has_header_line = false" unless has_header_line
      column_header = column_index_from_headers(headers, column_header, header_match)
    end

    file.each_line do |line|
      next if line =~ /^\s*$/
      array.push line.scan(match)[column_header]
    end
  end
  array
end
.get_1d_array_float(*args) ⇒ Object
Calls get_1d_array and converts all data elements to floats
34 35 36 |
# File 'lib/text-data-tools.rb', line 34
# Same arguments as get_1d_array; every returned element is converted
# with to_f.
def self.get_1d_array_float(*args)
  get_1d_array(*args).map(&:to_f)
end
.get_1d_array_integer(*args) ⇒ Object
37 38 39 |
# File 'lib/text-data-tools.rb', line 37
# Same arguments as get_1d_array; every returned element is converted
# with to_i.
def self.get_1d_array_integer(*args)
  get_1d_array(*args).map(&:to_i)
end
.get_2d_array(filename, has_header_line, column_header, index_header = nil, match = /\S+/, header_match = /\S+/) ⇒ Object
Return a two-dimensional array containing data from the file filename,
which may or may not have a line of column headers,
in the column column_header, where column_header may be either a string
or a regex which matches the title of the column, or an integer
giving the zero-based column number.
It is assumed that two-dimensional array is in one column.
If index_header is nil, data is assumed to be separated by blank lines.
E.g.
1.2
4.2
7.2

8.2
4.2
2.2
If index_header is an integer or string or regexp, it selects a column
in the manner of column_header, and the data is divided by values of this
column.
E.g.
1 5.5
1 3.2
1 2.6
2 3.2
2 2.2
2 6.3
Match is a regexp that matches data items, and header_match is a regexp that
matches items in the headers.
All data is returned as strings
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/text-data-tools.rb', line 72
# Return a two-dimensional array (an array of groups) holding one column of
# the file +filename+.
#
# filename::        path of the text file to read
# has_header_line:: true if the first line of the file is a header line
# column_header::   Integer (zero-based column number), or a String/Regexp
#                   that is looked up in the header line
# index_header::    nil, or a column selector of the same kinds as
#                   column_header
# match::           Regexp that matches a single data item on a line
# header_match::    Regexp that matches a single item in the header line
#
# Grouping:
# * index_header nil: blank lines before the first data line are ignored;
#   after that, every blank line starts a new group.
# * index_header given: blank lines are skipped and a new group starts
#   whenever the value in the index column differs from the previous line.
#
# All data is returned as strings.
#
# Raises ArgumentError if column_header or index_header has an unsupported
# type, and RuntimeError if a header search is requested without a header
# line.
def self.get_2d_array(filename, has_header_line, column_header, index_header=nil, match=/\S+/, header_match=/\S+/)
  unless [String, Regexp, Integer].any? { |cls| column_header.kind_of?(cls) }
    raise ArgumentError, "column_header header should be a string, regex or integer"
  end
  # Validate index_header itself (the original re-checked column_header here).
  unless [String, Regexp, Integer, NilClass].any? { |cls| index_header.kind_of?(cls) }
    raise ArgumentError, "index_header should be a string, regex, integer or nil"
  end

  array = []
  File.open(filename) do |file|
    # Consume the header line so it is never treated as data.
    headers = file.gets if has_header_line
    if [String, Regexp].any? { |cls| column_header.kind_of?(cls) }
      raise "Header search given but has_header_line = false" unless has_header_line
      column_header = column_index_from_headers(headers, column_header, header_match)
    end
    if [String, Regexp].any? { |cls| index_header.kind_of?(cls) }
      raise "Header search given but has_header_line = false" unless has_header_line
      index_header = column_index_from_headers(headers, index_header, header_match)
    end

    index_value = nil
    file.each_line do |line|
      blank = line =~ /^\s*$/

      if index_header.nil?
        if blank
          # Leading blank lines are ignored; later ones open a new group.
          array.push [] unless array.empty?
          next
        end
        # First data line opens the first group (was `array.size = 0`,
        # an assignment that raised NoMethodError).
        array.push [] if array.empty?
      elsif blank
        next
      end

      values = line.scan(match)
      unless index_header.nil?
        # Open a group for the first line and whenever the index value changes.
        if array.empty? || index_value != values[index_header]
          array.push []
          index_value = values[index_header]
        end
      end

      # The group being filled is always the most recently opened one.
      array.last.push values[column_header]
    end
  end
  array
end
.get_2d_array_float(*args) ⇒ Object
Calls get_2d_array and converts all data elements to floats
120 121 122 |
# File 'lib/text-data-tools.rb', line 120
# Same arguments as get_2d_array; every element of every group is
# converted with to_f.
def self.get_2d_array_float(*args)
  get_2d_array(*args).map { |row| row.map(&:to_f) }
end
.get_2d_array_integer(*args) ⇒ Object
123 124 125 |
# File 'lib/text-data-tools.rb', line 123
# Same arguments as get_2d_array; every element of every group is
# converted with to_i.
def self.get_2d_array_integer(*args)
  get_2d_array(*args).map { |row| row.map(&:to_i) }
end
.get_variable_value(filename, name, sep = '=') ⇒ Object
Extract a variable value from the given file where the variable is defined in this form:
name sep value
E.g. heat = 4.0
135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/text-data-tools.rb', line 135
# Extract a variable value from +filename+, where the variable is defined
# on a line of the form
#
#   name sep value        (e.g. heat = 4.0)
#
# filename:: path of the text file to read
# name::     variable name, matched literally
# sep::      separator between name and value, matched literally
#
# Returns the rest of the line after the separator (leading whitespace
# removed) as a String. If several lines define the variable, the last one
# wins. Raises NotFoundError if no line defines it.
def self.get_variable_value(filename, name, sep='=')
  # (?<!\w) stops +name+ from matching the tail of a longer identifier,
  # e.g. "heat" inside "preheat". The pattern is built once, not per line.
  pattern = Regexp.new("(?<!\\w)#{Regexp.escape(name)}\\s*#{Regexp.escape(sep)}\\s*(?<value>.*)")

  value = nil
  File.foreach(filename) do |line|
    found = pattern.match(line)
    value = found[:value] if found
  end

  raise NotFoundError, "Can't find #{name} in #{filename}" unless value
  value
end