Module: TextDataTools

Defined in:
lib/text-data-tools.rb

Defined Under Namespace

Classes: NotFoundError, TextDataFile

Class Method Summary collapse

Class Method Details

.column_index_from_headers(line, column_header, header_match) ⇒ Object

Raises:

  • (ArgumentError)


147
148
149
150
151
152
153
154
155
# File 'lib/text-data-tools.rb', line 147

# Find the zero-based position of the single column whose header matches
# column_header.
#
# line is the raw header line of the file; it is split into individual
# headers with line.scan(header_match). column_header is either a Regexp,
# which is used as given, or a String, which is matched literally as a
# substring of each header (so "temp" also matches "temperature").
#
# Returns the Integer index of the matching header.
#
# Raises ArgumentError if line is nil (e.g. the file had no header line to
# read), if no header matches, or if more than one header matches.
def self.column_index_from_headers(line, column_header, header_match)
  # A nil line would otherwise surface as a NoMethodError from nil.scan.
  raise ArgumentError.new("column_header: #{column_header.inspect} cannot be looked up because there is no header line") if line.nil?
  headers = line.scan(header_match)
  # Build the matcher once instead of once per header.
  matcher = column_header.kind_of?(Regexp) ? column_header : Regexp.new(Regexp.escape(column_header))
  matching = headers.each_index.select { |i| headers[i] =~ matcher }
  raise ArgumentError.new("column_header: #{column_header.inspect} does not match any columns in #{headers.inspect}") if matching.empty?
  raise ArgumentError.new("column_header: #{column_header.inspect} matches more than 1 column in #{headers.inspect}") if matching.size > 1
  matching.first
end

.get_1d_array(filename, has_header_line, column_header, match = /\S+/, header_match = /\S+/) ⇒ Object

Return a one-dimensional array containing data from the file filename,

which may or may not have a line of column headers,
in the column column_header, where column_header may be either a string
or a regex which matches the title of the column, or an integer
giving the zero-based column number.

Match is a regexp that matches data items, and header_match is a regexp that
matches items in the headers.

All data is returned as strings

Raises:

  • (ArgumentError)


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/text-data-tools.rb', line 14

# Read a single column of the text file filename and return it as a flat
# array of strings, one entry per line after the optional header line.
#
# column_header selects the column: an Integer is used directly as the index
# into each line's items, while a String or Regexp is resolved to an index
# against the header line (which requires has_header_line to be true).
# Each line is split into items with line.scan(match); header_match plays
# the same role for the header line.
#
# Raises ArgumentError if column_header is not a String, Regexp or Integer,
# and RuntimeError if a header search is requested without a header line.
def self.get_1d_array(filename, has_header_line, column_header, match=/\S+/, header_match=/\S+/)
  unless [String, Regexp, Integer].any? { |klass| column_header.kind_of?(klass) }
    raise ArgumentError.new("column_header header should be a string, regex or integer")
  end
  File.open(filename) do |io|
    # The header line is always consumed when present, even for integer columns.
    header_line = has_header_line ? io.gets : nil
    case column_header
    when String, Regexp
      raise "Header search given but has_header_line = false" unless has_header_line
      column_header = column_index_from_headers(header_line, column_header, header_match)
    end
    # The mapped array is the block's value and therefore the method's result.
    io.each_line.map { |row| row.scan(match)[column_header] }
  end
end

.get_1d_array_float(*args) ⇒ Object

Calls get_1d_array and converts all data elements to floats



34
35
36
# File 'lib/text-data-tools.rb', line 34

# Forward all arguments unchanged to get_1d_array and convert every returned
# element with to_f.
def self.get_1d_array_float(*args)
  strings = get_1d_array(*args)
  strings.map(&:to_f)
end

.get_1d_array_integer(*args) ⇒ Object



37
38
39
# File 'lib/text-data-tools.rb', line 37

# Forward all arguments unchanged to get_1d_array and convert every returned
# element with to_i.
def self.get_1d_array_integer(*args)
  strings = get_1d_array(*args)
  strings.map(&:to_i)
end

.get_2d_array(filename, has_header_line, column_header, index_header = nil, match = /\S+/, header_match = /\S+/) ⇒ Object

Return a two-dimensional array containing data from the file filename,

which may or may not have a line of column headers,
in the column column_header, where column_header may be either a string
or a regex which matches the title of the column, or an integer
giving the zero-based column number.

It is assumed that the two-dimensional array is stored in a single column.
If index_header is nil, data is assumed to be separated by blank lines.
E.g.
    1.2
    4.2
    7.2

    8.2
    4.2
    2.2
If index_header is an integer or string or regexp, it selects a column
in the manner of column_header, and the data is divided by values of this
column.
E.g. 
    1  5.5
    1  3.2
    1  2.6
    2  3.2
    2  2.2
    2  6.3

Match is a regexp that matches data items, and header_match is a regexp that
matches items in the headers.

All data is returned as strings

Raises:

  • (ArgumentError)


72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/text-data-tools.rb', line 72

# Read one column of the text file filename and return it as an array of
# arrays of strings (one inner array per group of lines).
#
# column_header selects the data column and index_header (when not nil)
# selects the grouping column. Each may be an Integer index, or a String or
# Regexp that is resolved against the header line (which requires
# has_header_line to be true). Lines are split into items with
# line.scan(match); header_match plays the same role for the header line.
#
# Grouping:
# * index_header nil: groups are separated by blank lines. Blank lines
#   before the first data line are ignored, a run of consecutive blank lines
#   counts as a single separator, and trailing blank lines do not add an
#   empty group.
# * index_header given: blank lines are skipped and a new group starts
#   whenever the value in the index column differs from the previous data
#   line's value.
#
# Raises ArgumentError if column_header is not a String, Regexp or Integer,
# or if index_header is not a String, Regexp, Integer or nil. Raises
# RuntimeError if a header search is requested without a header line.
def self.get_2d_array(filename, has_header_line, column_header, index_header=nil, match=/\S+/, header_match=/\S+/)
  unless [String, Regexp, Integer].any? { |cls| column_header.kind_of?(cls) }
    raise ArgumentError.new("column_header header should be a string, regex or integer")
  end
  # Validate index_header itself (not column_header a second time).
  unless [String, Regexp, Integer, NilClass].any? { |cls| index_header.kind_of?(cls) }
    raise ArgumentError.new("index_header should be a string, regex, integer or nil")
  end
  groups = []
  File.open(filename) do |file|
    headers = file.gets if has_header_line
    if [String, Regexp].any? { |cls| column_header.kind_of?(cls) }
      raise "Header search given but has_header_line = false" unless has_header_line
      column_header = column_index_from_headers(headers, column_header, header_match)
    end
    if [String, Regexp].any? { |cls| index_header.kind_of?(cls) }
      raise "Header search given but has_header_line = false" unless has_header_line
      index_header = column_index_from_headers(headers, index_header, header_match)
    end
    start_group = true # the first data line always opens a group
    group_key = nil    # index-column value of the current group
    file.each_line do |line|
      if line =~ /^\s*$/
        # Blank lines only act as separators when grouping by blank lines;
        # the new group is opened lazily by the next data line.
        start_group = true if index_header.nil?
        next
      end
      values = line.scan(match)
      unless index_header.nil?
        key = values[index_header]
        start_group ||= key != group_key
        group_key = key
      end
      if start_group
        groups.push []
        start_group = false
      end
      groups.last.push values[column_header]
    end
  end
  groups
end

.get_2d_array_float(*args) ⇒ Object

Calls get_2d_array and converts all data elements to floats



120
121
122
# File 'lib/text-data-tools.rb', line 120

# Forward all arguments unchanged to get_2d_array and convert every element
# of every inner array with to_f.
def self.get_2d_array_float(*args)
  groups = get_2d_array(*args)
  groups.map { |group| group.map(&:to_f) }
end

.get_2d_array_integer(*args) ⇒ Object



123
124
125
# File 'lib/text-data-tools.rb', line 123

# Forward all arguments unchanged to get_2d_array and convert every element
# of every inner array with to_i.
def self.get_2d_array_integer(*args)
  groups = get_2d_array(*args)
  groups.map { |group| group.map(&:to_i) }
end

.get_variable_value(filename, name, sep = '=') ⇒ Object

Extract a variable value from the given file where the variable is defined in this form:

name sep value

E.g. heat = 4.0

Raises:

  • (NotFoundError)

135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/text-data-tools.rb', line 135

# Look up a variable written as "name sep value" (e.g. "heat = 4.0") in the
# text file filename and return the value as a string.
#
# name and sep are matched literally, with optional whitespace on either
# side of sep. name is not anchored, so it may match anywhere within a line.
# The value is everything after the separator and the whitespace following
# it, up to the end of the line. Every line is scanned, so if the variable
# is defined more than once the last definition wins.
#
# Raises NotFoundError if no line of the file defines the variable.
def self.get_variable_value(filename, name, sep='=')
  # The pattern does not depend on the line, so compile it once up front.
  pattern = Regexp.new("#{Regexp.escape(name)}\\s*#{Regexp.escape(sep)}\\s*(?<value>.*)")
  value = nil
  File.foreach(filename) do |line|
    found = pattern.match(line)
    value = found[:value] if found
  end
  raise NotFoundError.new("Can't find #{name} in #{filename}") unless value
  value
end