Class: Rust::DataFrame

Inherits:
RustDatatype show all
Defined in:
lib/rust-core.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(labels_or_data) ⇒ DataFrame

Returns a new instance of DataFrame.



115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/rust-core.rb', line 115

# Builds a data frame from either a list of column labels or a hash of
# columns.
#
# * Array: each element (converted with +to_s+) becomes an empty column.
# * Hash:  each key (converted with +to_s+) becomes a column; the values are
#   read row by row and appended through +add_row+. The number of rows is
#   taken from the first value of the hash.
#
# NOTE(review): any other argument type leaves @labels unset; confirm whether
# callers rely on that before turning it into an error.
#
# @param labels_or_data [Array, Hash] column labels, or label => values
def initialize(labels_or_data)
    @data = {}

    if labels_or_data.is_a? Array
        @labels = labels_or_data.map { |l| l.to_s }
        @labels.each { |label| @data[label] = [] }
    elsif labels_or_data.is_a? Hash
        @labels = labels_or_data.keys.map { |l| l.to_s }
        @labels.each { |label| @data[label] = [] }

        # An empty hash has no first column to measure; treat it as a frame
        # with no columns and no rows instead of calling +size+ on nil.
        row_count = labels_or_data.empty? ? 0 : labels_or_data.values.first.size
        row_count.times do |i|
            add_row(labels_or_data.map { |k, v| [k, v[i]] }.to_h)
        end
    end
end

Class Method Details

.pull_variable(variable) ⇒ Object



106
107
108
109
110
111
112
113
# File 'lib/rust-core.rb', line 106

# Builds a DataFrame from the R data frame stored in +variable+.
#
# The column names are requested with <tt>colnames(variable)</tt>, then each
# column is requested with <tt>variable$column</tt>. Rust._pull is defined
# outside this view; presumably it evaluates the R expression and returns
# the result as a Ruby value (confirm).
#
# @param variable [String] name of the R variable to read
# @return [DataFrame] a frame with one column per R column
def self.pull_variable(variable)
    columns = {}
    Rust._pull("colnames(#{variable})").each do |label|
        columns[label] = Rust._pull("#{variable}$#{label}")
    end
    DataFrame.new(columns)
end

Instance Method Details

#add_row(row) ⇒ Object Also known as: <<



229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/rust-core.rb', line 229

# Appends one row to the data frame.
#
# * Array: must hold exactly one value per column; values are assigned to the
#   columns by position, following the label order.
# * Hash:  its keys (compared after +to_s+) must be exactly the column names;
#   each value is appended to the column named by its key.
#
# @param row [Array, Hash] the values of the new row
# @return [true]
# @raise [RuntimeError] if the size or the keys do not match the columns
# @raise [TypeError] if +row+ is neither an Array nor a Hash
def add_row(row)
    case row
    when Array
        unless row.size == @data.size
            raise "Expected an array of size #{@data.size}"
        end

        @labels.zip(row) { |label, value| @data[label] << value }
    when Hash
        given_keys = row.keys.map(&:to_s).sort
        unless given_keys == @data.keys.sort
            raise "Expected a hash with the following keys: #{@data.keys}"
        end

        row.each_pair { |key, value| @data[key.to_s] << value }
    else
        raise TypeError, "Expected an Array or a Hash"
    end

    true
end

#column(name) ⇒ Object



135
136
137
# File 'lib/rust-core.rb', line 135

# Returns the values stored in the column called +name+.
#
# Column labels are stored as strings, so +name+ is converted with +to_s+
# before the lookup; a Symbol therefore finds the same column as its String
# form.
#
# @param name [#to_s] the column label
# @return [Array, nil] the column values, or nil if there is no such column
def column(name)
    return @data[name.to_s]
end

#column_names ⇒ Object Also known as: colnames



164
165
166
# File 'lib/rust-core.rb', line 164

# Lists the column labels, each converted with +to_s+, in the order in which
# the columns are stored.
#
# @return [Array<String>]
def column_names
    @data.keys.map(&:to_s)
end

#delete_column(column) ⇒ Object



159
160
161
162
# File 'lib/rust-core.rb', line 159

# Removes the column called +column+ from both the label list and the data.
#
# Column labels are stored as strings, so +column+ is converted with +to_s+
# first; passing a Symbol removes the column with the matching name.
#
# @param column [#to_s] the label of the column to remove
# @return [Array, nil] the removed values, or nil if there was no such column
def delete_column(column)
    name = column.to_s
    @labels.delete(name)
    @data.delete(name)
end

#each ⇒ Object



252
253
254
255
256
257
258
# File 'lib/rust-core.rb', line 252

# Yields every row produced by +each_with_index+, without its index.
#
# @yieldparam element the row as produced by +each_with_index+
# @return [self]
def each
    each_with_index { |element, _index| yield element }
    self
end

#each_with_index ⇒ Object



260
261
262
263
264
265
266
267
268
269
270
271
# File 'lib/rust-core.rb', line 260

# Walks the rows in order, yielding each one as a label => value hash
# together with its zero-based position.
#
# @yieldparam element [Hash] the values of the row, keyed by column label
# @yieldparam i [Integer] the position of the row
# @return [self]
def each_with_index
    rows.times do |index|
        record = @labels.map { |label| [label, @data[label][index]] }.to_h
        yield record, index
    end

    self
end

#inspect ⇒ Object



289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/rust-core.rb', line 289

# Renders the data frame as a plain-text table: a rule, a header line with
# the right-aligned column names, a rule, one line per row prefixed with
# "[i] ", and a closing rule (no trailing newline).
#
# Every cell is shown through its own +inspect+ and right-aligned to the
# widest entry of its column.
#
# @return [String]
def inspect
    separator = " | "

    # A column is as wide as its name or its longest inspected value.
    col_widths = self.column_names.map do |colname|
        [colname, ([colname.length] + @data[colname].map { |e| e.inspect.length }).max]
    end.to_h
    # Width reserved for the "[i] " prefix; the header is indented by it.
    col_widths[:rowscol] = self.rows.inspect.length + 3

    # The rule is identical everywhere it appears, so it is built once.
    rule = "-" * (col_widths.values.sum + ((col_widths.size - 1) * separator.length))
    header = self.column_names.map { |colname| colname.rjust(col_widths[colname]) }.join(separator)

    lines = [rule, (" " * col_widths[:rowscol]) + header, rule]
    self.each_with_index do |row, i|
        # Pad the prefix to the reserved width: without this, rows whose
        # index has fewer digits than the row count drift out of line with
        # the header (e.g. every row of a 10-row frame).
        prefix = "[#{i}] ".rjust(col_widths[:rowscol])
        cells = row.map { |colname, value| value.inspect.rjust(col_widths[colname]) }
        lines << prefix + cells.join(separator)
    end
    lines << rule

    return lines.join("\n")
end

#load_in_r_as(variable_name) ⇒ Object



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/rust-core.rb', line 273

# Generates the R commands that rebuild this data frame under the name
# +variable_name+ and hands them to Rust._eval_big.
#
# The first command creates an empty <tt>data.frame()</tt>; one assignment
# per row follows, with row positions starting at 1. Column names and values
# are written with Ruby's +inspect+. Rust._eval_big is defined outside this
# view; presumably it runs the commands in R (confirm).
#
# @param variable_name [String] name of the R variable to assign
# @return whatever Rust._eval_big returns
def load_in_r_as(variable_name)
    statements = ["#{variable_name} <- data.frame()"]

    position = 0
    self.each do |row|
        position += 1
        names  = row.keys.map(&:inspect).join(",")
        values = row.values.map(&:inspect).join(",")
        statements << "#{variable_name}[#{position}, c(#{names})] <- c(#{values})"
    end

    Rust._eval_big(statements)
end

#merge(other, by, first_alias = "x", second_alias = "y") ⇒ Object

Raises:

  • (TypeError)


169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/rust-core.rb', line 169

# Joins this data frame with +other+ on the columns listed in +by+.
#
# The result holds the +by+ columns first, then the remaining columns of this
# frame prefixed with "<first_alias>.", then the remaining columns of +other+
# prefixed with "<second_alias>." (an empty alias adds no prefix). Rows are
# emitted in the order of +other+, and only for rows of +other+ whose key is
# also present in this frame. If several rows of this frame share a key, the
# last of them is the one matched.
#
# @param other [DataFrame] the frame to join with
# @param by [Array<String>] names of the columns that form the key
# @param first_alias [String] prefix for the non-key columns of this frame
# @param second_alias [String] prefix for the non-key columns of +other+
# @return [DataFrame] the merged frame
# @raise [TypeError] if +other+ is not a DataFrame or +by+ is not an Array
#   of Strings
# @raise [RuntimeError] if a key column is missing from either frame, or if
#   both aliases are equal
def merge(other, by, first_alias = "x", second_alias = "y")
    raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
    unless by.is_a?(Array) && by.all? { |e| e.is_a?(String) }
        raise TypeError, "Expected list of strings"
    end
    unless (by & self.column_names).size == by.size
        raise "This dataset should have all the columns in #{by}"
    end
    unless (by & other.column_names).size == by.size
        raise "The passed dataset should have all the columns in #{by}"
    end
    raise "The aliases can not have the same value" if first_alias == second_alias

    # Index this frame's rows by key; a later row with the same key replaces
    # an earlier one.
    index_by_key = {}
    self.each_with_index do |row, index|
        index_by_key[by.map { |colname| row[colname] }] = index
    end

    own_columns     = self.column_names - by
    foreign_columns = other.column_names - by

    own_prefix     = first_alias.length > 0 ? first_alias + "." : first_alias
    foreign_prefix = second_alias.length > 0 ? second_alias + "." : second_alias

    # Pairs of [source column, column name in the result].
    own_renamed     = own_columns.map { |colname| [colname, "#{own_prefix}#{colname}"] }
    foreign_renamed = foreign_columns.map { |colname| [colname, "#{foreign_prefix}#{colname}"] }

    result = DataFrame.new(by + own_renamed.map(&:last) + foreign_renamed.map(&:last))
    other.each do |other_row|
        matched_index = index_by_key[by.map { |colname| other_row[colname] }]
        next unless matched_index

        own_row = self[matched_index]

        combined = {}
        by.each { |colname| combined[colname] = own_row[colname] }
        own_renamed.each { |source, target| combined[target] = own_row[source] }
        foreign_renamed.each { |source, target| combined[target] = other_row[source] }

        result << combined
    end

    result
end

#row(i) ⇒ Object Also known as: []



130
131
132
# File 'lib/rust-core.rb', line 130

# Returns the row at position +i+ as a hash from column label to value.
#
# The position is passed straight to each column's +[]+, so whatever that
# returns for +i+ (for an Array column, nil when out of range) is the value.
#
# @param i [Integer] the position of the row
# @return [Hash]
def row(i)
    @data.each_with_object({}) do |(label, values), record|
        record[label] = values[i]
    end
end

#rows ⇒ Object



225
226
227
# File 'lib/rust-core.rb', line 225

# Returns the number of rows, measured as the size of the first column.
#
# A frame without columns has no first column to measure, so it reports 0
# rows instead of failing on nil.
#
# @return [Integer]
def rows
    first_column = @data.values.first
    first_column ? first_column.size : 0
end

#select_cols ⇒ Object



151
152
153
154
155
156
157
# File 'lib/rust-core.rb', line 151

# Returns a copy of this data frame holding only the columns whose label
# makes the block return a truthy value.
#
# +clone+ copies instance variables by reference, so (unless the class
# customises copying somewhere outside this view) the clone would share
# @labels and @data with the receiver. Removing columns from such a clone
# would also remove them from the receiver, and would do so while @labels is
# still being iterated, which skips labels. The clone is therefore given its
# own label list and its own column arrays, and the receiver is left
# untouched.
#
# @yieldparam label [String] a column label
# @yieldreturn [Boolean] whether to keep the column
# @return [DataFrame] a new frame with the selected columns
def select_cols
    kept = @labels.select { |label| yield label }

    result = self.clone
    result.instance_variable_set(:@labels, kept)
    result.instance_variable_set(:@data, kept.map { |label| [label, @data[label].dup] }.to_h)
    return result
end

#select_rows ⇒ Object



143
144
145
146
147
148
149
# File 'lib/rust-core.rb', line 143

# Returns a new data frame with the same columns, holding only the rows for
# which the block returns a truthy value.
#
# @yieldparam row the row as produced by +each+
# @yieldreturn [Boolean] whether to keep the row
# @return [DataFrame]
def select_rows
    selected = DataFrame.new(column_names)
    each do |candidate|
        next unless yield candidate

        selected << candidate
    end
    selected
end

#transform_column!(column) ⇒ Object



139
140
141
# File 'lib/rust-core.rb', line 139

# Replaces, in place, every value of the given column with the result of the
# block.
#
# Column labels are stored as strings, so +column+ is converted with +to_s+
# before the lookup; a Symbol addresses the column with the matching name.
#
# @param column [#to_s] the label of the column to transform
# @yieldparam e the current value
# @yieldreturn the replacement value
# @return [Array] the transformed column
def transform_column!(column)
    @data[column.to_s].map! { |e| yield e }
end