Class: Rarff::Relation
- Inherits:
-
Object
- Object
- Rarff::Relation
- Defined in:
- lib/rarff.rb
Instance Attribute Summary collapse
-
#attributes ⇒ Object
Returns the value of attribute attributes.
-
#instances ⇒ Object
Returns the value of attribute instances.
-
#name ⇒ Object
Returns the value of attribute name.
Instance Method Summary collapse
- #create_attributes(attr_parse = false) ⇒ Object
- #expand_sparse(str) ⇒ Object
-
#initialize(name = '') ⇒ Relation
constructor
A new instance of Relation.
- #parse(str) ⇒ Object
- #to_arff(sparse = false) ⇒ Object
- #to_s ⇒ Object
Constructor Details
#initialize(name = '') ⇒ Relation
Returns a new instance of Relation.
126 127 128 129 130 |
# File 'lib/rarff.rb', line 126
#
# Builds an empty relation: remembers the given name and starts out with
# no attribute definitions and no data rows.
#
# @param name [String] relation name; defaults to the empty string
def initialize(name='')
  @name = name
  # Array literals give each relation its own fresh, independent lists.
  @attributes = []
  @instances = []
end
Instance Attribute Details
#attributes ⇒ Object
Returns the value of attribute attributes.
123 124 125 |
# File 'lib/rarff.rb', line 123 def attributes @attributes end |
#instances ⇒ Object
Returns the value of attribute instances.
123 124 125 |
# File 'lib/rarff.rb', line 123 def instances @instances end |
#name ⇒ Object
Returns the value of attribute name.
123 124 125 |
# File 'lib/rarff.rb', line 123 def name @name end |
Instance Method Details
#create_attributes(attr_parse = false) ⇒ Object
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/rarff.rb', line 173
#
# Derives attribute definitions (named "Attr<column index>") from the data
# rows in @instances and stores them in @attributes.
#
# For every column, the first non-nil value found (scanning rows top to
# bottom) decides what happens:
# * attr_parse true  - the value is matched against a numeric pattern; on a
#   match the column is (re)defined as numeric, otherwise @attributes is
#   left untouched for that column.
# * attr_parse false - the value's Ruby class picks the type: Numeric,
#   String, or true/false.
# Any column of the first row that still has no definition afterwards is
# defined as numeric.
#
# @param attr_parse [Boolean] treat values as text to be pattern-matched
#   instead of inspecting their Ruby class
# @raise [Exception] when there are no rows, the first row is empty, or
#   (attr_parse false) a value is of an unsupported class
def create_attributes(attr_parse=false)
  if @instances.nil? || @instances.empty? || @instances[0].empty?
    raise Exception, "Not enough data to create ARFF attributes"
  end

  # Column index => true once that column has been looked at. A column can
  # only be missing from the first row's verdict when it holds nil there,
  # which is why later rows are scanned as well.
  decided = {}

  @instances.each do |row|
    row.each_with_index do |col, j|
      next if decided[j] || col.nil?

      # Whatever happens below, this column counts as handled.
      decided[j] = true

      if attr_parse
        @attributes[j] = Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC) if col =~ /^\-?\d+\.?\d*$/
        next # this column is finished; move on to the next one
      end

      # No parsing - classify the value by its Ruby class.
      @attributes[j] =
        case col
        when Numeric
          Attribute.new("Attr#{j}", ATTRIBUTE_NUMERIC)
        when String
          Attribute.new("Attr#{j}", ATTRIBUTE_STRING)
        when TrueClass, FalseClass
          Attribute.new("Attr#{j}", ATTRIBUTE_BOOLEAN)
        else
          raise Exception, "Could not parse attribute: #{col.inspect}"
        end
    end
  end

  # Give every column of the first row a definition, because otherwise
  # needless errors are thrown later on.
  @instances[0].each_index do |i|
    @attributes[i] ||= Attribute.new("Attr#{i}", ATTRIBUTE_NUMERIC)
  end
end
#expand_sparse(str) ⇒ Object
216 217 218 219 220 221 222 223 |
# File 'lib/rarff.rb', line 216
#
# Expands one sparse ARFF data row ("{index value, index value, ...}",
# with or without the surrounding braces) into a dense row.
#
# The result has one slot per entry in @attributes; slots not mentioned in
# the sparse row are filled with 0. Values are stored as the strings found
# in the input (surrounding quotes, if any, are kept as-is).
#
# NOTE: pairs are separated on commas, so a quoted value that itself
# contains a comma is still not supported.
#
# @param str [String] sparse row text
# @return [Array] dense row of length @attributes.size (longer if the row
#   names an index beyond the declared attributes)
def expand_sparse(str)
  row = Array.new(@attributes.size, 0)
  body = str.gsub(/^\s*\{(.*)\}\s*$/, "\\1")

  body.split(/\s*\,\s*/).each do |pair|
    # Strip first and split only once on the first whitespace run, so that
    # "{ 0 1 }", "0   1" and "3 'foo bar'" all yield the right index/value.
    index, value = pair.strip.split(/\s+/, 2)
    next if index.nil? # blank pair, e.g. from "{ }"

    row[index.to_i] = value
  end

  row
end
#parse(str) ⇒ Object
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# File 'lib/rarff.rb', line 133
#
# Parses ARFF text line by line and loads it into this relation:
# * blank lines and comment lines are skipped,
# * the relation line sets @name,
# * each attribute line appends an Attribute to @attributes,
# * after the data marker, every remaining line becomes a row in
#   @instances - sparse rows ("{...}") are expanded to dense rows via
#   expand_sparse, ordinary rows are split on commas with outer single
#   quotes removed from each field.
# create_attributes(true) is run after every data row.
#
# NOTE(review): relies on String#my_scan, which is defined outside this
# view; the code assumes it returns a truthy value when the pattern matched
# and yields the captures to the block - confirm against its definition.
#
# @param str [String] complete ARFF document
# @return [Array<String>] the lines of str (result of the each loop)
def parse(str)
  in_data_section = false

  # TODO: Doesn't handle commas in quoted attributes.
  str.split("\n").each { |line|
    next if line =~ /^\s*$/
    next if line =~ /^\s*#{COMMENT_MARKER}/
    next if line.my_scan(/^\s*#{RELATION_MARKER}\s*(.*)\s*$/i) { |name| @name = name }
    next if line.my_scan(/^\s*#{ATTRIBUTE_MARKER}\s*([^\s]*)\s+(.*)\s*$/i) { |name, type|
      @attributes.push(Attribute.new(name, type))
    }
    next if line.my_scan(/^\s*#{DATA_MARKER}/i) { in_data_section = true }
    next if in_data_section == false

    ## Below is data section handling
    # next if line.gsub(/^\s*(.*)\s*$/, "\\1").my_scan(/^\s*#{SPARSE_ARFF_BEGIN}(.*)#{SPARSE_ARFF_END}\s*$/) { |data|
    next if line.gsub(/^\s*(.*)\s*$/, "\\1").my_scan(/^#{ESC_SPARSE_ARFF_BEGIN}(.*)#{ESC_SPARSE_ARFF_END}$/) { |data|
      # Sparse ARFF: turn the "index value" pairs into a dense row before
      # storing it, so sparse and dense rows have the same shape.
      # TODO: Factor duplication with non-sparse data below
      @instances << expand_sparse(data.first)
      create_attributes(true)
    }
    next if line.my_scan(/^\s*(.*)\s*$/) { |data|
      @instances << data.first.split(/,\s*/).map { |field|
        # Remove outer single quotes on strings, if any ('foo bar' --> foo bar)
        field.gsub(/^\s*\'(.*)\'\s*$/, "\\1")
      }
      create_attributes(true)
    }
  }
end
#to_arff(sparse = false) ⇒ Object
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 |
# File 'lib/rarff.rb', line 226
#
# Serializes the relation as ARFF text: the relation line, one line per
# attribute (each attribute's string form), the data marker, then one line
# per row of @instances.
#
# Dense output joins all columns with ", " and writes MISSING for nil.
# Sparse output wraps each row in braces and writes "index value" pairs,
# leaving out nil columns and numeric columns whose value equals 0.
#
# @param sparse [Boolean] emit sparse rows instead of dense rows
# @return [String] the ARFF document (no trailing newline after the data)
def to_arff(sparse=false)
  header = RELATION_MARKER + " #{@name}\n" +
           # @attributes.map{ |attr| attr.to_arff }.join("\n") +
           @attributes.join("\n") + "\n" +
           DATA_MARKER + "\n"

  rows = @instances.map do |inst|
    cells = inst.map_with_index do |col, i|
      if col.nil?
        # Missing value: dropped from sparse rows, placeholder in dense rows.
        sparse ? nil : MISSING
      else
        type = @attributes[i].type

        # First pass - quote strings with spaces, and dates
        # TODO: Doesn't handle cases in which strings already contain
        # quotes or are already quoted.
        if type =~ /^#{ATTRIBUTE_STRING}$/i
          col = "'" + col + "'" if col =~ /\s+/
        elsif type =~ /^#{ATTRIBUTE_DATE}/i ## Hack comparison. Ugh.
          col = '"' + col + '"'
        end

        # Do the final output
        if !sparse
          col
        elsif type =~ /^#{ATTRIBUTE_NUMERIC}$/i && col == 0
          nil # zero-valued numeric columns are implicit in sparse rows
        else
          "#{i} #{col}"
        end
      end
    end

    sparse ? cells.compact.join(', ') : cells.join(", ")
  end

  header + rows.join("\n").gsub(/^/, sparse ? '{' : '').gsub(/$/, sparse ? '}' : '')
end
#to_s ⇒ Object
274 275 276 |
# File 'lib/rarff.rb', line 274 def to_s to_arff end |