Class: Kfold::DataFile

Inherits:
Object
  • Object
show all
Defined in:
lib/kfold/data_file.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename, delimiter = "\n", granularity = 1) ⇒ DataFile

Returns a new instance of DataFile.



6
7
8
# File 'lib/kfold/data_file.rb', line 6

# Builds a DataFile description; nothing is read from disk here.
#
# @param filename path of the data file to be split
# @param delimiter separator handed to File.foreach when entries are read
#   (defaults to a newline, i.e. one entry per line)
# @param granularity number of consecutive entries that form one block
def initialize(filename, delimiter = "\n", granularity = 1)
  @filename = filename
  @delimiter = delimiter
  @granularity = granularity
end

Instance Attribute Details

#delimiter ⇒ Object (readonly)

Returns the value of attribute delimiter.



4
5
6
# File 'lib/kfold/data_file.rb', line 4

# Read-only accessor for the entry separator given to the constructor.
# The same value is passed to File.foreach when the file is iterated.
#
# @return the stored delimiter ("\n" unless the constructor was told otherwise)
def delimiter
  @delimiter
end

#filename ⇒ Object (readonly)

Returns the value of attribute filename.



4
5
6
# File 'lib/kfold/data_file.rb', line 4

# Read-only accessor for the data file location given to the constructor.
# This is the path that File.foreach opens when the entries are iterated.
#
# @return the stored filename, exactly as it was passed in
def filename
  @filename
end

#granularity ⇒ Object (readonly)

Returns the value of attribute granularity.



4
5
6
# File 'lib/kfold/data_file.rb', line 4

# Read-only accessor for the block size: the number of consecutive entries
# that are kept together as one block when the file is divided into parts.
#
# @return the stored granularity (1 unless the constructor was told otherwise)
def granularity
  @granularity
end

Instance Method Details

#breakdown(parts = 10) ⇒ Object



18
19
20
21
22
23
24
25
# File 'lib/kfold/data_file.rb', line 18

# Describes, in one human-readable sentence, how the file would be divided.
#
# The block count is split evenly over +parts+ with integer division; when
# that division leaves a remainder, the sentence also mentions the surplus
# blocks that end up in the last part.
#
# @param parts number of parts the blocks are divided into
# @return [String] the summary sentence
def breakdown(parts = 10)
  per_part, leftover = num_blocks.divmod(parts)
  summary = "#{num_entries} entries into #{parts} parts, #{per_part} blocks of #{granularity} entries per part"
  return summary unless leftover > 0

  "#{summary} (plus #{leftover} extra blocks in last part)"
end

#each_entry_in_parts(parts = 10) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/kfold/data_file.rb', line 27

# Streams the file entry by entry, yielding every entry together with the
# 1-based number of the part it is assigned to.
#
# Entries are grouped into blocks of +granularity+ consecutive entries, and
# each part is given +num_blocks / parts+ (integer division) blocks. The
# last part is never closed, so it also absorbs the blocks left over from
# that division.
#
# NOTE(review): when there are fewer blocks than parts the per-part quota is
# 0, the end-of-part test below never matches, and every entry is reported
# as part 1 -- confirm whether that is intended.
#
# @param parts number of parts to distribute the entries over
# @yield [part, entry] once per entry, in file order
# @yieldparam part [Integer] 1-based part number of the entry
# @yieldparam entry [String] the entry exactly as File.foreach returns it
# @return [nil, Enumerator] nil after iterating with a block; an Enumerator
#   over the same [part, entry] pairs when no block is given
def each_entry_in_parts(parts = 10)
  return enum_for(:each_entry_in_parts, parts) unless block_given?

  blocks_per_part = num_blocks.divmod(parts).first
  cur_part = 1
  cur_block = 1
  block_entries = 0
  File.foreach(filename, delimiter) do |entry|
    yield cur_part, entry
    block_entries += 1
    next unless block_entries == granularity

    # A complete block has just been emitted; start counting the next one.
    block_entries = 0
    if cur_block == blocks_per_part && cur_part != parts
      # The part has its quota of blocks and is not the last one: move on.
      cur_part += 1
      cur_block = 1
    else
      cur_block += 1
    end
  end
end

#num_blocks ⇒ Object



14
15
16
# File 'lib/kfold/data_file.rb', line 14

# Number of blocks in the file: the entry count divided by the granularity,
# rounded up so that a trailing, partially filled block is counted as well.
# The result is computed once and then served from @num_blocks.
#
# @return [Integer] the block count
def num_blocks
  @num_blocks ||= begin
    entries_per_block = granularity.to_f
    (num_entries.to_f / entries_per_block).ceil
  end
end

#num_entries ⇒ Object



10
11
12
# File 'lib/kfold/data_file.rb', line 10

# Total number of entries in the file.
#
# The count comes from count_entries (defined elsewhere in the class) and is
# cached after the first call, so that helper runs at most once per instance
# as long as it returns a truthy value.
#
# @return whatever count_entries returned on the first call
def num_entries
  # The cache variable was previously misspelled as @num_entres.
  @num_entries ||= count_entries
end