Class: Dataset

Inherits:
Object
  • Object
show all
Defined in:
lib/kaba/dataset.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_dir, prompt) ⇒ Dataset



4
5
6
7
8
# File 'lib/kaba/dataset.rb', line 4

# Collects the target files of a data directory and starts with no records.
#
# @param data_dir [String] directory that holds the `*.target.json` files;
#   it is expanded to an absolute path before being searched
# @param prompt [#render] object whose +render+ method is called by #scan
def initialize(data_dir, prompt)
  root = File.expand_path(data_dir)
  # Match relative to +root+ (via +base:+) so glob metacharacters in the
  # directory name ("[", "{", "*", "?") are taken literally, and sort so that
  # a `limit:` always selects the same files regardless of Ruby version.
  @data_files = Dir.glob('*.target.json', base: root).sort.map { |name| File.join(root, name) }
  @lines = []
  @prompt = prompt
end

Instance Attribute Details

#lines ⇒ Object (readonly)

Returns the value of attribute lines.



2
3
4
# File 'lib/kaba/dataset.rb', line 2

# Returns the records collected so far: the same array that #add appends to
# and #save writes out.
#
# @return [Array] the live array held by this object, not a copy
def lines
  @lines
end

Instance Method Details

#_each(limit: nil) ⇒ Object

Implements an each method that lets users iterate over the dataset with a block, and provides an add method that appends data to the dataset.



11
12
13
14
15
# File 'lib/kaba/dataset.rb', line 11

# Walks the data files and hands each one to the caller's block.
#
# @param limit [Integer, nil] maximum number of files to visit; every file
#   is visited when it is nil
# @yieldparam row [Row] a Row built from the file path
# @yieldparam dataset [Dataset] this object, so the block can call #add on it
# @return [Array<String>] the file paths that were visited
def _each(limit: nil)
  count = limit || @data_files.size
  @data_files.first(count).each { |path| yield(Row.new(path), self) }
end

#add(data) ⇒ Object



22
23
24
# File 'lib/kaba/dataset.rb', line 22

# Appends one record to the collected lines.
#
# @param data [Object] the record to store; #save later serialises it with
#   +to_json+
# @return [Array] the lines array, now including +data+
def add(data)
  @lines.push(data)
end

#each(limit: nil, &block) ⇒ Object



17
18
19
20
# File 'lib/kaba/dataset.rb', line 17

# Prints a red warning to standard output and then iterates exactly like
# #_each.
#
# NOTE(review): the warning does not say why iteration is dangerous;
# presumably callers are expected to go through #scan instead. Confirm.
#
# @param limit [Integer, nil] forwarded to #_each
# @yield [row, dataset] forwarded to #_each
# @return [Object] whatever #_each returns
def each(limit: nil, &block)
  # The message previously read "Waring"; spelling fixed.
  puts "Warning: each is very dangerous".colorize(:red)
  _each(limit: limit, &block)
end

#save(file_path) ⇒ Object



26
27
28
29
30
31
32
# File 'lib/kaba/dataset.rb', line 26

# Writes every collected line to a file, one JSON document per line.
#
# @param file_path [String] destination path; expanded to an absolute path
#   and opened in "w" mode, so an existing file is overwritten
# @return [Array] the lines array that was written
def save(file_path)
  destination = File.expand_path(file_path)
  File.open(destination, 'w') do |io|
    @lines.each { |record| io.puts(record.to_json) }
  end
end

#scan(limit: nil) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/kaba/dataset.rb', line 38

# Builds two records per data file and adds them to the dataset, showing a
# progress bar that advances once per file.
#
# For each row the input file is rendered through the prompt twice: once
# with default options and once with `export: true`. Both records share the
# same output: the row's target JSON, pretty-printed inside a fenced
# "json" code block.
#
# NOTE(review): the heredoc opener was missing from the listing this was
# reconstructed from; a squiggly heredoc (no leading indentation in the
# output) is assumed. Confirm against the original source.
#
# @param limit [Integer, nil] maximum number of data files to process; all
#   files are processed when it is nil
# @return [Object] whatever +wait+ on the outer Async task returns
def scan(limit: nil)
  progressbar = TTY::ProgressBar.new(
    "Dataset: [:bar] :percent :current/:total",
    total: @data_files.first(limit || @data_files.size).size
  )

  Async do
    _each(limit: limit) do |row, ds|
      # One child task per row.
      Async do
        # Read the input once; both prompt variants render the same text.
        source = File.read(row.input_file)

        instruction = @prompt.render(source)
        # Parse and re-generate so the stored JSON is consistently formatted.
        pretty_json = JSON.pretty_generate(JSON.parse(File.read(row.target_path)))
        target = <<~MARKDOWN
          ```json
          #{pretty_json}
          ```
        MARKDOWN
        ds.add({ instruction: instruction, output: target })

        instruction = @prompt.render(source, export: true)
        ds.add({ instruction: instruction, output: target })

        progressbar.advance
      end
    end
  end.wait
end

#validate ⇒ Object



34
35
36
# File 'lib/kaba/dataset.rb', line 34

# Checks that exactly two lines have been collected for every data file.
#
# The count is taken over all data files, so the result is false after a
# #scan that was restricted with `limit:`.
#
# @return [Boolean] true when the number of lines is twice the number of
#   data files
def validate
  expected_rows = 2 * @data_files.size
  expected_rows == @lines.size
end