Class: Dataset
- Inherits:
-
Object
- Object
- Dataset
- Defined in:
- lib/kaba/dataset.rb
Instance Attribute Summary collapse
-
#lines ⇒ Object
readonly
Returns the value of attribute lines.
Instance Method Summary collapse
-
#_each(limit: nil) ⇒ Object
实现一个 each 方法,可以让用户通过 block 的方式遍历数据集,提供一个 add 方法,可以将数据添加到数据集中.
- #add(data) ⇒ Object
- #each(limit: nil, &block) ⇒ Object
-
#initialize(data_dir, prompt) ⇒ Dataset
constructor
A new instance of Dataset.
- #save(file_path) ⇒ Object
- #scan(limit: nil) ⇒ Object
- #validate ⇒ Object
Constructor Details
#initialize(data_dir, prompt) ⇒ Dataset
4 5 6 7 8 |
# File 'lib/kaba/dataset.rb', line 4
# Builds a dataset over the `*.target.json` files found directly inside
# +data_dir+.
#
# @param data_dir [String] directory to search; expanded to an absolute path
#   before globbing
# @param prompt [Object] stored as-is; #scan calls +render+ on it
def initialize(data_dir, prompt)
  # `File.(data_dir)` would be `File.call(data_dir)` and raise NoMethodError;
  # the path has to go through File.expand_path.
  @data_files = Dir.glob(File.join(File.expand_path(data_dir), '*.target.json'))
  @lines = []
  @prompt = prompt
end
Instance Attribute Details
#lines ⇒ Object (readonly)
Returns the value of attribute lines.
2 3 4 |
# File 'lib/kaba/dataset.rb', line 2
# Reader for the rows collected so far.
#
# @return [Array] the array held in @lines (the same object, not a copy)
def lines
  @lines
end
Instance Method Details
#_each(limit: nil) ⇒ Object
实现一个 each 方法,可以让用户通过 block 的方式遍历数据集,提供一个 add 方法,可以将数据添加到数据集中
11 12 13 14 15 |
# File 'lib/kaba/dataset.rb', line 11
# Iterates the data files, yielding a Row built from each file path together
# with this dataset, so the block can push results back through #add.
#
# @param limit [Integer, nil] visit at most this many files; nil visits all
# @yieldparam row [Row] wrapper built from one data file path
# @yieldparam dataset [Dataset] the receiver
# @return [Array<String>, Enumerator] the visited file paths, or an
#   Enumerator over the same pairs when no block is given
def _each(limit: nil)
  # Without a block, hand back an Enumerator instead of failing on yield.
  return enum_for(:_each, limit: limit) unless block_given?

  @data_files.first(limit || @data_files.size).each do |file|
    yield(Row.new(file), self)
  end
end
#add(data) ⇒ Object
22 23 24 |
# File 'lib/kaba/dataset.rb', line 22
# Appends one record to the in-memory rows.
#
# @param data [Object] record to store; #save later serializes each record
#   with +to_json+
# @return [Array] the rows array after the append
def add(data)
  @lines << data
end
#each(limit: nil, &block) ⇒ Object
17 18 19 20 |
# File 'lib/kaba/dataset.rb', line 17
# Public iteration entry point: prints a red warning to stdout, then
# delegates to #_each with the same limit and block.
#
# @param limit [Integer, nil] forwarded to #_each
# @param block [Proc] forwarded to #_each
# @return [Object] whatever #_each returns
def each(limit: nil, &block)
  # String#colorize is not core Ruby; presumably supplied by the colorize gem
  # loaded elsewhere in the project.
  puts "Warning: each is very dangerous".colorize(:red)
  _each(limit: limit, &block)
end
#save(file_path) ⇒ Object
26 27 28 29 30 31 32 |
# File 'lib/kaba/dataset.rb', line 26
# Writes every collected row to +file_path+ as one JSON document per line,
# truncating any existing file.
#
# @param file_path [String] destination path; expanded to an absolute path
# @return [Array] the rows array (the value of the File.open block)
def save(file_path)
  # `File.(file_path)` would be `File.call(file_path)` and raise
  # NoMethodError; the path has to go through File.expand_path.
  File.open(File.expand_path(file_path), 'w') do |file|
    @lines.each { |line| file.puts(line.to_json) }
  end
end
#scan(limit: nil) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/kaba/dataset.rb', line 38
# Walks the data files and adds two records per file to the dataset: one
# whose instruction is rendered from the row's input file, and one rendered
# from the same input with +export: true+. Both records share the same
# output: the row's target JSON, pretty-printed inside a fenced json block.
#
# NOTE(review): reconstructed from a garbled rendering in which the
# `progressbar =` assignment and the `<<~Markdown` heredoc opener were lost;
# confirm against the original source file.
#
# @param limit [Integer, nil] forwarded to #_each; also sizes the progress bar
# @return [Object] the result of waiting on the outer Async block
def scan(limit: nil)
  progressbar = TTY::ProgressBar.new(
    "Dataset: [:bar] :percent :current/:total",
    total: @data_files.first(limit || @data_files.size).size
  )

  Async do
    _each(limit: limit) do |row, ds|
      # Each row's work is wrapped in its own nested Async block.
      Async do
        instruction = @prompt.render(File.read(row.input_file))
        target = <<~Markdown
          ```json
          #{JSON.pretty_generate(JSON.parse(File.read(row.target_path)))}
          ```
        Markdown
        ds.add({ instruction: instruction, output: target })

        instruction = @prompt.render(File.read(row.input_file), export: true)
        ds.add({ instruction: instruction, output: target })

        progressbar.advance
      end
    end
  end.wait
end
#validate ⇒ Object
34 35 36 |
# File 'lib/kaba/dataset.rb', line 34
# Checks that exactly two rows exist per data file, which is the number #scan
# adds for each file it visits.
#
# NOTE(review): the comparison uses the full file count, so a scan run with
# a +limit+ smaller than the number of files will not validate; confirm that
# is intended.
#
# @return [Boolean] true when the row count equals twice the file count
def validate
  @lines.size == (@data_files.size * 2)
end