Class: Zillabyte::Command::Data
Overview
manage custom datasets
Constant Summary collapse
- MAX_POLL_SECONDS =
60 * 5
- POLL_SLEEP =
1
- APPENDS_ROWS_SLICE =
5_000
- ONE_HUNDRED_MB =
1e8
Constants inherited from Base
Instance Attribute Summary
Attributes inherited from Base
Instance Method Summary collapse
-
#append ⇒ Object
data:append ID FILE.
-
#authorize ⇒ Object
data:authorize [ID] [SCOPE].
-
#create ⇒ Object
data:create NAME.
-
#delete ⇒ Object
data:delete ID.
-
#index ⇒ Object
data.
-
#list ⇒ Object
data Lists your custom datasets.
-
#pull ⇒ Object
data:pull ID OUTPUT.
-
#pull_to_s3 ⇒ Object
data:pull:s3 ID S3_PATH.
-
#readme ⇒ Object
data:readme ID FILE.
-
#show ⇒ Object
data:show ID.
Methods inherited from Base
Methods included from Helpers
#app, #ask, #command, #create_git_remote, #display, #error, #extract_app_from_git_config, #extract_app_in_dir, #format_with_bang, #friendly_dir, #get_flow_ui_link, #get_info, #get_rich_info, #git, #handle_downloading_manifest, #has_git?, #longest, #read_multiline, #truncate_message, #version_okay?, #with_tty
Constructor Details
This class inherits a constructor from Zillabyte::Command::Base
Instance Method Details
#append ⇒ Object
data:append ID FILE
Adds data to an existing dataset.
--filetype FILETYPE # Input file format type, defaults to csv
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 |
# File 'lib/zillabyte/cli/data.rb', line 242

# data:append ID FILE
#
# Adds the rows in FILE to an existing dataset.
#
# The id and file come from +options+ (:id, :file) or, failing that, from
# the next positional arguments. The file type comes from options[:filetype]
# or the file's extension. Reports "no id given" / "no file given" through
# +error+ when either is missing, and forwards the server's error_message
# when the append request fails.
#
# On success displays the number of appended rows, as JSON when
# options[:output_type] is "json" and as a sentence otherwise.
def append
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  type = options[:output_type]
  # "data.csv" -> "csv". A missing file is tolerated here so that the
  # explicit check below is what reports it.
  filetype = options[:filetype] || File.extname(file || "").delete(".")

  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?

  # The dataset's schema is handed to save_rows together with the file.
  schema = api.data.get(id, options)["schema"]
  n_shards = calculate_number_of_shards(file)

  display("uploading content.", false)
  res = api.data.append(id, {:shards => n_shards})

  if res['error']
    error("#{res['error_message']}", type)
  else
    row_size = save_rows(schema, file, filetype, res['uris'], type)
    if type == "json"
      display({:rows => row_size}.to_json)
    else
      display "dataset ##{id} appended #{row_size} rows"
    end
  end
end
#authorize ⇒ Object
data:authorize [ID] [SCOPE]
changes permission on the dataset
--id ID # The dataset id
--public # Makes the dataset public (default)
--private # Makes the dataset private
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
# File 'lib/zillabyte/cli/data.rb', line 179

# data:authorize [ID] [SCOPE]
#
# Changes the permission scope of a dataset.
#
# The id and scope come from +options+ (:id, :scope) or the next positional
# arguments; the scope defaults to "public". The --private and --public
# flags override any positional scope, and giving both is an error.
#
# POSTs the scope as JSON to /relations/<escaped id>/authorizations
# (expecting HTTP 200) and displays "Authorization updated".
def authorize
  id = options[:id] || shift_argument
  scope = options[:scope] || shift_argument || "public"
  # Previously undefined here, which turned both error paths below into a
  # NameError instead of a user-facing message.
  type = options[:output_type]
  make_public = options[:public]
  make_private = options[:private]

  error("no id given", type) if id.nil?
  error("both --public and --private cannot be given", type) if make_public && make_private

  # Explicit flags win over a positional SCOPE.
  if make_private
    scope = "private"
  elsif make_public
    scope = "public"
  end

  api.request(
    :expects => 200,
    :method => :post,
    :path => "/relations/#{CGI.escape(id)}/authorizations",
    :body => {:scope => scope}.to_json
  ).body

  display "Authorization updated"
end
#create ⇒ Object
data:create NAME
Creates a new dataset.
--schema SCHEMA # Column names and types in the format "field_1:output_type_1,field_2:output_type_2,..."
--public SCOPE # Make the dataset public
--file FILE # A data file
--filetype FILETYPE # File format type, defaults to csv
--description DESCRIPTION # Description of dataset contents
--aliases ALIASES # Dataset name aliases in the format "alias_1,alias_2,..."
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
# File 'lib/zillabyte/cli/data.rb', line 128

# data:create NAME
#
# Creates a new dataset, optionally uploading an initial data file.
#
# Reads :name (or the next positional argument), :file, :filetype,
# :output_type, :schema, :public, :description and :aliases from +options+.
# Reports "no name given" through +error+ when the name is missing, and
# forwards the server's error_message when creation fails.
#
# When a file is given, its type defaults to the file extension, the shard
# count from calculate_number_of_shards is sent with the create request, and
# the rows are uploaded via save_rows to the URIs the server returns.
#
# On success displays "{}" for JSON output, otherwise a summary line with
# the dataset id, the server-reported action and the uploaded row count.
def create
  name = options[:name] || shift_argument
  file = options[:file]
  filetype = options[:filetype]
  type = options[:output_type]

  error("no name given", type) if name.nil?

  hash = get_dataset_properties(
    options[:schema],
    options[:public] || false,
    options[:description],
    options[:aliases]
  )

  if file
    filetype ||= File.extname(file).delete(".")
    n_shards = calculate_number_of_shards(file)
    hash[:shards] = n_shards
    # Progress chatter is only shown for human-readable output.
    display "Sharding into ~#{n_shards} shard(s) for upload. (Actual number may be smaller.)" if type.nil?
  end

  res = api.data.create name, hash
  if res['error']
    error("#{res['error_message']}", type)
  else
    row_size = save_rows(hash[:schema], file, filetype, res['uris'], type) if file
    if type == "json"
      display "{}"
    else
      display "dataset ##{res['id']} #{res['action']}. size: #{row_size || 0} rows."
    end
  end
end
#delete ⇒ Object
data:delete ID
Deletes a dataset.
-f, --force # Delete without asking for confirmation
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/zillabyte/cli/data.rb', line 76

# data:delete ID
#
# Deletes a dataset, asking for confirmation unless --force is given.
#
# Reads :id (or the next positional argument), :force and :output_type from
# +options+. Reports "no id given" through +error+ before prompting, so the
# user is never asked to confirm a deletion that has no target. When an
# output type is set, no interactive prompt is possible and --force is
# required.
#
# After a confirmed deletion displays "{}" for JSON output, otherwise the
# "body" field of the server response; a server error is passed to +error+.
def delete
  id = options[:id] || shift_argument
  forced = options[:force]
  type = options[:output_type] || nil

  error("no id given", type) if id.nil?

  # Declared outside the loop block so the answer is still visible after it.
  confirm = nil
  unless forced
    error("specify -f, --force to confirm deletion", type) unless type.nil?

    loop do
      display "This operation cannot be undone. Are you sure you want to delete this dataset? (yes/no):", false
      confirm = ask
      break if confirm == "yes" || confirm == "no"
      display "Please enter 'yes' to delete the dataset or 'no' to exit"
    end
  end

  if forced || confirm == "yes"
    res = api.data.delete(id, options)
    if res['error']
      error(res['error'], type)
    elsif type == "json"
      display "{}"
    else
      display res["body"]
    end
  end
end
#index ⇒ Object
data
Lists your custom datasets.
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
18 19 20 |
# File 'lib/zillabyte/cli/data.rb', line 18

# data
#
# Default action for the `data` command: delegates to #list and returns
# whatever #list returns.
def index
  list
end
#list ⇒ Object
data
Lists your custom datasets.
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/zillabyte/cli/data.rb', line 30

# data
#
# Lists your custom datasets as a table with the columns id, name and rows.
#
# Fetches GET /relations (expecting HTTP 200) and builds one table row per
# returned relation. Cells are looked up by heading, so every row lines up
# with the header regardless of the key order in the response. A relation
# that lacks a key gets a nil cell. The "rows" value is passed through
# TableOutputBuilder.format_row_count when present.
#
# The "datasets" banner and the total count are only shown when no
# options[:output_type] is set.
def list
  type = options[:output_type] || nil

  response = api.request(
    :expects => 200,
    :method => :get,
    :path => "/relations"
  )

  # Loaded lazily, once, before the first TableOutputBuilder use.
  require("zillabyte/cli/helpers/table_output_builder")

  headings = ["id", "name", "rows"]
  rows = response.body.map do |relation|
    headings.map do |col|
      val = relation[col]
      val = TableOutputBuilder.format_row_count(val) if col == "rows" && relation.key?("rows")
      val
    end
  end

  display "datasets\n" if type.nil? && rows.size > 0
  display TableOutputBuilder.build_table(headings, rows, type)
  display "Total number of datasets: #{rows.length}" if type.nil?
end
#pull ⇒ Object
data:pull ID OUTPUT
Pulls dataset into OUTPUT.gz.
--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 |
# File 'lib/zillabyte/cli/data.rb', line 286

# data:pull ID OUTPUT
#
# Pulls a dataset into a local file.
#
# The id and output file come from +options+ (:id, :file) or the next
# positional arguments. A missing id or file is reported through +error+.
# The pull response from the API is handed to handle_downloading_manifest
# together with the target file.
#
# Afterwards displays "{}" for JSON output, otherwise a completion message.
def pull
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  type = options[:output_type]

  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?

  # All options (e.g. :version_id) are forwarded with the pull request.
  res = api.data.pull(id, options)
  handle_downloading_manifest(file, res, type)

  if type == "json"
    display "{}"
  else
    display "finished pulling dataset ##{id} to file #{file}"
  end
end
#pull_to_s3 ⇒ Object
data:pull:s3 ID S3_PATH
Pulls dataset to s3_bucket/s3_key/part***.gz using the given s3_access and s3_secret credentials. S3_PATH may be given in the following forms:
1) s3://s3_access:s3_secret@s3_bucket/s3_key
2) s3://s3_bucket/s3_key: also supply --s3_access and --s3_secret OR set the environment variables S3_ACCESS and S3_SECRET
3) s3_key: also supply --s3_access, --s3_secret and --s3_bucket OR set the environment variables S3_ACCESS and S3_SECRET and supply --s3_bucket
--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--s3_access [s3_access_key] # S3 access key
--s3_secret [s3_secret_key] # S3 secret key
--s3_bucket [s3_bucket] # S3 bucket to store data at
--s3_key [s3_file_key] # S3 key to store data at
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 |
# File 'lib/zillabyte/cli/data.rb', line 332

# data:pull:s3 ID S3_PATH
#
# Asks the server to export a dataset to S3.
#
# S3_PATH is parsed in three forms:
#   1) s3://access:secret@bucket/key  - everything comes from the path
#   2) s3://bucket/key                - credentials come from options/ENV
#   3) key                            - bucket and credentials come from
#                                       options/ENV
# Anything the path did not supply is filled from options[:s3_access],
# options[:s3_secret], options[:s3_bucket] and options[:s3_key], with the
# S3_ACCESS / S3_SECRET environment variables as a fallback for the
# credentials. A value that is still nil or empty afterwards is reported
# through +error+.
#
# options[:version_id], when present, is forwarded with the request. On
# success displays "{}" for JSON output, otherwise the destination echoed
# back by the server plus a note that large exports take a while.
def pull_to_s3
  id = options[:id] || shift_argument
  type = options[:output_type]
  error("no id given", type) if id.nil?

  s3_path = options[:s3_path] || shift_argument
  error("no s3 path given", type) if s3_path.nil?

  s3_access = s3_secret = s3_bucket = s3_key = nil
  full = s3_path.match(/^s3:\/\/([A-Z0-9]{20}):([$-\/:-?{-~!"^_`\[\]\w]{40})@([\w\.]*)\/([-\w\/]*)$/)
  if full
    s3_access, s3_secret, s3_bucket, s3_key = full.captures
  else
    bucket_only = s3_path.match(/^s3:\/\/([\w\.]*)\/([-\w\/]*)$/)
    if bucket_only
      s3_bucket, s3_key = bucket_only.captures
    else
      s3_key = s3_path
    end
  end

  # nil and "" both count as "not supplied" (a regex group can match empty).
  blank = lambda { |value| value.nil? || value == "" }

  s3_access = options[:s3_access] || ENV["S3_ACCESS"] if blank.call(s3_access)
  s3_secret = options[:s3_secret] || ENV["S3_SECRET"] if blank.call(s3_secret)
  s3_bucket = options[:s3_bucket] if blank.call(s3_bucket)
  # --s3_key is documented but was never consulted; use it when the path
  # carried no key (e.g. "s3://bucket/").
  s3_key = options[:s3_key] if blank.call(s3_key)

  error("No s3 access key or invalid access key provided. Please check that you have entered the access key correctly.", type) if blank.call(s3_access)
  error("No s3 access secret key or invalid secret key provided. Please check that you have entered the secret key correctly.", type) if blank.call(s3_secret)
  error("No s3 access bucket or invalid bucket provided. Please check that you have entered the bucket correctly.", type) if blank.call(s3_bucket)
  error("No s3 file key provided. Please check that you have entered the file key correctly.", type) if blank.call(s3_key)

  s3_params = {:s3_access_key => s3_access, :s3_secret => s3_secret, :s3_bucket => s3_bucket, :s3_file_key => s3_key}
  s3_params[:version_id] = options[:version_id] if options[:version_id]

  res = api.data.pull_to_s3(id, s3_params)
  if type == "json"
    display "{}"
  else
    display "downloading dataset to s3://#{res["s3_bucket"]}/#{res["s3_file_key"]}/"
    display "if the dataset is large, this may take a while, please check your s3 account after a few minutes"
  end
end
#readme ⇒ Object
data:readme ID FILE
Attaches a README file to a dataset
#HIDDEN
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/zillabyte/cli/data.rb', line 213

# data:readme ID FILE
#
# Attaches a README file to a dataset.
#
# The id and file come from +options+ (:id, :file) or the next positional
# arguments. A missing id, a missing file argument or a non-existent file is
# reported through +error+. The file name and its full contents are POSTed
# as JSON to /relations/<escaped id>/readme (expecting HTTP 200), after
# which "README updated" is displayed.
def readme
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  # Previously undefined here, which turned the error paths below into a
  # NameError instead of a user-facing message.
  type = options[:output_type]

  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  error("file doesn't exist", type) unless File.exist?(file)

  content = File.read(file)

  api.request(
    :expects => 200,
    :method => :post,
    :path => "/relations/#{CGI.escape(id)}/readme",
    :body => {:filename => file, :content => content}.to_json
  ).body

  display "README updated"
end
#show ⇒ Object
data:show ID
Shows a sample of the dataset.
--version_id [version_id] # Retrieve data generated with a specific version of the app if the dataset is associated with an app [default: last version]
--no_truncation # Don't truncate long strings
--meta # Show metadata columns (since, confidence, source)
--output_type OUTPUT_TYPE # Specify an output type, e.g. json #HIDDEN
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 |
# File 'lib/zillabyte/cli/data.rb', line 390

# data:show ID
#
# Shows a sample of the dataset.
#
# Reads :name (or the next positional argument), :output_type, :meta and
# :no_truncation from +options+. A first POST request is expected to return
# a job id; the job is then polled with GET requests every POLL_SLEEP
# seconds for at most MAX_POLL_SECONDS. If the initial response carries no
# job id, its error (or a generic server error) is reported via +error+.
# If the job is still running when the time budget is used up, a timeout is
# reported instead of an (incorrect) "empty dataset".
#
# The sample is rendered with the "id" column removed, and with
# META_COLUMNS removed unless --meta is given. Columns are titled by their
# first matching alias from the response's column_aliases, if any. Values
# longer than 30 characters are cut unless --no_truncation is given or the
# output type is "json".
#
# NOTE(review): protocol violations below use `throw` with a string, which
# surfaces as UncaughtThrowError; `raise` was probably intended. Left as is
# so the exception class seen by callers does not change - confirm.
def show
  name = options[:name] || shift_argument
  type = options[:output_type]
  meta = options[:meta] || false
  error("no id given", type) if name.nil?

  # Initial request..
  res = api.data.show(name, :post, options)
  if res['job_id']
    # Subsequent polls identify the job through the forwarded options.
    options[:job_id] = res['job_id']

    # Poll until the results are ready...
    deadline = Time.now.utc + MAX_POLL_SECONDS
    display "Fetching your data, please wait..." if type.nil?

    completed = false
    while Time.now.utc < deadline
      res = api.data.show(name, :get, options)

      case res['status']
      when 'completed'
        throw "something is wrong: #{res}" unless res['return']
        res = res['return']
        completed = true
        break
      when 'running'
        sleep(POLL_SLEEP)
      else
        throw "unknown status: #{res}"
      end
    end

    unless completed
      error("timed out after #{MAX_POLL_SECONDS} seconds waiting for the dataset sample, please try again", type)
      return
    end
  else
    if res['error']
      error(res['error_message'] || res['error'], type)
    else
      error("remote server error (r256)", type)
    end
  end

  # We only reach here after polling is complete...
  sample = res["rows"]
  if sample && sample.size > 0
    concrete_headings = sample.first.keys
    concrete_headings.delete("id")
    META_COLUMNS.each { |c| concrete_headings.delete(c) } unless meta

    # Exactly one title per column keeps headings aligned with row cells.
    aliases = res['column_aliases'] || []
    headings = concrete_headings.map do |ch|
      match = aliases.find { |al| al["concrete_name"] == ch }
      match ? match["alias"] : ch
    end

    truncate = !options[:no_truncation] && type != "json"
    rows = sample.map do |obj|
      concrete_headings.map do |heading|
        value = obj[heading]
        if truncate && value.to_s.size > 30
          value.to_s[0..30] + "..."
        else
          value
        end
      end
    end

    require("colorize")
    require("zillabyte/cli/helpers/table_output_builder")
    if type.nil?
      display "Sampled output:"
      display TableOutputBuilder.build_terminal_table(headings, rows)
      display "To download your full dataset, type "+"`zillabyte data:pull [RELATION_NAME] [OUTPUT_PREFIX] [DIRECTORY]`".colorize(:green)+"."
    else
      display TableOutputBuilder.build_table(headings, rows, type)
    end
  else
    if type == "json"
      display "{}"
    else
      display "empty dataset"
    end
  end
end
else display TableOutputBuilder.build_table(headings, rows, type) end else if type == "json" display "{}" else display "empty dataset" end end end |