Class: Zillabyte::Command::Data
Overview
manage custom datasets
Constant Summary collapse
- MAX_POLL_SECONDS =
60 * 5
- POLL_SLEEP =
1
- APPENDS_ROWS_SLICE =
5_000
Constants inherited from Base
Instance Attribute Summary
Attributes inherited from Base
Instance Method Summary collapse
-
#append ⇒ Object
data:append ID FILE.
-
#authorize ⇒ Object
data:authorize [ID] [SCOPE].
-
#create ⇒ Object
data:create NAME.
-
#delete ⇒ Object
data:delete ID.
-
#index ⇒ Object
data.
-
#list ⇒ Object
data — Lists your custom datasets.
-
#pull ⇒ Object
data:pull ID OUTPUT.
-
#pull_to_s3 ⇒ Object
data:pull:s3 ID S3_PATH.
-
#readme ⇒ Object
data:readme ID FILE.
-
#show ⇒ Object
data:show ID.
Methods inherited from Base
Methods included from Helpers
#app, #ask, #command, #create_git_remote, #display, #error, #extract_app_from_git_config, #extract_app_in_dir, #format_with_bang, #friendly_dir, #get_flow_ui_link, #get_info, #get_rich_info, #git, #handle_downloading_manifest, #has_git?, #longest, #read_multiline, #truncate_message, #with_tty
Constructor Details
This class inherits a constructor from Zillabyte::Command::Base
Instance Method Details
#append ⇒ Object
data:append ID FILE
Adds data to an existing dataset.
--filetype FILETYPE # Input File format type, defaults to csv
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 |
# File 'lib/zillabyte/cli/data.rb', line 246
def append
  # Uploads the rows of FILE to the existing dataset ID in slices of
  # APPENDS_ROWS_SLICE rows and reports how many rows the server accepted.
  #
  # Reads options[:id], options[:file], options[:filetype] and
  # options[:output_type]; ID and FILE fall back to positional arguments.
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  type = options[:output_type]
  # Without --filetype, the file extension (minus the dot) names the format.
  filetype = options[:filetype] || File.extname(file || "").gsub(".", "")
  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?

  # The dataset's column index => type pairs are handed to the file check.
  dataset = self.api.data.get(id, options)
  columns = dataset["columns"].map { |col| { col["index"] => col["type"] } }
  raw_rows = sanity_check_file(file, filetype, { "columns" => columns }, type)

  # Loaded once up front instead of on every slice.
  require("base64")

  total_rows = 0
  display("uploading content.", false)
  raw_rows.each_slice(APPENDS_ROWS_SLICE) do |rows|
    # TODO: post to direct signed s3 (http://docs.aws.amazon.com/AWSRubySDK/latest/AWS/S3/PresignedPost.html)
    display(".", false)
    res = self.api.data.append(id, { :gzip_rows => Base64.encode64(gzip(rows.to_json)) })
    # A response without "size" stops the upload; rows counted so far are
    # still reported below.
    break unless res["size"]
    total_rows += res["size"]
  end

  if type == "json"
    display({ :rows => total_rows }.to_json)
  else
    display "dataset ##{id} appended #{total_rows} rows"
  end
end
#authorize ⇒ Object
data:authorize [ID] [SCOPE]
Changes permissions on the dataset.
--id ID # The dataset id
--public # Makes the dataset public (default)
--private # Makes the dataset private
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# File 'lib/zillabyte/cli/data.rb', line 183
def authorize
  # Posts a new authorization scope ("public" unless told otherwise) for
  # dataset ID to /relations/ID/authorizations.
  #
  # Reads options[:id], options[:scope], options[:public], options[:private]
  # and options[:output_type]; ID and SCOPE fall back to positional arguments.
  id = options[:id] || shift_argument
  scope = options[:scope] || shift_argument || "public"
  make_public = options[:public]
  make_private = options[:private]
  # `type` was previously referenced without ever being assigned, so the
  # validation errors below raised NameError instead of reporting the problem.
  type = options[:output_type]

  error("no id given", type) if id.nil?
  error("both --public and --private cannot be given", type) if make_public && make_private

  # --private overrides any SCOPE argument.
  scope = "private" if make_private

  self.api.request(
    :expects => 200,
    :method => :post,
    :path => "/relations/#{CGI.escape(id)}/authorizations",
    :body => { :scope => scope }.to_json
  )
  display "Authorization updated"
end
#create ⇒ Object
data:create NAME
Creates a new dataset.
--schema SCHEMA # Column names and types in the format “field_1:output_type_1,field_2:output_type_2,…”
--public SCOPE # Make the dataset public
--file FILE # A data file
--filetype FILETYPE # File format type, defaults to csv
--description DESCRIPTION # Description of dataset contents
--aliases ALIASES # Dataset name aliases in the format “alias_1,alias_2,…”
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/zillabyte/cli/data.rb', line 127
def create
  # Creates dataset NAME, optionally seeding it with the rows of --file.
  #
  # Reads options[:name], [:file], [:filetype], [:output_type], [:schema],
  # [:public], [:description] and [:aliases]; NAME falls back to the first
  # positional argument.
  name = options[:name] || shift_argument
  file = options[:file]
  filetype = options[:filetype]
  type = options[:output_type]
  error("no name given", type) if name.nil?

  schema = options[:schema] if options[:schema]
  is_public = options[:public] || false
  description = options[:description]
  aliases = options[:aliases]

  # With no output type the properties come from get_dataset_properties;
  # otherwise hash_dataset_properties is used and also receives the type.
  hash =
    if type.nil?
      get_dataset_properties(schema, is_public, description, aliases)
    else
      hash_dataset_properties(schema, is_public, description, aliases, type)
    end

  if file
    # Without --filetype, the file extension (minus the dot) names the format.
    filetype ||= File.extname(file).gsub(".", "")
    hash[:rows] = sanity_check_file(file, filetype, { "columns" => hash[:schema] }, type)
  end

  res = api.data.create name, hash
  if res['error']
    # Fall back to the raw error value so a response carrying only 'error'
    # does not produce an empty message.
    error((res['error_message'] || res['error']).to_s, type)
  elsif type == "json"
    display "{}"
  else
    display "dataset ##{res['id']} #{res['action']}. size: #{res['size'] || 0} rows."
  end
end
#delete ⇒ Object
data:delete ID
Deletes a dataset.
-f, --force # Delete without asking for confirmation
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/zillabyte/cli/data.rb', line 75
def delete
  # Deletes dataset ID, asking for interactive confirmation unless --force
  # is given.
  #
  # Reads options[:id], options[:force] and options[:output_type]; ID falls
  # back to the first positional argument.
  id = options[:id] || shift_argument
  forced = options[:force]
  type = options[:output_type]
  # Previously a missing ID went straight to the API call.
  error("no id given", type) if id.nil?

  confirm = nil
  unless forced
    # An explicit output type requires --force rather than prompting.
    # NOTE(review): this relies on `error` not returning — confirm.
    error("specify -f, --force to confirm deletion", type) unless type.nil?
    loop do
      display "This operation cannot be undone. Are you sure you want to delete this dataset? (yes/no):", false
      confirm = ask
      break if confirm == "yes" || confirm == "no"
      display "Please enter 'yes' to delete the dataset or 'no' to exit"
    end
  end

  return unless forced || confirm == "yes"

  res = api.data.delete(id, options)
  if res['error']
    error(res['error'], type)
  elsif type == "json"
    display "{}"
  else
    display res["body"]
  end
end
#index ⇒ Object
data
Lists your custom datasets.
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
17 18 19 |
# File 'lib/zillabyte/cli/data.rb', line 17
def index
  # The bare `data` command simply delegates to `list`.
  self.list
end
#list ⇒ Object
data
Lists your custom datasets.
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/zillabyte/cli/data.rb', line 29
def list
  # Fetches /relations and prints one table row (id, name, rows) per dataset.
  #
  # Reads options[:output_type]; when it is nil a title line and a total
  # count are printed around the table.
  type = options[:output_type]

  response = api.request(
    :expects => 200,
    :method => :get,
    :path => "/relations"
  )

  require("zillabyte/cli/helpers/table_output_builder")

  headings = ["id", "name", "rows"]
  rows = response.body.map do |dataset|
    # Build each row in heading order so cells stay under their headings
    # whatever key order the response uses. Only the three listed fields are
    # read, so a dataset without "columns"/"aliases" no longer raises.
    headings.map do |heading|
      value = dataset[heading]
      if heading == "rows" && dataset.key?("rows")
        TableOutputBuilder.format_row_count(value)
      else
        value
      end
    end
  end

  display "datasets\n" if type.nil? && rows.size > 0
  display TableOutputBuilder.build_table(headings, rows, type)
  display "Total number of datasets: #{rows.length}" if type.nil?
end
#pull ⇒ Object
data:pull ID OUTPUT
Pulls dataset into OUTPUT.gz.
--cycle_id [cycle_id] # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 |
# File 'lib/zillabyte/cli/data.rb', line 297
def pull
  # Downloads dataset ID into OUTPUT, appending ".gz" to the file name when
  # it does not already end with it.
  #
  # Reads options[:id], options[:file] and options[:output_type]; ID and
  # OUTPUT fall back to positional arguments.
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  type = options[:output_type]
  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?

  file = "#{file}.gz" unless File.extname(file) == ".gz"

  res = self.api.data.pull(id, options)
  handle_downloading_manifest(file, res, type)

  if type == "json"
    display "{}"
  else
    # Name the file actually written: ".gz" may have been appended above.
    display "finished pulling dataset ##{id} to #{file}"
  end
end
#pull_to_s3 ⇒ Object
data:pull:s3 ID S3_PATH
Pulls dataset to s3_bucket/s3_key/part***.gz using the given s3_access and s3_secret credentials. S3_PATH may be given in the following forms:
1) s3://s3_access:s3_secret@s3_bucket/s3_key
2) s3://s3_bucket/s3_key: also supply --s3_access and --s3_secret OR set the environment variables S3_ACCESS and S3_SECRET
3) s3_key: also supply --s3_access, --s3_secret and --s3_bucket OR set the environment variables S3_ACCESS and S3_SECRET and supply --s3_bucket
--cycle_id [cycle_id] # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--s3_access [s3_access_key] # S3 access key
--s3_secret [s3_secret_key] # S3 secret key
--s3_bucket [s3_bucket] # S3 bucket to store data at
--s3_key [s3_file_key] # S3 key to store data at
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 |
# File 'lib/zillabyte/cli/data.rb', line 345
def pull_to_s3
  # Asks the API to export dataset ID to S3.
  #
  # S3_PATH is accepted in three forms:
  #   1) s3://s3_access:s3_secret@s3_bucket/s3_key
  #   2) s3://s3_bucket/s3_key
  #   3) s3_key
  # Anything the path does not supply is taken from --s3_access/--s3_secret
  # (or ENV S3_ACCESS/S3_SECRET), --s3_bucket and --s3_key.
  id = options[:id] || shift_argument
  type = options[:output_type]
  error("no id given", type) if id.nil?
  s3_path = options[:s3_path] || shift_argument
  error("no s3 path given", type) if s3_path.nil?

  # Treat "" like a missing value everywhere, not only when picking fallbacks.
  blank = lambda { |value| value.nil? || value == "" }

  # \A and \z anchor the whole string; ^ and $ would match per line.
  matches = s3_path.match(/\As3:\/\/([A-Z0-9]{20}):([$-\/:-?{-~!"^_`\[\]\w]{40})@([\w\.]*)\/([-\w\/]*)\z/)
  if matches
    s3_access, s3_secret, s3_bucket, s3_key = matches.captures
  else
    matches = s3_path.match(/\As3:\/\/([\w\.]*)\/([-\w\/]*)\z/)
    if matches
      s3_bucket, s3_key = matches.captures
    elsif s3_path.start_with?("s3://")
      # An s3:// URL that fits neither form used to be sent verbatim as the
      # file key, embedded credentials included. Reject it instead.
      error("Invalid s3 path. Expected s3://s3_access:s3_secret@s3_bucket/s3_key, s3://s3_bucket/s3_key or s3_key.", type)
    else
      s3_key = s3_path
    end
  end

  s3_access = options[:s3_access] || ENV["S3_ACCESS"] if blank.call(s3_access)
  s3_secret = options[:s3_secret] || ENV["S3_SECRET"] if blank.call(s3_secret)
  s3_bucket = options[:s3_bucket] if blank.call(s3_bucket)
  # --s3_key is documented for this command but was never read.
  s3_key = options[:s3_key] if blank.call(s3_key)

  error("No s3 access key or invalid access key provided. Please check that you have entered the access key correctly.", type) if blank.call(s3_access)
  error("No s3 access secret key or invalid secret key provided. Please check that you have entered the secret key correctly.", type) if blank.call(s3_secret)
  error("No s3 access bucket or invalid bucket provided. Please check that you have entered the bucket correctly.", type) if blank.call(s3_bucket)
  error("No s3 file key provided. Please check that you have entered the file key correctly.", type) if blank.call(s3_key)

  s3_params = { :s3_access_key => s3_access, :s3_secret => s3_secret, :s3_bucket => s3_bucket, :s3_file_key => s3_key }
  s3_params[:cycle_id] = options[:cycle_id] if options[:cycle_id]

  res = self.api.data.pull_to_s3(id, s3_params)
  if type == "json"
    display "{}"
  else
    display "downloading dataset to s3://#{res["s3_bucket"]}/#{res["s3_file_key"]}/"
    display "if the dataset is large, this may take a while, please check your s3 account after a few minutes"
  end
end
#readme ⇒ Object
data:readme ID FILE
Attaches a README file to a dataset.
#HIDDEN
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/zillabyte/cli/data.rb', line 217
def readme
  # Uploads the contents of FILE as the README of dataset ID by posting the
  # file name and contents to /relations/ID/readme.
  #
  # Reads options[:id], options[:file] and options[:output_type]; ID and
  # FILE fall back to positional arguments.
  id = options[:id] || shift_argument
  file = options[:file] || shift_argument
  # `type` was previously referenced without ever being assigned, so the
  # validation errors below raised NameError instead of reporting the problem.
  type = options[:output_type]

  error("no id given", type) if id.nil?
  error("no file given", type) if file.nil?
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  error("file doesn't exist", type) unless File.exist?(file)

  content = File.read(file)

  self.api.request(
    :expects => 200,
    :method => :post,
    :path => "/relations/#{CGI.escape(id)}/readme",
    :body => { :filename => file, :content => content }.to_json
  )
  display "README updated"
end
#show ⇒ Object
data:show ID
Shows a sample of the dataset. See ‘zillabyte queries’ for more elaborate functionality.
--cycle_id [cycle_id] # Retrieve data generated during specified cycle if dataset is associated with an app [default: last cycle]
--no_truncation # Don’t truncate long strings
--meta # Show metadata columns (since, confidence, source)
--output_type OUTPUT_TYPE # Specify an output type i.e. json #HIDDEN
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 |
# File 'lib/zillabyte/cli/data.rb', line 404
def show
  # Requests a sample of dataset ID, polls until the server-side job has
  # finished (at most MAX_POLL_SECONDS) and prints the sampled rows.
  #
  # Reads options[:name], [:output_type], [:meta] and [:no_truncation]; the
  # dataset name/ID falls back to the first positional argument. Stores the
  # job id in options[:job_id] so the polling requests carry it.
  name = options[:name] || shift_argument
  type = options[:output_type]
  show_meta = options[:meta] || false
  error("no id given", type) if name.nil?

  # Initial request..
  res = self.api.data.show(name, :post, options)
  if res['job_id']
    options[:job_id] = res['job_id']

    # Poll until the results are ready...
    start = Time.now.utc
    display "Fetching your data, please wait..." if type.nil?
    finished = false
    while Time.now.utc < start + MAX_POLL_SECONDS
      res = self.api.data.show(name, :get, options)

      case res['status']
      when 'completed'
        # `raise`, not `throw`: throw is for catch/throw control flow and a
        # string tag only surfaces as an UncaughtThrowError.
        raise "something is wrong: #{res}" unless res['return']
        res = res['return']
        finished = true
        break
      when 'running'
        sleep(POLL_SLEEP)
      else
        raise "unknown status: #{res}"
      end
    end

    # A timeout used to fall through and be reported as "empty dataset".
    unless finished
      error("timed out after #{MAX_POLL_SECONDS} seconds waiting for the dataset sample", type)
    end
  elsif res['error']
    error(res['error_message'] || res['error'], type)
  else
    error("remote server error (r256)", type)
  end

  # We only reach here after polling is complete...
  # NOTE(review): this relies on `error` not returning — confirm.
  if res["rows"] && res["rows"].size > 0
    concrete_headings = res["rows"].first.keys
    concrete_headings.delete("id")
    META_COLUMNS.each { |c| concrete_headings.delete c } unless show_meta

    # Exactly one heading per column: the first matching alias when there is
    # one, the concrete name otherwise. Adding every matching alias left the
    # headings longer than the rows.
    column_aliases = res['column_aliases'] || []
    headings = concrete_headings.map do |ch|
      match = column_aliases.find { |al| al["concrete_name"] == ch }
      match ? match["alias"] : ch
    end

    rows = res["rows"].map do |obj|
      concrete_headings.map do |heading|
        value = obj[heading]
        # Long values are shortened for display, except for json output or
        # when --no_truncation is given.
        if !options[:no_truncation] && type != "json" && value.to_s.size > 30
          value.to_s[0..30] + "..."
        else
          value
        end
      end
    end

    require("colorize")
    require("zillabyte/cli/helpers/table_output_builder")
    if type.nil?
      display "Sampled output:"
      display TableOutputBuilder.build_terminal_table(headings, rows)
      # data:pull takes ID and OUTPUT only; the hint used to list a third
      # [DIRECTORY] argument.
      display "To download your full dataset, type "+"`zillabyte data:pull ID OUTPUT`".colorize(:green)+"."
    else
      display TableOutputBuilder.build_table(headings, rows, type)
    end
  elsif type == "json"
    display "{}"
  else
    display "empty dataset"
  end
end
else display TableOutputBuilder.build_table(headings, rows, type) end else if type == "json" display "{}" else display "empty dataset" end end end |