Module: GoodData::Model

Defined in:: lib/gooddata/models/model.rb,
lib/gooddata/models/from_wire.rb,
lib/gooddata/models/tab_builder.rb,
lib/gooddata/models/project_creator.rb,
lib/gooddata/models/module_constants.rb,
lib/gooddata/models/blueprint/to_wire.rb,
lib/gooddata/models/blueprint/fact_field.rb,
lib/gooddata/models/blueprint/label_field.rb,
lib/gooddata/models/blueprint/to_manifest.rb,
lib/gooddata/models/blueprint/anchor_field.rb,
lib/gooddata/models/blueprint/date_dimension.rb,
lib/gooddata/models/blueprint/schema_builder.rb,
lib/gooddata/models/blueprint/attribute_field.rb,
lib/gooddata/models/blueprint/blueprint_field.rb,
lib/gooddata/models/blueprint/project_builder.rb,
lib/gooddata/models/blueprint/reference_field.rb,
lib/gooddata/models/blueprint/schema_blueprint.rb,
lib/gooddata/models/blueprint/dashboard_builder.rb,
lib/gooddata/models/blueprint/dataset_blueprint.rb,
lib/gooddata/models/blueprint/project_blueprint.rb

Defined Under Namespace

Modules: FromWire, ToManifest, ToWire Classes: AnchorBlueprintField, AttributeBlueprintField, BlueprintField, DashboardBuilder, DatasetBlueprint, DateDimension, FactBlueprintField, LabelBlueprintField, ProjectBlueprint, ProjectBuilder, ProjectCreator, ReferenceBlueprintField, SchemaBlueprint, SchemaBuilder, TabBuilder

Constant Summary collapse

GD_TYPES = See https://confluence.intgdc.com/display/plat/Catalog+of+Attribute+Types

[
  # Common Types
  'GDC.link',
  'GDC.text',
  'GDC.time',

  # Common Date Attribute Types
  'GDC.time.year',
  'GDC.time.quarter',
  'GDC.time.month',
  'GDC.time.week',
  'GDC.time.date',

  # Specific Date Attribute Types
  'GDC.time.day_in_euweek',
  'GDC.time.day_in_week',
  'GDC.time.day_in_month',
  'GDC.time.day_in_quarter',
  'GDC.time.day_in_year',
  'GDC.time.euweek_in_quarter',
  'GDC.time.week_in_quarter',
  'GDC.time.euweek_in_year',
  'GDC.time.week_in_year',
  'GDC.time.month_in_quarter',
  'GDC.time.month_in_year',
  'GDC.time.quarter_in_year',

  # Legacy Date Attribute Types - Possibly Obsolete
  'GDC.time.dayOfWeek',
  'GDC.time.dayOfMonth',
  'GDC.time.dayOfQuarter',
  'GDC.time.dayOfYear',
  'GDC.time.weekOfYear',
  'GDC.time.monthOfYear',
  'GDC.time.quarterOfYear',

  # Types for Geo
  'GDC.geo.pin',                 # Geo pushpin
  'GDC.geo.ausstates.name',      # Australia States (Name)
  'GDC.geo.ausstates.code',      # Australia States (ISO code)
  'GDC.geo.usstates.name',       # US States (Name)
  'GDC.geo.usstates.geo_id',     # US States (US Census ID)
  'GDC.geo.usstates.code',       # US States (2-letter code)
  'GDC.geo.uscounties.geo_id',   # US Counties (US Census ID)
  'GDC.geo.worldcountries.name', # World countries (Name)
  'GDC.geo.worldcountries.iso2', # World countries (ISO a2)
  'GDC.geo.worldcountries.iso3', # World countries (ISO a3)
  'GDC.geo.czdistricts.name',    # Czech Districts (Name)
  'GDC.geo.czdistricts.name_no_diacritics', # Czech Districts (Name, no diacritics)
  'GDC.geo.czdistricts.nuts4',   # Czech Districts (NUTS 4)
  'GDC.geo.czdistricts.knok',    # Czech Districts (KNOK)

  # Day Display Forms
  'GDC.time.day',              # yyyy-MM-dd
  'GDC.time.day_us',           # MM/dd/yyyy
  'GDC.time.day_eu',           # dd/MM/yyyy
  'GDC.time.day_iso',          # dd-MM-yyyy
  'GDC.time.day_us_long',      # EEE, MMM d, yyyy
  'GDC.time.day_us_noleading', # M/d/yy
]

GD_DATA_TYPES =

['BIGINT', 'DOUBLE', 'INTEGER', 'INT', /^VARCHAR\(\d{1,4}\)$/i, /^DECIMAL\(\d{1,3},\s*\d{1,3}\)$/i]

DEFAULT_FACT_DATATYPE =

'DECIMAL(12,2)'

DEFAULT_ATTRIBUTE_DATATYPE =

'VARCHAR(128)'

DEFAULT_TYPE =

'GDC.text'

DEFAULT_DATE_FORMAT =

'MM/dd/yyyy'

LDM_CTG = GoodData REST API categories

'ldm'

LDM_MANAGE_CTG =

'ldm-manage2'

FIELD_PK = Model naming conventions

'id'

FK_SUFFIX =

'_id'

FACT_COLUMN_PREFIX =

'f_'

DATE_COLUMN_PREFIX =

'dt_'

TIME_COLUMN_PREFIX =

'tm_'

LABEL_COLUMN_PREFIX =

'nm_'

ATTRIBUTE_FOLDER_PREFIX =

'dim'

ATTRIBUTE_PREFIX =

'attr'

LABEL_PREFIX =

'label'

FACT_PREFIX =

'fact'

DATE_FACT_PREFIX =

'dt'

DATE_ATTRIBUTE =

'date'

DATE_ATTRIBUTE_DEFAULT_DISPLAY_FORM =

'mdyy'

TIME_FACT_PREFIX =

'tm.dt'

TIME_ATTRIBUTE_PREFIX =

'attr.time'

FACT_FOLDER_PREFIX =

'ffld'

Class Method Summary collapse

.check_gd_data_type(value) ⇒ Object
.check_gd_type(value) ⇒ Object
.column_name(item) ⇒ Object
.description(item) ⇒ Object
.merge_dataset_columns(a_schema_blueprint, b_schema_blueprint) ⇒ Object
.normalize_gd_data_type(type) ⇒ Object
.title(item) ⇒ Object
.upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ Object
Load given file into a data set described by the given schema.
.upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ Hash
Uploads multiple data sets using batch upload interface.

Class Method Details

.check_gd_data_type(value) ⇒ `Object`

# File 'lib/gooddata/models/model.rb', line 115

# Checks whether the given value is accepted by one of the entries of
# GD_DATA_TYPES.
#
# String entries are compared against the upcased value, so the check is
# case-insensitive for them; Regexp entries are matched against the value
# as given.
#
# @param value [String, nil] data type to check, e.g. 'INT' or 'VARCHAR(128)'
# @return [Boolean] true if any entry of GD_DATA_TYPES accepts the value
# @raise [RuntimeError] if an entry that is neither a String nor a Regexp
#   is reached while scanning GD_DATA_TYPES
def check_gd_data_type(value)
  GD_DATA_TYPES.any? do |v|
    case v
    when Regexp
      v =~ value
    when String
      # `value &&` keeps a nil value from raising on #upcase
      v == (value && value.upcase)
    else
      fail 'Unknown predicate'
    end
  end
end

.check_gd_type(value) ⇒ `Object`



111
112
113

# File 'lib/gooddata/models/model.rb', line 111

# Tells whether the given value is one of the attribute types listed in
# GD_TYPES.
#
# @param value [Object] candidate type, compared to each entry with ==
# @return [Boolean] true when GD_TYPES contains an entry equal to value
def check_gd_type(value)
  GD_TYPES.include?(value)
end

.column_name(item) ⇒ `Object`



103
104
105

# File 'lib/gooddata/models/model.rb', line 103

# Returns the column name of the given item.
#
# An explicit item[:column_name] wins when it is set (truthy); otherwise
# item[:id] is returned.
#
# @param item [#[]] object indexed with :column_name and :id
# @return [Object, nil] the explicit column name or the item id
def column_name(item)
  explicit_name = item[:column_name]
  return explicit_name if explicit_name

  item[:id]
end

.description(item) ⇒ `Object`



107
108
109

# File 'lib/gooddata/models/model.rb', line 107

# Returns the description stored on the given item.
#
# @param item [#[]] object indexed with the :description key
#   (presumably a blueprint field hash - confirm against callers)
# @return [Object, nil] whatever item[:description] evaluates to
def description(item)
  item[:description]
end

.merge_dataset_columns(a_schema_blueprint, b_schema_blueprint) ⇒ `Object`

# File 'lib/gooddata/models/model.rb', line 264

# Merges the columns of two schema blueprints into a copy of the first one.
#
# Both arguments are converted with #to_hash. The result is a deep copy of
# the first hash whose :columns is the concatenation of both column lists
# with exact duplicates removed. Columns are then grouped by :dataset (for
# :reference and :date columns) or by :id (for everything else); any group
# with more than one column is a conflict.
#
# @param a_schema_blueprint [#to_hash] blueprint whose hash is the base of the result
# @param b_schema_blueprint [#to_hash] blueprint whose :columns are merged in
# @return [Hash] the merged blueprint hash
# @raise [RuntimeError] when conflicting columns remain; every conflict is
#   logged through GoodData.logger and the first one is reported in the
#   exception message
def merge_dataset_columns(a_schema_blueprint, b_schema_blueprint)
  a_schema_blueprint = a_schema_blueprint.to_hash
  b_schema_blueprint = b_schema_blueprint.to_hash
  d = GoodData::Helpers.deep_dup(a_schema_blueprint)
  d[:columns] = (d[:columns] + b_schema_blueprint[:columns]).uniq

  grouped = d[:columns].group_by { |x| [:reference, :date].include?(x[:type]) ? x[:dataset] : x[:id] }
  # Each conflict is a [key, number_of_columns, columns] triple
  columns_that_failed_to_merge = grouped.map { |k, v| [k, v.count, v] }.select { |x| x[1] > 1 }
  return d if columns_that_failed_to_merge.empty?

  columns_that_failed_to_merge.each do |name, count, columns|
    GoodData.logger.error "Columns #{name} failed to merge. There are #{count} conflicting columns. When merging columns with the same name they have to be identical."
    GoodData.logger.error columns
  end

  # Report the first conflict; the message is built from the members of that
  # triple rather than from the list of all conflicts.
  name, count, columns = columns_that_failed_to_merge.first
  fail "Columns #{name} failed to merge. There are #{count} conflicting columns. #{columns} When merging columns with the same name they have to be identical."
end

.normalize_gd_data_type(type) ⇒ `Object`

# File 'lib/gooddata/models/model.rb', line 128

# Maps the 'INTEGER' data type (in any letter case) to 'INT'.
# Every other value, including nil, is returned unchanged.
#
# @param type [String, nil] data type name
# @return [String, nil] 'INT' for an INTEGER type, otherwise the argument itself
def normalize_gd_data_type(type)
  integer_alias = type && type.upcase == 'INTEGER'
  integer_alias ? 'INT' : type
end

.title(item) ⇒ `Object`



99
100
101

# File 'lib/gooddata/models/model.rb', line 99

# Returns the title of the given item.
#
# item[:title] is used when it is set (truthy); otherwise the title is
# derived from item[:id] via GoodData::Helpers.titleize.
#
# @param item [#[]] object indexed with :title and :id
# @return [Object] the explicit title or the titleized id
def title(item)
  explicit_title = item[:title]
  return explicit_title if explicit_title

  GoodData::Helpers.titleize(item[:id])
end

.upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ `Object`

Load given file into a data set described by the given schema

# File 'lib/gooddata/models/model.rb', line 137

# Loads the given file into a data set described by the given blueprint.
#
# Wraps the single upload into a one-element batch and delegates to
# GoodData::Model.upload_multiple_data; the same options hash is passed both
# as the entry's :options and as the batch-level options.
#
# @param path [Object] data to upload, stored under the entry's :data key
# @param project_blueprint [Object] blueprint forwarded to upload_multiple_data
# @param dataset [Object] data set identifier, stored under the entry's :dataset key
# @param options [Hash] options forwarded to upload_multiple_data
# @return [Object] whatever upload_multiple_data returns
def upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project })
  single_upload = { data: path, dataset: dataset, options: options }
  GoodData::Model.upload_multiple_data([single_upload], project_blueprint, options)
end

.upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ `Hash`

Uploads multiple data sets using batch upload interface

Parameters:

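data (Array<Hash>) —
Input data: one hash per data set, with keys :data (a CSV file path, an object responding to #path, or an array of rows), :dataset, and optionally :options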
project_blueprint (ProjectBlueprint) —
Project blueprint
options (Hash) (defaults to: { :client => GoodData.connection, :project => GoodData.project }) —
Additional options

Returns:

(Hash) —
Batch upload result

# File 'lib/gooddata/models/model.rb', line 153

# Uploads multiple data sets using the batch upload interface.
#
# Builds an SLI manifest for every entry of +data+, packs the manifest
# (as upload_info.json) together with the CSV payloads into upload.zip in a
# temporary directory, uploads the archive to the user WebDAV storage, posts
# the pull task and polls it while its status is RUNNING or PREPARED.
#
# @param data [Array<Hash>] one hash per data set with keys
#   :data (a CSV file path, an object responding to #path, or an array of
#   rows whose first row is taken as the header), :dataset (handed to
#   ToManifest.dataset_to_manifest) and optionally :options (a hash whose
#   :mode overrides options[:mode] for that entry)
# @param project_blueprint [ProjectBlueprint] Project blueprint
# @param options [Hash] Additional options; :mode is the load mode for
#   entries without their own (falls back to 'FULL'), :client and :project
#   are resolved through GoodData.get_client_and_project
# @return [Hash] the last polled task status response
# @raise [RuntimeError] when the task ends with status 'ERROR' or when
#   upload_status.json cannot be downloaded after such an error
def upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project })
  client, project = GoodData.get_client_and_project(options)

  project ||= GoodData.project

  manifest = {

    'dataSetSLIManifestList' => data.map do |d|
      # Per-entry mode wins over the batch-level mode; 'FULL' is the last resort
      mode = d[:options] && d[:options][:mode] ? d[:options][:mode] : options[:mode] || 'FULL'
      GoodData::Model::ToManifest.dataset_to_manifest(project_blueprint, d[:dataset], mode)
    end
  }

  # Header columns of every uploaded CSV; only used to build the error report
  csv_headers = []

  # create a temporary zip file
  dir = Dir.mktmpdir
  begin
    Zip::File.open("#{dir}/upload.zip", Zip::File::CREATE) do |zip|
      # TODO: make sure schema columns match CSV column names
      zip.get_output_stream('upload_info.json') { |f| f.puts JSON.pretty_generate(manifest) }

      data.zip(manifest['dataSetSLIManifestList']).each do |item|
        path = item[0][:data]
        path = item[0][:data].path if item[0][:data].respond_to? :path
        # Anything that is not a path string is treated as in-memory rows
        inline_data = !path.is_a?(String)
        csv_header = nil

        filename = item[1]['dataSetSLIManifest']['file']

        if inline_data
          csv_header = path.first
          zip.get_output_stream(filename) do |f|
            path.each do |row|
              f.puts row.to_csv
            end
          end
        else
          # Strip the line terminator so the last header does not carry a
          # trailing newline into the column comparison below; an empty file
          # (gets returns nil) yields an empty header list instead of raising.
          csv_header = File.open(path, &:gets).to_s.chomp.split(',')
          zip.add(filename, path)
        end

        csv_headers << csv_header
      end
    end

    # upload it
    client.upload_to_user_webdav("#{dir}/upload.zip", :directory => File.basename(dir), :client => options[:client], :project => options[:project])
  ensure
    FileUtils.rm_rf dir
  end
  csv_headers.flatten!

  # kick the load
  # Only the directory name is needed from here on; the directory itself is already removed
  pull = { 'pullIntegration' => File.basename(dir) }
  link = project.md.links('etl')['pull2']

  # TODO: List uploaded datasets
  task = client.post(link, pull, :info_message => 'Starting the data load from user storage to dataset.')

  res = client.poll_on_response(task['pull2Task']['links']['poll'], :info_message => 'Getting status of the dataload task.') do |body|
    body['wTaskStatus']['status'] == 'RUNNING' || body['wTaskStatus']['status'] == 'PREPARED'
  end

  if res['wTaskStatus']['status'] == 'ERROR'
    s = StringIO.new

    messages = res['wTaskStatus']['messages'] || []
    messages.each do |msg|
      GoodData.logger.error(JSON.pretty_generate(msg))
    end

    begin
      client.download_from_user_webdav(File.basename(dir) + '/upload_status.json', s, :client => client, :project => project)
    rescue => e
      raise "Unable to download upload_status.json from remote server, reason: #{e.message}"
    end

    js = MultiJson.load(s.string)
    manifests = manifest['dataSetSLIManifestList'].map do |m|
      m['dataSetSLIManifest']
    end

    parts = manifests.map do |m|
      m['parts']
    end

    manifest_cols = parts.flatten.map { |c| c['columnName'] }

    # extract some human readable error message from the webdav file
    manifest_extra = manifest_cols - csv_headers
    csv_extra = csv_headers - manifest_cols

    # The status file may lack an error section or carry parameters that do
    # not fit the message template; fall back to an empty message then
    error_message = begin
      js['error']['message'] % js['error']['parameters']
    rescue NoMethodError, ArgumentError
      ''
    end
    m = "Load failed with error '#{error_message}'.\n"
    m += "Columns that should be there (manifest) but aren't in uploaded csv: #{manifest_extra}\n" unless manifest_extra.empty?
    m += "Columns that are in csv but shouldn't be there (manifest): #{csv_extra}\n" unless csv_extra.empty?
    m += "Columns in the uploaded csv: #{csv_headers}\n"
    m += "Columns in the manifest: #{manifest_cols}\n"
    m += "Original message:\n#{JSON.pretty_generate(js)}\n"
    m += "Manifest used for uploading:\n#{JSON.pretty_generate(manifest)}"
    fail m
  end

  res
end

Module: GoodData::Model

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.check_gd_data_type(value) ⇒ Object

.check_gd_type(value) ⇒ Object

.column_name(item) ⇒ Object

.description(item) ⇒ Object

.merge_dataset_columns(a_schema_blueprint, b_schema_blueprint) ⇒ Object

.normalize_gd_data_type(type) ⇒ Object

.title(item) ⇒ Object

.upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ Object

.upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project }) ⇒ Hash