Class: RailsDataExplorer

Inherits:
Object
  • Object
show all
Defined in:
lib/rails_data_explorer.rb,
lib/rails_data_explorer/chart.rb,
lib/rails_data_explorer/engine.rb,
lib/rails_data_explorer/data_set.rb,
lib/rails_data_explorer/data_type.rb,
lib/rails_data_explorer/data_series.rb,
lib/rails_data_explorer/exploration.rb,
lib/rails_data_explorer/chart/box_plot.rb,
lib/rails_data_explorer/chart/pie_chart.rb,
lib/rails_data_explorer/utils/rde_table.rb,
lib/rails_data_explorer/chart/scatterplot.rb,
lib/rails_data_explorer/utils/color_scale.rb,
lib/rails_data_explorer/utils/data_binner.rb,
lib/rails_data_explorer/chart/parallel_set.rb,
lib/rails_data_explorer/chart/box_plot_group.rb,
lib/rails_data_explorer/utils/data_quantizer.rb,
lib/rails_data_explorer/action_view_extension.rb,
lib/rails_data_explorer/data_type/categorical.rb,
lib/rails_data_explorer/utils/value_formatter.rb,
lib/rails_data_explorer/data_type/quantitative.rb,
lib/rails_data_explorer/active_record_extension.rb,
lib/rails_data_explorer/chart/contingency_table.rb,
lib/rails_data_explorer/statistics/rng_category.rb,
lib/rails_data_explorer/statistics/rng_gaussian.rb,
lib/rails_data_explorer/chart/histogram_temporal.rb,
lib/rails_data_explorer/statistics/rng_power_law.rb,
lib/rails_data_explorer/chart/parallel_coordinates.rb,
lib/rails_data_explorer/chart/histogram_categorical.rb,
lib/rails_data_explorer/chart/histogram_quantitative.rb,
lib/rails_data_explorer/data_type/quantitative/decimal.rb,
lib/rails_data_explorer/data_type/quantitative/integer.rb,
lib/rails_data_explorer/data_type/quantitative/temporal.rb,
lib/rails_data_explorer/chart/stacked_histogram_temporal.rb,
lib/rails_data_explorer/chart/descriptive_statistics_table.rb,
lib/rails_data_explorer/chart/stacked_bar_chart_categorical.rb,
lib/rails_data_explorer/chart/stacked_bar_chart_categorical_percent.rb,
lib/rails_data_explorer/statistics/pearsons_chi_squared_independence_test.rb

Overview

Responsibilities:

* Integrate all the pieces required for this gem
* Initialize a collection of Explorations

Collaborators:

* Exploration

Defined Under Namespace

Modules: ActionViewExtension, ActiveRecordExtension, Statistics, Utils

Classes: Chart, DataSeries, DataSet, DataType, Engine, Exploration

Constant Summary collapse

GREATER_ZERO =

The smallest value to use if we have to avoid zero (div by zero)

1.0 / 1_000_000

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_collection, data_series_specs, explorations_to_render) ⇒ RailsDataExplorer

Top level initialization. This is what you use to explore your data.

Parameters:

  • data_collection (Array<Array>)

    Outer array is the container, inner array represents each record (row of data).

  • data_series_specs (Array<Hash>)

    One Hash for each data series.

  • explorations_to_render (Hash)

    A hash to specify which explorations you want rendered. Example data structure:

    {
      "univariate" => {
        "1" => ["Hour of day"],
      },
      "bivariate" => {
        "1" => ["Context", "Release (major)"],
        "2" => ["Year", "Timezone"],
      }
    }
    

Options Hash (data_series_specs):

  • :name (String)

    the name of the data series

  • :data_method (Proc)

    a proc that will return the data for this column. Valid types are String, Numeric or Date/Time.

  • :note (String)

    any comment you want to make for this data series. Will be printed with each chart that uses this data series.

  • :univariate (Boolean, String)

    override to always render univariate chart and statistics for this data series. Defaults to true.

  • :bivariate (Boolean, String)

    override to always render bivariate chart and statistics for this data series. Defaults to false.

  • :multivariate (Boolean, String)

    override to always render multivariate chart and statistics for this data series. Defaults to false.

  • :max_num_distinct_values (Integer)

    override the max number of distinct values for categorical data. Default: 20.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/rails_data_explorer.rb', line 51

# Top level initialization: builds every exploration (univariate, bivariate
# and multivariate) that is available for the given data.
#
# @param data_collection [Array<Array>] outer array is the container, each
#   inner array represents one record (row of data)
# @param data_series_specs [Array<Hash>] one Hash per data series. This
#   method reads the :name, :data_method and :multivariate keys and hands
#   the complete spec on to DataSeries.
# @param explorations_to_render [Hash, nil] which explorations to render,
#   keyed by type of analysis (String or Symbol keys). When nil, all
#   univariate explorations are rendered.
def initialize(data_collection, data_series_specs, explorations_to_render)
  @explorations = []
  @data_series_names = data_series_specs.map { |spec| spec[:name] }

  # Extract the values of every data series once and keep them by name.
  @cached_data_series = data_series_specs.each_with_object({}) do |spec, series_by_name|
    series_by_name[spec[:name]] = DataSeries.new(
      spec[:name],
      data_collection.map(&spec[:data_method]),
      spec
    )
  end

  # Default to all univariate explorations
  explorations_to_render ||= @data_series_names.each_with_object({ univariate: {} }) do |name, defaults|
    defaults[:univariate][name] = [name]
  end
  explorations_to_render = explorations_to_render.symbolize_keys

  # Build list of all available explorations (rendered and not rendered),
  # grouped by type_of_analysis
  univariate_specs = []
  bivariate_groups = {}
  multivariate_groups = {}
  data_series_specs.each do |spec|
    univariate_specs << spec.dup
    (bivariate_groups['rde-default'] ||= []) << spec.dup

    # No defaults for multivariate yet... Have to be specified manually via
    # data_series specs
    next unless spec[:multivariate]

    # A spec may name a single group or a list of groups.
    [*spec[:multivariate]].each do |group_key|
      (multivariate_groups[group_key.to_s] ||= []) << spec.dup
    end
  end

  # One exploration per data series.
  univariate_specs.uniq.compact.each do |spec|
    series_name = spec[:name]
    data_set = DataSet.new([@cached_data_series[series_name]], series_name)
    render = render_exploration_for?(explorations_to_render, :univariate, [series_name])
    @explorations << Exploration.new(series_name, data_set, render)
  end

  # One exploration per pair of data series within each group.
  bivariate_groups.each do |group_key, group_specs|
    next unless group_key # skip if key is falsey

    group_specs.uniq.compact.combination(2) do |spec_pair|
      pair_names = spec_pair.map { |spec| spec[:name] }
      @explorations << build_exploration_from_data_series_specs(
        data_collection,
        spec_pair,
        render_exploration_for?(explorations_to_render, :bivariate, pair_names)
      )
    end
  end

  # One exploration per manually specified multivariate group.
  multivariate_groups.each do |group_key, group_specs|
    next unless group_key # skip key `false` or `nil`

    @explorations << build_exploration_from_data_series_specs(
      data_collection,
      group_specs.uniq.compact,
      true # always render multivariate since they are specified manually
    )
  end
end

Instance Attribute Details

#data_series_names ⇒ Object (readonly)

Returns the value of attribute data_series_names.



14
15
16
# File 'lib/rails_data_explorer.rb', line 14

# Read-only accessor for the data series names.
#
# @return [Array] the :name of every data series spec given to the
#   constructor, in the order the specs were given
def data_series_names
  @data_series_names
end

#explorations ⇒ Object (readonly)

Returns the value of attribute explorations.



13
14
15
# File 'lib/rails_data_explorer.rb', line 13

# Read-only accessor for the explorations built by the constructor.
#
# @return [Array<Exploration>] univariate explorations first, then
#   bivariate, then multivariate ones
def explorations
  @explorations
end

Instance Method Details

#explorations_with_charts_available ⇒ Array<Exploration>

Returns:

  • (Array<Exploration>)



140
141
142
# File 'lib/rails_data_explorer.rb', line 140

# Selects the explorations that have charts.
#
# @return [Array<Exploration>] every exploration whose `charts.any?` is true
def explorations_with_charts_available
  explorations.select { |exploration| exploration.charts.any? }
end

#explorations_with_charts_to_render ⇒ Array<Exploration>

Returns:

  • (Array<Exploration>)



145
146
147
# File 'lib/rails_data_explorer.rb', line 145

# Narrows the explorations that have charts down to those that should be
# rendered.
#
# @return [Array<Exploration>] every exploration with charts available that
#   also answers true to `render_charts?`
def explorations_with_charts_to_render
  explorations_with_charts_available.select(&:render_charts?)
end

#explorations_with_no_charts_available ⇒ Array<Exploration>

Returns:

  • (Array<Exploration>)



150
151
152
# File 'lib/rails_data_explorer.rb', line 150

# Selects the explorations that have no charts at all.
#
# @return [Array<Exploration>] every exploration whose `charts` is empty
def explorations_with_no_charts_available
  explorations.select { |exploration| exploration.charts.empty? }
end

#number_of_values ⇒ Integer

Returns:

  • (Integer)


155
156
157
# File 'lib/rails_data_explorer.rb', line 155

# Reports the number of values of the first exploration.
#
# @return [Integer] the first exploration's number of values, or 0 when
#   there are no explorations (e.g. no data series specs were given)
def number_of_values
  first_exploration = explorations.first
  # Without explorations there is nothing to count; answer 0 rather than
  # failing with NoMethodError on nil.
  first_exploration ? first_exploration.number_of_values : 0
end