Class: Remi::DataSource::Postgres

Inherits:
Object
  • Object
show all
Includes:
Remi::DataSource, DataStub
Defined in:
lib/remi/cucumber/data_source.rb,
lib/remi/data_source/postgres.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Remi::DataSource

#feild_symbolizer

Methods included from Remi::DataSubject

#df=, #field_symbolizer

Methods included from DataStub

#empty_stub_df, #stub_df, #stub_row_array, #stub_values

Constructor Details

#initialize(fields: {}, credentials:, query:, logger: Remi::Settings.logger) ⇒ Postgres

Returns a new instance of Postgres.



6
7
8
9
10
11
# File 'lib/remi/data_source/postgres.rb', line 6

# Stores the configuration for a Postgres-backed data source. Nothing is
# connected or queried here; the connection is opened on demand by #pg_conn
# and the query is run by #extract.
#
# @param fields [Hash] field metadata, exposed through #fields
# @param credentials [Hash] connection settings; #pg_conn reads the keys
#   :host, :port, :dbname, :user, :password and :sslmode
# @param query [String] SQL text passed to the connection by #extract
# @param logger [#info] receives progress messages from #extract and
#   #to_dataframe
def initialize(fields: {}, credentials:, query:, logger: Remi::Settings.logger)
  @query       = query
  @credentials = credentials
  @fields      = fields
  @logger      = logger
end

Instance Attribute Details

#fields ⇒ Object

Returns the value of attribute fields.



13
14
15
# File 'lib/remi/data_source/postgres.rb', line 13

# Reader for the field metadata supplied to the constructor via `fields:`.
#
# @return [Object] the value stored in @fields (an empty Hash when the
#   constructor default was used)
def fields
  @fields
end

Instance Method Details

#df ⇒ Object



53
54
55
# File 'lib/remi/data_source/postgres.rb', line 53

# Returns the dataframe for this source, building it with #to_dataframe the
# first time it is requested and reusing the cached value afterwards.
#
# @return [Object] whatever #to_dataframe returned on the first call
def df
  @dataframe = to_dataframe unless @dataframe
  @dataframe
end

#extract ⇒ Object



15
16
17
18
# File 'lib/remi/data_source/postgres.rb', line 15

# Logs the query text, runs it on the connection returned by #pg_conn and
# caches the outcome in @raw_result.
#
# @return [Object] the value returned by the connection's #exec
def extract
  @logger.info("Executing query #{@query}")
  result = pg_conn.exec(@query)
  @raw_result = result
end

#pg_conn ⇒ Object



24
25
26
27
28
29
30
31
32
33
# File 'lib/remi/data_source/postgres.rb', line 24

# Returns the Postgres connection, opening it with PG.connect on first use
# and reusing the same object afterwards.
#
# Settings come from @credentials; missing values fall back to:
# host 'localhost', port 5432, user = output of the `whoami` command,
# sslmode 'require'. :dbname and :password are passed through as given.
#
# @return [Object] the value returned by PG.connect
def pg_conn
  return @pg_conn if @pg_conn

  # Built only when a connection is actually needed, so `whoami` is not
  # spawned on cached calls or when a user is supplied.
  creds = @credentials
  settings = {
    host:     creds[:host] || 'localhost',
    port:     creds[:port] || 5432,
    dbname:   creds[:dbname],
    user:     creds[:user] || `whoami`.chomp,
    password: creds[:password],
    sslmode:  creds[:sslmode] || 'require'
  }

  @pg_conn = PG.connect(**settings)
end

#raw_result ⇒ Object



20
21
22
# File 'lib/remi/data_source/postgres.rb', line 20

# Returns the cached query result, calling #extract to produce it when
# nothing has been cached yet.
#
# @return [Object] the value held in @raw_result
def raw_result
  return @raw_result if @raw_result

  @raw_result = extract
end

#to_dataframe ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/remi/data_source/postgres.rb', line 36

# Pivots the row-oriented query result into one array of values per field
# and wraps those columns in a Daru::DataFrame.
#
# Column keys are produced by passing each field name through
# field_symbolizer; column order follows the order in which keys were first
# seen. The raw result is cleared once its values have been copied out.
#
# @return [Daru::DataFrame] frame built from the collected columns
def to_dataframe
  # Performance for larger sets could be improved by using bulk query (via COPY)
  @logger.info("Converting query to a dataframe")

  columns = raw_result.each_with_object({}) do |record, acc|
    record.each do |name, cell|
      key = field_symbolizer.call(name)
      acc[key] = [] unless acc[key]
      acc[key].push(cell)
    end
  end

  # The values now live in `columns`, so release the PG result to save memory.
  raw_result.clear

  Daru::DataFrame.new(columns, order: columns.keys)
end