Class: Sqlreport::BatchManager

Inherits:
Object
  • Object
show all
Defined in:
lib/sqlreport/batch_manager.rb

Overview

BatchManager Handles batch processing of SQL queries for large datasets

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query, batch_size: 1000, db_config: false) ⇒ BatchManager

Returns a new instance of BatchManager.



9
10
11
12
13
14
15
16
17
# File 'lib/sqlreport/batch_manager.rb', line 9

def initialize(query, batch_size: 1000, db_config: false)
  @query = query
  @batch_size = batch_size
  @db_config = db_config
  @total_rows = nil
  @processed_rows = 0
  @current_offset = 0
  @complete = false
end

Instance Attribute Details

#batch_sizeObject (readonly)

Returns the value of attribute batch_size.



7
8
9
# File 'lib/sqlreport/batch_manager.rb', line 7

def batch_size
  @batch_size
end

#current_offsetObject (readonly)

Returns the value of attribute current_offset.



7
8
9
# File 'lib/sqlreport/batch_manager.rb', line 7

def current_offset
  @current_offset
end

#processed_rowsObject (readonly)

Returns the value of attribute processed_rows.



7
8
9
# File 'lib/sqlreport/batch_manager.rb', line 7

def processed_rows
  @processed_rows
end

#total_rowsObject (readonly)

Returns the value of attribute total_rows.



7
8
9
# File 'lib/sqlreport/batch_manager.rb', line 7

def total_rows
  @total_rows
end

Instance Method Details

#connectionObject



19
20
21
22
23
24
25
# File 'lib/sqlreport/batch_manager.rb', line 19

def connection
  @connection ||= if @db_config
                    ActiveRecord::Base.establish_connection(@db_config)
                  else
                    ActiveRecord::Base.connection
                  end
end

#count_total_rowsObject



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/sqlreport/batch_manager.rb', line 27

def count_total_rows
  return @total_rows if @total_rows

  # Extract the FROM clause and any WHERE conditions to count total rows
  from_clause = @query.match(/FROM\s+([^\s;]+)(\s+WHERE\s+(.+?))?(\s+ORDER BY|\s+GROUP BY|\s+LIMIT|\s*;|\s*$)/i)
  return nil unless from_clause

  table = from_clause[1]
  where_clause = from_clause[3]

  count_query = "SELECT COUNT(*) FROM #{table}"
  count_query += " WHERE #{where_clause}" if where_clause

  result = connection.exec_query(count_query)
  @total_rows = result.rows.first.first.to_i
end

#next_batchObject



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/sqlreport/batch_manager.rb', line 44

def next_batch
  return nil if @complete

  # Add LIMIT and OFFSET to the query
  paginated_query = if @query.include?("LIMIT")
                      # If query already has LIMIT, we need to handle differently
                      raise "Batch processing not supported for queries with LIMIT clause"
                    else
                      "#{@query.chomp(";")} LIMIT #{@batch_size} OFFSET #{@current_offset}"
                    end

  result = Sqlreport::Result.new(paginated_query, db_config: @db_config).result

  # Update state
  rows_in_batch = result.rows.count
  @processed_rows += rows_in_batch
  @current_offset += @batch_size
  @complete = rows_in_batch < @batch_size

  result
end

#process_allObject



66
67
68
69
70
71
72
73
# File 'lib/sqlreport/batch_manager.rb', line 66

def process_all
  results = []
  while (batch = next_batch)
    results << batch
    yield batch if block_given?
  end
  results
end

#progress_percentageObject



99
100
101
102
103
# File 'lib/sqlreport/batch_manager.rb', line 99

def progress_percentage
  return 0 unless @total_rows && @total_rows.positive?

  (@processed_rows.to_f / @total_rows * 100).round(2)
end

#stream_to_csv(path, include_headers: true, separator: ",", quote_char: '"') ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/sqlreport/batch_manager.rb', line 75

def stream_to_csv(path, include_headers: true, separator: ",", quote_char: '"')
  first_batch = true

  File.open(path, "w") do |file|
    process_all do |batch|
      if first_batch && include_headers
        headers = batch.columns.join(separator)
        file.puts headers
        first_batch = false
      elsif first_batch
        first_batch = false
      end

      # Write rows without loading all into memory
      batch.rows.each do |row|
        csv_row = CSV.generate_line(row, col_sep: separator, quote_char: quote_char)
        file.write(csv_row)
      end
    end
  end

  true
end