Class: SchemaSherlock::PerformanceOptimizer

Inherits:
Object
  • Object
show all
Defined in:
lib/schema_sherlock/performance_optimizer.rb

Overview

Centralized performance optimization for file and pattern processing

Constant Summary collapse

SMALL_FILE_THRESHOLD =

File size thresholds for processing strategies

64 * 1024
LARGE_FILE_THRESHOLD =

1 MB — files at or above this size are read in chunks rather than with a single direct read

1024 * 1024

Class Method Summary collapse

Class Method Details

.count_patterns_optimized(content, table_name, column_name) ⇒ Object

Fast pattern matching with pre-filtering



28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/schema_sherlock/performance_optimizer.rb', line 28

# Counts references to a column in +content+, skipping the scanner whenever a
# cheap substring probe shows the column name cannot be present.
#
# @param content [String, nil] text to inspect
# @param table_name [String] passed through to the scanner unchanged
# @param column_name [String] column to look for
# @return [Object] 0 when a guard short-circuits; otherwise whatever
#   OptimizedScanner.count_column_references_native returns
def count_patterns_optimized(content, table_name, column_name)
  # No text to scan at all.
  return 0 if content.nil?

  # Text shorter than the column name cannot contain it.
  return 0 if content.length < column_name.length

  # Case-insensitive containment probe: only hand the original (un-lowered)
  # content to the scanner when the column name shows up somewhere.
  if content.downcase.include?(column_name.downcase)
    OptimizedScanner.count_column_references_native(content, table_name, column_name)
  else
    0
  end
end

.filter_relevant_files(file_paths, column_name) ⇒ Object

Smart file filtering to reduce I/O



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/schema_sherlock/performance_optimizer.rb', line 75

# Narrows a large list of file paths to those whose file names suggest they
# may reference the column, to cut down on the number of files read later.
#
# Lists of 1000 paths or fewer are returned as-is (same object). Larger lists
# are filtered by file name only: a path is kept when its basename (without a
# +.rb+ extension, lowercased) contains the column name, the column name with
# a trailing +_id+ removed, or one of the generic words "model", "service",
# "query".
#
# @param file_paths [Array<String>] candidate file paths
# @param column_name [String] column being searched for
# @return [Array<String>] the input list, or the filtered subset
def filter_relevant_files(file_paths, column_name)
  return file_paths unless file_paths.size > 1000

  # File names are compared in lowercase, so the patterns must be lowercase
  # too; otherwise a mixed-case column name could never match anything.
  column_lower = column_name.downcase
  association_name = column_lower.delete_suffix('_id')
  relevant_patterns = [column_lower, association_name, 'model', 'service', 'query'].uniq

  file_paths.select do |path|
    filename = File.basename(path, '.rb').downcase
    relevant_patterns.any? { |pattern| filename.include?(pattern) }
  end
end

.process_files_parallel(file_paths, table_name, column_name) ⇒ Object

Parallel file processing with optimal thread count



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/schema_sherlock/performance_optimizer.rb', line 44

# Reads every file and counts column references concurrently, returning the
# sum of the per-file counts.
#
# One future is scheduled per file on a fixed-size pool. The pool size is the
# smallest of the processor count, the number of files, and 8.
#
# @param file_paths [Array<String>] files to scan
# @param table_name [String] forwarded to count_patterns_optimized
# @param column_name [String] forwarded to count_patterns_optimized
# @return [Integer] 0 for an empty list, otherwise the summed counts; a future
#   whose value is nil (or whose retrieval raises) contributes 0
def process_files_parallel(file_paths, table_name, column_name)
  return 0 if file_paths.empty?

  # Limit threads to avoid overwhelming the system
  max_threads = [Concurrent.processor_count, file_paths.size, 8].min
  thread_pool = Concurrent::FixedThreadPool.new(max_threads)

  begin
    futures = file_paths.map do |file_path|
      Concurrent::Future.execute(executor: thread_pool) do
        content = read_file_optimized(file_path)
        count_patterns_optimized(content, table_name, column_name)
      end
    end

    # A nil value is treated as "no matches" rather than aborting the run.
    futures.sum do |future|
      future.value || 0
    rescue StandardError
      0
    end
  ensure
    # Always release the pool, including when scheduling or summing raises;
    # otherwise its worker threads would be left behind.
    thread_pool.shutdown
    thread_pool.wait_for_termination(5)
  end
end

.read_file_optimized(file_path) ⇒ Object

High-performance file reading with size-based optimization



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/schema_sherlock/performance_optimizer.rb', line 10

# Reads a file's contents, choosing the strategy by file size.
#
# Files smaller than LARGE_FILE_THRESHOLD are read in one call and returned as
# a valid UTF-8 string; larger files are delegated to read_large_file_chunked.
# This is a best-effort reader: a missing, unreadable or empty file, or any
# StandardError raised while reading, yields an empty string.
#
# @param file_path [String] path of the file to read
# @return [String] file contents, or "" when nothing could be read
def read_file_optimized(file_path)
  return "" unless File.exist?(file_path) && File.readable?(file_path)

  file_size = File.size(file_path)
  return "" if file_size.zero?

  if file_size < LARGE_FILE_THRESHOLD
    # Small/medium files: direct read.
    # The invalid:/undef: replacement options of File.read only take effect
    # when transcoding between two encodings, so with UTF-8 alone malformed
    # bytes would pass through untouched and break later string operations
    # such as downcase. Scrub explicitly to get a valid string.
    File.read(file_path, encoding: 'UTF-8').scrub
  else
    # Large files: chunked reading with buffer
    read_large_file_chunked(file_path)
  end
rescue StandardError
  ""
end