Class: LigaMagicScraper::StoreScraper

Inherits:
BaseScraper show all
Defined in:
lib/ligamagic_scraper/scrapers/store_scraper.rb

Instance Attribute Summary collapse

Attributes inherited from BaseScraper

#alert_system, #browser_mode

Attributes included from Loggable

#logs

Instance Method Summary collapse

Methods inherited from BaseScraper

#close_browser, #configure_browser, #find_previous_scrape, #generate_slug, #parse_price, #save_to_json

Methods included from Loggable

#clear_logs, #formatted_logs, #initialize_logs, #log, #log_debug, #log_error, #log_info, #log_warning

Constructor Details

#initialize(store_domain:, search_term: nil, max_pages: nil, browser_mode: 'headed', alert_config: nil) ⇒ StoreScraper

Returns a new instance of StoreScraper.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 10

# Builds a scraper for a single Liga Magic store.
#
# @param store_domain [String] the store's domain (e.g. "mystore.com.br")
# @param search_term [String, nil] optional term to narrow the product search
# @param max_pages [Integer, nil] page-crawl limit; required when no search_term
# @param browser_mode [String] browser mode forwarded to BaseScraper ('headed' default)
# @param alert_config [Hash, nil] alert settings forwarded to BaseScraper
# @raise [ArgumentError] when both search_term and max_pages are nil
def initialize(store_domain:, search_term: nil, max_pages: nil, browser_mode: 'headed', alert_config: nil)
  # Fail fast BEFORE building the URL or assigning state: listing a whole
  # store without a page cap is the unbounded-crawl case we must reject.
  if search_term.nil? && max_pages.nil?
    raise ArgumentError, "max_pages is required when listing store products without a search term"
  end

  @store_domain = store_domain
  @search_term = search_term
  @max_pages = max_pages
  @store_url = build_store_url(store_domain, search_term)
  @product_html_snapshots = []  # raw HTML captured in phase 1, parsed in phase 2
  @timings = {}                 # per-phase wall-clock durations (see #scrape)

  # BaseScraper presumably wires up logging/alerts — it must run before the
  # log_info calls below. TODO(review): confirm against BaseScraper#initialize.
  super(browser_mode:, alert_config:)

  log_info("🚀 Starting Liga Magic Store scraper...")
  log_info("🏪 Store domain: #{store_domain}")
  log_info("🔍 Search term: #{search_term}") if search_term
  log_info("📄 Max pages: #{max_pages}") if max_pages
  log_info("🔗 Store URL: #{@store_url}")
  log_info("🖥️  Browser mode: #{browser_mode}")
end

Instance Attribute Details

#max_pagesObject (readonly)

Returns the value of attribute max_pages.



8
9
10
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8

# @return [Integer, nil] the page-crawl limit supplied at construction
def max_pages = @max_pages

#search_termObject (readonly)

Returns the value of attribute search_term.



8
9
10
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8

# @return [String, nil] the optional search term supplied at construction
def search_term = @search_term

#store_domainObject (readonly)

Returns the value of attribute store_domain.



8
9
10
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8

# @return [String] the store domain this scraper targets
def store_domain = @store_domain

#store_urlObject (readonly)

Returns the value of attribute store_url.



8
9
10
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8

# @return [String] the fully-built store URL (see #build_store_url)
def store_url = @store_url

Instance Method Details

#build_json_data(products) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 95

# Assembles the JSON-serializable result hash for a completed scrape.
#
# @param products [Array] extracted product records
# @return [Hash] store metadata, ISO-8601 timestamp, counts and products;
#   :search_term and :max_pages appear only when they were actually supplied
def build_json_data(products)
  base = {
    store_domain:,
    store_url:,
    search_type: 'store',
    scraped_at: Time.now.iso8601,
    total_products: products.count,
    products:
  }

  # Optional keys are collected separately and merged in, so absent inputs
  # leave no nil/empty keys in the payload.
  extras = {}
  extras[:search_term] = search_term unless search_term.nil? || search_term.empty?
  extras[:max_pages] = max_pages unless max_pages.nil?

  base.merge(extras)
end

#generate_filenameObject



83
84
85
86
87
88
89
90
91
92
93
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 83

# Builds the output path for this run's JSON dump.
#
# @return [String] "scrapped/stores/<store-slug>/<timestamp>[__<search-slug>].json"
#   — the search-slug suffix is included only when a search term was given
def generate_filename
  stamp = Time.now.strftime('%Y%m%d_%H%M%S')
  prefix = "scrapped/stores/#{generate_slug(@store_domain)}/#{stamp}"

  # Without a search term the filename is just the timestamp.
  return "#{prefix}.json" if @search_term.nil? || @search_term.empty?

  "#{prefix}__#{generate_slug(@search_term)}.json"
end

#scrapeObject



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 33

# Runs the full two-phase scrape and returns the extracted products.
#
# Phase 1 paginates through the store with the browser open, capturing raw
# product HTML into @product_html_snapshots; the browser is closed as soon
# as capture finishes so Phase 2 (parsing) works purely from memory.
# Wall-clock timings for each phase are recorded in @timings and logged.
#
# @return [Array] extracted products; [] if a StandardError was rescued
def scrape
  start_time = Time.now
  log_info("📄 Starting pagination...")
  
  # Phase 1: Load all pages and capture HTML (browser open)
  load_time = Benchmark.measure do
    load_all_pages
  end
  @timings[:loading_and_capture] = load_time.real
  log_info("⏱️  Phase 1 (Loading + Capture): #{format('%.2f', load_time.real)}s")
  
  # Close browser ASAP — it is only needed for capture, and keeping it open
  # during extraction would hold its resources for no benefit.
  close_time = Benchmark.measure do
    close_browser
  end
  @timings[:browser_close] = close_time.real
  log_info("✅ Browser closed in #{format('%.2f', close_time.real)}s")
  
  # Phase 2: Extract products from memory (browser closed)
  log_info("🔍 Extracting products from memory...")
  # Declared outside the Benchmark block so the result survives it.
  all_products = nil
  extraction_time = Benchmark.measure do
    all_products = extract_products_from_html
  end
  @timings[:extraction] = extraction_time.real
  log_info("⏱️  Phase 2 (Extraction): #{format('%.2f', extraction_time.real)}s")
  
  # Total is wall clock from method entry, not the sum of phase timings.
  total_time = Time.now - start_time
  @timings[:total] = total_time
  
  log_info("=" * 60)
  log_info("📊 PERFORMANCE SUMMARY")
  log_info("=" * 60)
  log_info("⏱️  Loading + Capture (browser open): #{format('%.2f', @timings[:loading_and_capture])}s")
  log_info("⏱️  Browser Close:                    #{format('%.2f', @timings[:browser_close])}s")
  log_info("⏱️  Extraction (browser closed):      #{format('%.2f', @timings[:extraction])}s")
  log_info("⏱️  Total Time:                       #{format('%.2f', @timings[:total])}s")
  log_info("📦 Products captured:                 #{@product_html_snapshots.count}")
  log_info("✅ Valid products extracted:          #{all_products.count}")
  log_info("=" * 60)
  
  all_products
# Bare rescue catches StandardError only; errors are logged and swallowed,
# and the caller receives an empty result set.
rescue => e
  log_error("❌ Error during scraping: #{e.message}")
  log_debug(e.backtrace.first(5).join("\n"))
  []
ensure
  # Safety net for the error path: on the happy path the browser was already
  # closed above. NOTE(review): assumes close_browser is idempotent and that
  # @driver is set by BaseScraper#configure_browser — confirm in BaseScraper.
  close_browser if @driver
end