Class: LigaMagicScraper::StoreScraper
- Inherits: BaseScraper
- Ancestor chain: Object → BaseScraper → LigaMagicScraper::StoreScraper
- Defined in:
- lib/ligamagic_scraper/scrapers/store_scraper.rb
Instance Attribute Summary collapse
-
#max_pages ⇒ Object
readonly
Returns the value of attribute max_pages.
-
#search_term ⇒ Object
readonly
Returns the value of attribute search_term.
-
#store_domain ⇒ Object
readonly
Returns the value of attribute store_domain.
-
#store_url ⇒ Object
readonly
Returns the value of attribute store_url.
Attributes inherited from BaseScraper
Attributes included from Loggable
Instance Method Summary collapse
- #build_json_data(products) ⇒ Object
- #generate_filename ⇒ Object
-
#initialize(store_domain:, search_term: nil, max_pages: nil, browser_mode: 'headed', alert_config: nil) ⇒ StoreScraper
constructor
A new instance of StoreScraper.
- #scrape ⇒ Object
Methods inherited from BaseScraper
#close_browser, #configure_browser, #find_previous_scrape, #generate_slug, #parse_price, #save_to_json
Methods included from Loggable
#clear_logs, #formatted_logs, #initialize_logs, #log, #log_debug, #log_error, #log_info, #log_warning
Constructor Details
#initialize(store_domain:, search_term: nil, max_pages: nil, browser_mode: 'headed', alert_config: nil) ⇒ StoreScraper
Returns a new instance of StoreScraper.
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 10
#
# Builds a scraper for a single LigaMagic store.
#
# @param store_domain [String] the store's domain (used to build the store URL)
# @param search_term [String, nil] optional term to search within the store
# @param max_pages [Integer, nil] page cap; required when no search_term is given
# @param browser_mode [String] 'headed' or headless mode passed to BaseScraper
# @param alert_config [Object, nil] alert configuration passed to BaseScraper
# @raise [ArgumentError] when both search_term and max_pages are nil
def initialize(store_domain:, search_term: nil, max_pages: nil, browser_mode: 'headed', alert_config: nil)
  # Fail fast before doing any work: max_pages is required when no search term
  if search_term.nil? && max_pages.nil?
    raise ArgumentError, "max_pages is required when listing store products without a search term"
  end

  @store_domain = store_domain
  @search_term = search_term
  @max_pages = max_pages
  @store_url = build_store_url(store_domain, search_term)
  @product_html_snapshots = []  # raw HTML captured per page, parsed after browser closes
  @timings = {}                 # per-phase durations filled in by #scrape

  super(browser_mode: browser_mode, alert_config: alert_config)

  log_info("🚀 Starting Liga Magic Store scraper...")
  log_info("🏪 Store domain: #{store_domain}")
  log_info("🔍 Search term: #{search_term}") if search_term
  log_info("📄 Max pages: #{max_pages}") if max_pages
  log_info("🔗 Store URL: #{@store_url}")
  log_info("🖥️ Browser mode: #{browser_mode}")
end
Instance Attribute Details
#max_pages ⇒ Object (readonly)
Returns the value of attribute max_pages.
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8
#
# @return [Integer, nil] the page cap given at construction time
def max_pages
  @max_pages
end
#search_term ⇒ Object (readonly)
Returns the value of attribute search_term.
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8
#
# @return [String, nil] the search term given at construction time
def search_term
  @search_term
end
#store_domain ⇒ Object (readonly)
Returns the value of attribute store_domain.
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8
#
# @return [String] the store domain given at construction time
def store_domain
  @store_domain
end
#store_url ⇒ Object (readonly)
Returns the value of attribute store_url.
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 8
#
# @return [String] the full store URL built from domain and search term
def store_url
  @store_url
end
Instance Method Details
#build_json_data(products) ⇒ Object
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 95
#
# Assembles the JSON-serializable payload for a completed scrape.
#
# @param products [Array<Hash>] extracted product entries
# @return [Hash] store metadata, ISO-8601 timestamp, product count and list;
#   :search_term and :max_pages keys are only present when those options were set
def build_json_data(products)
  data = {
    store_domain: store_domain,
    store_url: store_url,
    search_type: 'store',
    scraped_at: Time.now.iso8601,
    total_products: products.count,
    products: products
  }
  # Optional keys: omitted entirely rather than serialized as null
  data[:search_term] = search_term if search_term && !search_term.empty?
  data[:max_pages] = max_pages if max_pages
  data
end
#generate_filename ⇒ Object
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 83
#
# Builds the output path for this run:
#   scrapped/stores/<store-slug>/<YYYYMMDD_HHMMSS>[__<search-slug>].json
# NOTE(review): "scrapped" is the existing on-disk directory name — kept as-is
# since renaming it would break consumers of previously written files.
#
# @return [String] relative JSON file path, unique per timestamp
def generate_filename
  datetime_str = Time.now.strftime('%Y%m%d_%H%M%S')
  store_slug = generate_slug(@store_domain)

  if @search_term && !@search_term.empty?
    search_slug = generate_slug(@search_term)
    "scrapped/stores/#{store_slug}/#{datetime_str}__#{search_slug}.json"
  else
    "scrapped/stores/#{store_slug}/#{datetime_str}.json"
  end
end
#scrape ⇒ Object
# File 'lib/ligamagic_scraper/scrapers/store_scraper.rb', line 33
#
# Runs the full scrape in two phases: (1) drive the browser to load every
# page and capture its HTML, then close the browser as early as possible;
# (2) extract products from the captured HTML entirely in memory. Per-phase
# wall-clock timings are recorded in @timings and logged.
#
# @return [Array<Hash>] extracted products; [] when any error is rescued
def scrape
  start_time = Time.now
  log_info("📄 Starting pagination...")

  # Phase 1: Load all pages and capture HTML (browser open)
  load_time = Benchmark.measure do
    load_all_pages
  end
  @timings[:loading_and_capture] = load_time.real
  log_info("⏱️ Phase 1 (Loading + Capture): #{format('%.2f', load_time.real)}s")

  # Close browser ASAP so Phase 2 runs without the browser's memory footprint
  close_time = Benchmark.measure do
    close_browser
  end
  @timings[:browser_close] = close_time.real
  log_info("✅ Browser closed in #{format('%.2f', close_time.real)}s")

  # Phase 2: Extract products from memory (browser closed)
  log_info("🔍 Extracting products from memory...")
  all_products = nil
  extraction_time = Benchmark.measure do
    all_products = extract_products_from_html
  end
  @timings[:extraction] = extraction_time.real
  log_info("⏱️ Phase 2 (Extraction): #{format('%.2f', extraction_time.real)}s")

  total_time = Time.now - start_time
  @timings[:total] = total_time

  log_info("=" * 60)
  log_info("📊 PERFORMANCE SUMMARY")
  log_info("=" * 60)
  log_info("⏱️ Loading + Capture (browser open): #{format('%.2f', @timings[:loading_and_capture])}s")
  log_info("⏱️ Browser Close: #{format('%.2f', @timings[:browser_close])}s")
  log_info("⏱️ Extraction (browser closed): #{format('%.2f', @timings[:extraction])}s")
  log_info("⏱️ Total Time: #{format('%.2f', @timings[:total])}s")
  log_info("📦 Products captured: #{@product_html_snapshots.count}")
  log_info("✅ Valid products extracted: #{all_products.count}")
  log_info("=" * 60)

  all_products
rescue => e
  # Deliberate best-effort: log the failure and return an empty result set
  log_error("❌ Error during scraping: #{e.message}")
  log_debug(e.backtrace.first(5).join("\n"))
  []
ensure
  # Safety net for the error path; close_browser is assumed idempotent /
  # guarded by @driver when Phase 1 already closed it — TODO confirm in BaseScraper
  close_browser if @driver
end