Class: LigaMagicScraper::GlobalScraper
- Inherits:
- BaseScraper
- Object
- BaseScraper
- LigaMagicScraper::GlobalScraper
- Defined in:
- lib/ligamagic_scraper/scrapers/global_scraper.rb
Constant Summary collapse
- BASE_URL =
'https://www.ligamagic.com.br/?view=cards%2Fsearch&tipo=1'
- MAX_CLICKS =
50
Instance Attribute Summary collapse
- #search_term ⇒ Object (readonly)
  Returns the value of attribute search_term.
Attributes inherited from BaseScraper
Attributes included from Loggable
Instance Method Summary collapse
- #build_json_data(products) ⇒ Object
- #generate_filename ⇒ Object
- #initialize(search_term:, browser_mode: 'headed', alert_config: nil) ⇒ GlobalScraper (constructor)
  A new instance of GlobalScraper.
- #scrape ⇒ Object
- #url ⇒ Object
Methods inherited from BaseScraper
#close_browser, #configure_browser, #find_previous_scrape, #generate_slug, #parse_price, #save_to_json
Methods included from Loggable
#clear_logs, #formatted_logs, #initialize_logs, #log, #log_debug, #log_error, #log_info, #log_warning
Constructor Details
#initialize(search_term:, browser_mode: 'headed', alert_config: nil) ⇒ GlobalScraper
Returns a new instance of GlobalScraper.
13 14 15 16 17 18 19 20 21 22 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 13
#
# Builds a scraper for a site-wide search.
#
# Stores the search term, prepares the empty snapshot list and timings hash
# that #scrape fills in, hands the browser options to the superclass
# constructor, and finally logs a short start-up banner.
#
# @param search_term the term to search for (interpolated into the log line)
# @param browser_mode forwarded to the superclass constructor; defaults to 'headed'
# @param alert_config forwarded to the superclass constructor; defaults to nil
def initialize(search_term:, browser_mode: 'headed', alert_config: nil)
  @timings = {}
  @product_html_snapshots = []
  @search_term = search_term

  super(browser_mode: browser_mode, alert_config: alert_config)

  # Logging happens only after super has run, as in the original ordering.
  [
    "🚀 Starting Liga Magic global search scraper...",
    "🔍 Search term: #{search_term}",
    "🖥️ Browser mode: #{browser_mode}"
  ].each { |banner_line| log_info(banner_line) }
end
Instance Attribute Details
#search_term ⇒ Object (readonly)
Returns the value of attribute search_term.
11 12 13 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 11
#
# Read-only accessor for the search term stored by the constructor.
#
# @return the value of @search_term
def search_term = @search_term
Instance Method Details
#build_json_data(products) ⇒ Object
94 95 96 97 98 99 100 101 102 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 94
#
# Wraps a list of scraped products in the hash that describes this run.
#
# @param products collection of products; must respond to #count
# @return [Hash] with keys :search_term, :search_type (always 'global'),
#   :scraped_at (current time as an ISO 8601 string), :total_products and
#   :products, in that order
def build_json_data(products)
  timestamp = Time.now.iso8601

  {
    search_term: search_term,
    search_type: 'global',
    scraped_at: timestamp,
    total_products: products.count,
    products: products
  }
end
#generate_filename ⇒ Object
88 89 90 91 92 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 88
#
# Builds the relative output path for this search's JSON file.
#
# The name is the current local time (YYYYMMDD_HHMMSS), two underscores, and
# a slug of the search term: lower-cased, every run of characters outside
# a-z/0-9 replaced by a single underscore, and a leading/trailing underscore
# stripped.
#
# @return [String] path of the form "scrapped/global/<timestamp>__<slug>.json"
def generate_filename
  timestamp = Time.now.strftime('%Y%m%d_%H%M%S')
  underscored = search_term.downcase.gsub(/[^a-z0-9]+/, '_')
  slug = underscored.gsub(/^_|_$/, '')

  "scrapped/global/#{timestamp}__#{slug}.json"
end
#scrape ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 28
#
# Runs the whole scrape and returns the extracted products.
#
# The work is split into timed phases whose durations are stored in @timings
# (:loading, :capture, :browser_close, :extraction, :total) and logged:
#   1. load every product while the browser is open,
#   2. capture the products while the browser is still open,
#   3. close the browser,
#   4. extract the products from what was captured.
#
# All durations, including the total, are taken from a monotonic clock so a
# wall-clock adjustment during a long scrape cannot skew or negate them.
#
# @return the value of extract_products on success, or an empty Array when a
#   StandardError is raised (the error is logged, not re-raised)
def scrape
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  visit url
  log_info("📄 Loaded initial page")

  # Phase 1: Load all pages (browser open, no capturing)
  @timings[:loading] = Benchmark.realtime { load_all_products }
  log_info("⏱️ Phase 1 (Loading): #{format('%.2f', @timings[:loading])}s")

  # Phase 2: Capture all products at once (browser still open)
  @timings[:capture] = Benchmark.realtime { capture_all_products }
  log_info("⏱️ Phase 2 (Capture): #{format('%.2f', @timings[:capture])}s")

  # Close browser ASAP
  @timings[:browser_close] = Benchmark.realtime { close_browser }
  log_info("✅ Browser closed in #{format('%.2f', @timings[:browser_close])}s")

  # Phase 3: Extract products from memory (browser closed)
  log_info("🔍 Extracting products from memory...")
  products = nil
  @timings[:extraction] = Benchmark.realtime { products = extract_products }
  log_info("⏱️ Phase 3 (Extraction): #{format('%.2f', @timings[:extraction])}s")

  @timings[:total] = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  log_info("=" * 60)
  log_info("📊 PERFORMANCE SUMMARY")
  log_info("=" * 60)
  log_info("⏱️ Loading (browser open): #{format('%.2f', @timings[:loading])}s")
  log_info("⏱️ Capture (browser open): #{format('%.2f', @timings[:capture])}s")
  log_info("⏱️ Browser Close: #{format('%.2f', @timings[:browser_close])}s")
  log_info("⏱️ Extraction (browser closed): #{format('%.2f', @timings[:extraction])}s")
  log_info("⏱️ Total Time: #{format('%.2f', @timings[:total])}s")
  log_info("📦 Products captured: #{@product_html_snapshots.count}")
  log_info("✅ Valid products extracted: #{products.count}")
  log_info("=" * 60)

  products
rescue StandardError => e
  log_error("❌ Error during scraping: #{e.message}")
  log_debug(e.backtrace.first(5).join("\n"))
  []
ensure
  # Covers the failure path, where the explicit close above may not have run.
  close_browser if @driver
end
#url ⇒ Object
24 25 26 |
# File 'lib/ligamagic_scraper/scrapers/global_scraper.rb', line 24
#
# Search URL for the current term: BASE_URL with the CGI-escaped search term
# appended as the `card` query parameter.
#
# @return [String]
def url
  encoded_term = CGI.escape(search_term)
  "#{BASE_URL}&card=#{encoded_term}"
end