Class: LigaMagicScraper::BaseScraper

Inherits:
Object
  • Object
show all
Includes:
Capybara::DSL, Loggable
Defined in:
lib/ligamagic_scraper/scrapers/base_scraper.rb

Direct Known Subclasses

GlobalScraper, StoreScraper

Instance Attribute Summary collapse

Attributes included from Loggable

#logs

Instance Method Summary collapse

Methods included from Loggable

#clear_logs, #formatted_logs, #initialize_logs, #log, #log_debug, #log_error, #log_info, #log_warning

Constructor Details

#initialize(browser_mode: 'headed', alert_config: nil) ⇒ BaseScraper

Returns a new instance of BaseScraper.



8
9
10
11
12
13
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 8

# Builds a scraper: stores the requested browser mode, optionally creates an
# alert system, then prepares logging and the Capybara driver.
#
# @param browser_mode [String] mode name later inspected by #configure_browser
# @param alert_config [Hash, nil] when truthy, handed to AlertSystem.new;
#   when nil no alert system is created and #alert_system stays nil
def initialize(browser_mode: 'headed', alert_config: nil)
  @browser_mode = browser_mode

  # The modifier guard guarantees a truthy config here, so it is passed as-is.
  @alert_system = AlertSystem.new(alert_config) if alert_config

  # configure_browser may call log_warning, so logging is initialised first.
  initialize_logs
  configure_browser
end

Instance Attribute Details

#alert_system ⇒ Object (readonly)

Returns the value of attribute alert_system.



6
7
8
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 6

# Read-only accessor for the @alert_system instance variable.
#
# @return [Object, nil] the stored alert system, or nil when none was assigned
def alert_system = @alert_system

#browser_mode ⇒ Object (readonly)

Returns the value of attribute browser_mode.



6
7
8
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 6

# Read-only accessor for the @browser_mode instance variable.
#
# @return [Object, nil] the stored browser mode, or nil when none was assigned
def browser_mode = @browser_mode

Instance Method Details

#build_json_data(products) ⇒ Object

Override in subclasses

Raises:

  • (NotImplementedError)


107
108
109
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 107

# Abstract hook: this implementation has no behaviour of its own and always
# raises, so concrete scrapers must provide their own version.
#
# @param products [Object] scraped products (ignored by this implementation)
# @raise [NotImplementedError] always
def build_json_data(products)
  # __method__ is :build_json_data here, so the message text is unchanged.
  raise NotImplementedError, "Subclasses must implement #{__method__}"
end

#close_browser ⇒ Object



116
117
118
119
120
121
122
123
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 116

# Quits the driver behind the current Capybara session.
#
# Best-effort: a StandardError raised while looking up the session or quitting
# the driver is reported through log_error instead of being propagated.
def close_browser
  log_info("🔒 Closing browser...")

  begin
    session = Capybara.current_session
    session.driver.quit
  rescue StandardError => error
    log_error("⚠️  Error closing browser: #{error.message}")
  end
end

#configure_browser ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 15

# Selects the Capybara default driver that matches the scraper's browser mode.
#
# * 'headless' registers a :selenium_chrome_headless driver built from Chrome
#   options carrying the flags listed below, then makes it the default.
# * 'headed' selects the :selenium_chrome driver.
# * Any other value logs a warning and falls back to :selenium_chrome.
#
# The mode is normalised with #to_s before matching, so a Symbol such as
# :headless selects the headless driver (Symbol#downcase returns a Symbol,
# which never equals the String 'headless'), and nil reaches the warning
# branch instead of raising NoMethodError.
#
# @return [Symbol] the driver name assigned to Capybara.default_driver
def configure_browser
  case browser_mode.to_s.downcase
  when 'headless'
    Capybara.register_driver :selenium_chrome_headless do |app|
      options = Selenium::WebDriver::Chrome::Options.new
      %w[--headless --disable-gpu --no-sandbox --disable-dev-shm-usage].each do |flag|
        options.add_argument(flag)
      end

      Capybara::Selenium::Driver.new(app, browser: :chrome, options: options)
    end
    Capybara.default_driver = :selenium_chrome_headless
  when 'headed'
    Capybara.default_driver = :selenium_chrome
  else
    log_warning("⚠️  Unknown browser mode '#{browser_mode}', using default (headed)")
    Capybara.default_driver = :selenium_chrome
  end
end

#find_previous_scrape(current_filename) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 83

# Finds the most recent earlier scrape file that shares the current file's slug.
#
# The slug is whatever follows the first "__" in the basename, up to ".json"
# (e.g. "booster_box" in "20240101__booster_box.json"). Sibling files in the
# same directory ending in "__<slug>.json" are sorted by name in descending
# order, and the first one that is not the current file is returned. This
# assumes the part before "__" sorts chronologically (e.g. a timestamp) —
# confirm against the subclasses' generate_filename.
#
# Candidates are compared to the current file by expanded path, not by raw
# string. Dir.glob returns paths like "./x.json" for a bare "x.json" argument,
# so a plain string comparison would hand the current file back as its own
# predecessor.
#
# @param current_filename [String] path of the scrape being written
# @return [String, nil] path of the previous scrape as returned by Dir.glob,
#   or nil when the name has no "__<slug>.json" suffix or no other file matches
def find_previous_scrape(current_filename)
  slug = File.basename(current_filename)[/__(.+)\.json$/, 1]
  return nil unless slug

  dir = File.dirname(current_filename)
  current_path = File.expand_path(current_filename)

  Dir.glob(File.join(dir, "*__#{slug}.json"))
     .sort
     .reverse
     .find { |candidate| File.expand_path(candidate) != current_path }
end

#generate_filename ⇒ Object

Override in subclasses

Raises:

  • (NotImplementedError)


102
103
104
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 102

# Abstract hook: this implementation has no behaviour of its own and always
# raises, so concrete scrapers must provide their own version.
#
# @raise [NotImplementedError] always
def generate_filename
  # __method__ is :generate_filename here, so the message text is unchanged.
  raise NotImplementedError, "Subclasses must implement #{__method__}"
end

#generate_slug(name) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 36

# Converts a display name into a lowercase, underscore-separated ASCII slug.
#
# The name is downcased, the accented letters listed below are replaced by
# their base letter, every run of characters outside a-z / 0-9 becomes a single
# underscore, and leading/trailing underscores are removed.
#
# @param name [String, nil] text to slugify
# @return [String, nil] the slug (possibly "" when nothing alphanumeric
#   remains), or nil when name is nil or empty
def generate_slug(name)
  return nil if name.nil? || name.empty?

  # Accented character sets mapped to their ASCII replacement, applied in order.
  accent_groups = {
    'áàãâäåāăąǎǟǡǻȁȃȧ' => 'a',
    'éèêëēĕėęěȅȇȩ' => 'e',
    'íìîïĩīĭįıȉȋ' => 'i',
    'óòôõöōŏőơǒǿȍȏȫȭȯȱ' => 'o',
    'úùûüũūŭůűųưȕȗ' => 'u',
    'çćĉċč' => 'c',
    'ñńņňʼn' => 'n'
  }

  ascii = accent_groups.reduce(name.downcase) do |text, (accented, plain)|
    text.tr(accented, plain)
  end

  ascii.gsub(/[^a-z0-9]+/, '_').gsub(/^_+|_+$/, '')
end

#parse_price(price_text) ⇒ Object



54
55
56
57
58
59
60
61
62
63
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 54

# Parses a Brazilian-formatted price string (e.g. "R$ 1.234,56") into a Float.
#
# The "R$" prefix and any whitespace after it are dropped, "." thousands
# separators are removed, and the "," decimal separator becomes ".".
#
# Text without any digit (e.g. "Indisponível") returns nil. String#to_f never
# raises, so without this check such text would come back as 0.0 and be
# indistinguishable from a real zero price.
#
# The prefix pattern uses [[:space:]] rather than \s so that a non-breaking
# space after "R$" is also stripped; \s does not match U+00A0, which would
# leave the prefix in place and make to_f return 0.0.
#
# @param price_text [String, nil] raw price text
# @return [Float, nil] the parsed amount; nil for nil, empty or digit-free
#   input, or when any StandardError is raised (e.g. a non-String argument)
def parse_price(price_text)
  return nil if price_text.nil? || price_text.empty?
  return nil unless price_text.match?(/\d/)

  price_text.gsub(/R\$[[:space:]]*/, '')
            .delete('.')
            .tr(',', '.')
            .to_f
rescue StandardError
  # Deliberate best-effort: malformed input yields nil instead of an exception.
  nil
end

#save_to_json(products) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 65

# Writes the scraped products to the JSON file named by #generate_filename.
#
# Steps, in order: create the target directory if it is missing, build the
# payload with #build_json_data, hand the payload and the previous scrape file
# to the alert system when one is configured, write pretty-printed JSON, and
# log the destination.
#
# @param products [Object] scraped products, passed through to #build_json_data
# @return [String] the path the JSON was written to
def save_to_json(products)
  output_path = generate_filename

  target_dir = File.dirname(output_path)
  FileUtils.mkdir_p(target_dir) unless File.directory?(target_dir)

  payload = build_json_data(products)

  # Safe navigation skips both the lookup of the previous file and the call
  # when no alert system was configured.
  @alert_system&.process(
    current_data: payload,
    previous_file: find_previous_scrape(output_path)
  )

  File.write(output_path, JSON.pretty_generate(payload))
  log_info("💾 Results saved to: #{output_path}")

  output_path
end

#scrape ⇒ Object

Override in subclasses

Raises:

  • (NotImplementedError)


112
113
114
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 112

# Abstract hook: this implementation has no behaviour of its own and always
# raises, so concrete scrapers must provide their own version.
#
# @raise [NotImplementedError] always
def scrape
  # __method__ is :scrape here, so the message text is unchanged.
  raise NotImplementedError, "Subclasses must implement #{__method__}"
end