Class: LigaMagicScraper::BaseScraper
- Inherits:
  - Object
    - LigaMagicScraper::BaseScraper
- Includes:
- Capybara::DSL, Loggable
- Defined in:
- lib/ligamagic_scraper/scrapers/base_scraper.rb
Direct Known Subclasses
Instance Attribute Summary collapse
-
#alert_system ⇒ Object
readonly
Returns the value of attribute alert_system.
-
#browser_mode ⇒ Object
readonly
Returns the value of attribute browser_mode.
Attributes included from Loggable
Instance Method Summary collapse
-
#build_json_data(products) ⇒ Object
Override in subclasses.
- #close_browser ⇒ Object
- #configure_browser ⇒ Object
- #find_previous_scrape(current_filename) ⇒ Object
-
#generate_filename ⇒ Object
Override in subclasses.
- #generate_slug(name) ⇒ Object
-
#initialize(browser_mode: 'headed', alert_config: nil) ⇒ BaseScraper
constructor
A new instance of BaseScraper.
- #parse_price(price_text) ⇒ Object
- #save_to_json(products) ⇒ Object
-
#scrape ⇒ Object
Override in subclasses.
Methods included from Loggable
#clear_logs, #formatted_logs, #initialize_logs, #log, #log_debug, #log_error, #log_info, #log_warning
Constructor Details
#initialize(browser_mode: 'headed', alert_config: nil) ⇒ BaseScraper
Returns a new instance of BaseScraper.
8 9 10 11 12 13 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 8
# Stores the requested browser mode, optionally creates the alert system,
# then calls initialize_logs and configure_browser in that order.
#
# @param browser_mode [String] mode later read by #configure_browser
# @param alert_config [Object, nil] when truthy it is passed to
#   AlertSystem.new; when nil/false no alert system is created
def initialize(browser_mode: 'headed', alert_config: nil)
  @browser_mode = browser_mode

  # Inside this branch alert_config is always truthy, so it is forwarded as-is.
  if alert_config
    @alert_system = AlertSystem.new(alert_config)
  end

  initialize_logs
  configure_browser
end
Instance Attribute Details
#alert_system ⇒ Object (readonly)
Returns the value of attribute alert_system.
6 7 8 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 6
# Read-only accessor for the @alert_system instance variable.
#
# @return [Object, nil] the stored alert system, or nil when none was set
def alert_system = @alert_system
#browser_mode ⇒ Object (readonly)
Returns the value of attribute browser_mode.
6 7 8 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 6
# Read-only accessor for the @browser_mode instance variable.
#
# @return [Object] the stored browser mode
def browser_mode = @browser_mode
Instance Method Details
#build_json_data(products) ⇒ Object
Override in subclasses
107 108 109 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 107
# Abstract hook: this base implementation always raises.
# #save_to_json serializes whatever an overriding implementation returns.
#
# @param products [Object] scraped products (unused by this base implementation)
# @raise [NotImplementedError] always
def build_json_data(products)
  message = 'Subclasses must implement build_json_data'
  raise NotImplementedError, message
end
#close_browser ⇒ Object
116 117 118 119 120 121 122 123 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 116
# Quits the driver of the current Capybara session.
#
# Best-effort: any StandardError raised while quitting is logged through
# log_error and swallowed, so this method does not propagate it.
def close_browser
  log_info("🔒 Closing browser...")
  begin
    # Explicit Capybara receiver: a bare leading `.current_session` is not valid Ruby.
    Capybara.current_session.driver.quit
  rescue StandardError => e
    log_error("⚠️ Error closing browser: #{e.message}")
  end
end
#configure_browser ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 15
# Selects the Capybara default driver according to #browser_mode.
#
# - 'headless' registers a :selenium_chrome_headless driver (Chrome with the
#   flags listed below) and makes it the default driver.
# - 'headed' uses :selenium_chrome.
# - anything else logs a warning and also uses :selenium_chrome.
#
# The mode is compared case-insensitively. It is converted with #to_s first so
# that a Symbol (:headless) is recognised and nil reaches the warning branch
# instead of raising NoMethodError.
def configure_browser
  case browser_mode.to_s.downcase
  when 'headless'
    Capybara.register_driver :selenium_chrome_headless do |app|
      options = Selenium::WebDriver::Chrome::Options.new
      options.add_argument('--headless')
      options.add_argument('--disable-gpu')
      options.add_argument('--no-sandbox')
      options.add_argument('--disable-dev-shm-usage')

      Capybara::Selenium::Driver.new(app, browser: :chrome, options: options)
    end

    Capybara.default_driver = :selenium_chrome_headless
  when 'headed'
    Capybara.default_driver = :selenium_chrome
  else
    log_warning("⚠️ Unknown browser mode '#{browser_mode}', using default (headed)")
    Capybara.default_driver = :selenium_chrome
  end
end
#find_previous_scrape(current_filename) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 83
# Finds the most recent other scrape file that shares the slug of
# +current_filename+.
#
# The slug is the text between the first "__" and the trailing ".json" of the
# basename (e.g. "booster_box" in "20240101__booster_box.json"). Candidates are
# the "*.json" entries in the same directory whose name ends with
# "__<slug>.json"; the one that sorts last by name is returned.
#
# @param current_filename [String] path of the scrape being written
# @return [String, nil] File.join(dir, name) of the latest other matching
#   file, or nil when the name carries no slug or no other file matches
def find_previous_scrape(current_filename)
  current_name = File.basename(current_filename)
  slug = current_name[/__(.+)\.json\z/, 1]
  return nil unless slug

  dir = File.dirname(current_filename)
  suffix = "__#{slug}.json"

  # List by basename relative to dir and match the suffix literally:
  # - comparing basenames keeps the current file excluded even when the caller
  #   passes a bare filename (dirname "." would otherwise yield "./name",
  #   which never equals "name");
  # - a literal suffix check means glob metacharacters in the slug or in the
  #   directory path cannot change which files match.
  previous_name = Dir.glob('*.json', base: dir)
                     .select { |name| name.end_with?(suffix) && name != current_name }
                     .max

  previous_name && File.join(dir, previous_name)
end
#generate_filename ⇒ Object
Override in subclasses
102 103 104 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 102
# Abstract hook: this base implementation always raises.
# #save_to_json uses the value returned by an overriding implementation as
# the output path.
#
# @raise [NotImplementedError] always
def generate_filename
  message = 'Subclasses must implement generate_filename'
  raise NotImplementedError, message
end
#generate_slug(name) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 36
# Converts a display name into a lowercase ASCII slug.
#
# Steps: lowercase, map the accented letters listed below to their base
# letter, collapse every run of characters outside [a-z0-9] into a single "_",
# then strip leading/trailing underscores.
#
# @param name [String, nil] text to slugify
# @return [String, nil] the slug, or nil when +name+ is nil or empty
def generate_slug(name)
  return nil if name.nil? || name.empty?

  # Compose decomposed accents (e.g. "e" + U+0301) into single code points so
  # they hit the translation tables below instead of turning into "_" in the
  # middle of a word. Restricted to UTF-8 strings so input in other encodings
  # takes the same path as before.
  text = name.encoding == Encoding::UTF_8 ? name.unicode_normalize(:nfc) : name

  # Transliterate accented characters to ASCII
  text.downcase
      .tr('áàãâäåāăąǎǟǡǻȁȃȧ', 'a')
      .tr('éèêëēĕėęěȅȇȩ', 'e')
      .tr('íìîïĩīĭįıȉȋ', 'i')
      .tr('óòôõöōŏőơǒǿȍȏȫȭȯȱ', 'o')
      .tr('úùûüũūŭůűųưȕȗ', 'u')
      .tr('çćĉċč', 'c')
      .tr('ñńņňʼn', 'n')
      .gsub(/[^a-z0-9]+/, '_')
      .gsub(/\A_+|_+\z/, '')
end
#parse_price(price_text) ⇒ Object
54 55 56 57 58 59 60 61 62 63 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 54
# Parses a price written with "." as thousands separator and "," as decimal
# separator, optionally prefixed by "R$" (e.g. "R$ 1.234,56" => 1234.56).
#
# @param price_text [String, nil] raw price text
# @return [Float, nil] the parsed value, or nil when the input is nil, empty,
#   does not start with a number once the "R$" prefix is removed, or raises
#   while being processed
def parse_price(price_text)
  return nil if price_text.nil? || price_text.empty?

  normalized = price_text.gsub(/R\$\s*/, '').delete('.').tr(',', '.')

  # String#to_f returns 0.0 for text that has no leading number, which would
  # be indistinguishable from a genuine zero price; report nil instead.
  return nil unless normalized.match?(/\A\s*[-+]?\.?\d/)

  normalized.to_f
rescue StandardError
  # Deliberate best-effort: anything that cannot be processed as text
  # (e.g. a non-String argument) yields nil rather than an exception.
  nil
end
#save_to_json(products) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 65
# Writes the scrape results as pretty-printed JSON.
#
# The path comes from #generate_filename and the payload from
# #build_json_data. The target directory is created when missing. When an
# alert system is set, it is handed the new payload and the path returned by
# #find_previous_scrape before the file is written.
#
# @param products [Object] passed through to #build_json_data
# @return [String] the path that was written
def save_to_json(products)
  output_path = generate_filename

  target_dir = File.dirname(output_path)
  FileUtils.mkdir_p(target_dir) unless File.directory?(target_dir)

  payload = build_json_data(products)

  if @alert_system
    @alert_system.process(
      current_data: payload,
      previous_file: find_previous_scrape(output_path)
    )
  end

  json_text = JSON.pretty_generate(payload)
  File.write(output_path, json_text)
  log_info("💾 Results saved to: #{output_path}")

  output_path
end
#scrape ⇒ Object
Override in subclasses
112 113 114 |
# File 'lib/ligamagic_scraper/scrapers/base_scraper.rb', line 112
# Abstract entry point: this base implementation always raises.
#
# @raise [NotImplementedError] always
def scrape
  message = 'Subclasses must implement scrape'
  raise NotImplementedError, message
end