Class: RelatonAsme::Fetcher::Runner
- Inherits:
-
Object
- Object
- RelatonAsme::Fetcher::Runner
- Defined in:
- lib/relaton_asme/fetcher/runner.rb
Overview
Orchestrates the fetching of ASME publications
Instance Attribute Summary collapse
-
#agent ⇒ Object
readonly
Returns the value of attribute agent.
Instance Method Summary collapse
-
#fetch_all(max_pages: nil, per_page: 100) ⇒ Array<AsmePublicationEdition>
Fetch all publications from all pages.
-
#fetch_all_editions(output_file, format: :json, max_pages: nil) ⇒ Array<AsmePublicationEdition>
Fetch all editions for publications and save to file.
-
#fetch_page(page_number, per_page: 100) ⇒ Array<AsmePublicationEdition>
Fetch publications from a specific page.
-
#fetch_publication(url) ⇒ AsmePublication?
Fetch a single publication by URL.
-
#fetch_publications(urls) ⇒ Array<AsmePublicationEdition>
Fetch multiple publications by URLs.
-
#initialize(agent: nil) ⇒ Runner
constructor
Initialize the runner.
-
#more_pages?(page_number, per_page) ⇒ Boolean
Check if more pages exist.
-
#save_editions(editions, output_file, format) ⇒ Object
Save editions to file.
Constructor Details
#initialize(agent: nil) ⇒ Runner
Initialize the runner
15 16 17 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 15
# Initialize the runner.
#
# @param agent [Object, nil] agent handed to the page objects; when it is
#   nil (or otherwise falsy) one is built via +create_agent+
def initialize(agent: nil)
  @agent = agent
  # Fall back to a freshly created agent only when none was supplied.
  @agent ||= create_agent
end
Instance Attribute Details
#agent ⇒ Object (readonly)
Returns the value of attribute agent.
10 11 12 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 10 def agent @agent end |
Instance Method Details
#fetch_all(max_pages: nil, per_page: 100) ⇒ Array<AsmePublicationEdition>
Fetch all publications from all pages
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 24
# Fetch editions page by page, starting at page 1, until either the
# optional page limit is reached or +more_pages?+ reports no further page.
#
# @param max_pages [Integer, nil] stop after this page number; nil means no limit
# @param per_page [Integer] page size forwarded to +fetch_page+ and +more_pages?+
# @return [Array<AsmePublicationEdition>] editions accumulated across all fetched pages
def fetch_all(max_pages: nil, per_page: 100)
  collected = []
  current_page = 0

  loop do
    current_page += 1
    puts "Fetching page #{current_page}..."
    collected.concat(fetch_page(current_page, per_page: per_page))

    # The limit is checked first so +more_pages?+ is not consulted once
    # the requested number of pages has been fetched.
    limit_reached = max_pages && current_page >= max_pages
    break if limit_reached || !more_pages?(current_page, per_page)
  end

  collected
end
#fetch_all_editions(output_file, format: :json, max_pages: nil) ⇒ Array<AsmePublicationEdition>
Fetch all editions for publications and save to file
110 111 112 113 114 115 116 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 110
# Fetch all editions via +fetch_all+ and write them to a file via
# +save_editions+.
#
# @param output_file [String] path passed through to +save_editions+
# @param format [Symbol] output format passed through to +save_editions+
# @param max_pages [Integer, nil] page limit passed through to +fetch_all+
# @return [Array<AsmePublicationEdition>] the editions that were fetched and saved
def fetch_all_editions(output_file, format: :json, max_pages: nil)
  fetch_all(max_pages: max_pages).tap do |fetched|
    save_editions(fetched, output_file, format)
  end
end
#fetch_page(page_number, per_page: 100) ⇒ Array<AsmePublicationEdition>
Fetch publications from a specific page
47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 47
# Fetch publications from a specific index page: read the page's
# publication URLs, report how many were found, then fetch each of them.
#
# @param page_number [Integer] index page to read
# @param per_page [Integer] page size handed to IndexPage
# @return [Array<AsmePublicationEdition>] editions returned by +fetch_publications+
def fetch_page(page_number, per_page: 100)
  publication_links = IndexPage.new(
    page_number: page_number,
    per_page: per_page,
    agent: @agent
  ).publication_urls

  puts "Found #{publication_links.size} publications on page #{page_number}"
  fetch_publications(publication_links)
end
#fetch_publication(url) ⇒ AsmePublication?
Fetch a single publication by URL
85 86 87 88 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 85
# Fetch a single publication by URL by wrapping it in a PublicationPage
# (sharing this runner's agent) and converting the page to data.
#
# @param url [String] publication URL
# @return [AsmePublication, nil] whatever PublicationPage#to_data returns
def fetch_publication(url)
  PublicationPage.new(url, agent: @agent).to_data
end
#fetch_publications(urls) ⇒ Array<AsmePublicationEdition>
Fetch multiple publications by URLs
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 64
# Fetch multiple publications by URLs and gather their editions.
#
# Failures are isolated per URL: a StandardError raised while fetching one
# publication is reported with +warn+ (including the exception message) and
# the remaining URLs are still processed.
#
# @param urls [Array] publication URLs (presumably strings — confirm
#   against IndexPage#publication_urls)
# @return [Array<AsmePublicationEdition>] editions of every publication
#   that was fetched successfully and exposes a non-nil +editions+
def fetch_publications(urls)
  editions = []
  total = urls.size

  urls.each.with_index(1) do |url, position|
    puts "Fetching publication #{position}/#{total}: #{url}"

    begin
      publication = fetch_publication(url)
      # A nil publication, or one without editions, contributes nothing.
      editions.concat(publication.editions) if publication&.editions
    rescue StandardError => e
      warn "Failed to fetch #{url}: #{e.message}"
    end
  end

  editions
end
#more_pages?(page_number, per_page) ⇒ Boolean
Check if more pages exist
95 96 97 98 99 100 101 102 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 95
# Check if more pages exist after the given one by asking a new IndexPage
# built for that page.
#
# @param page_number [Integer] index page to inspect
# @param per_page [Integer] page size handed to IndexPage
# @return [Boolean] the result of IndexPage#more_pages?
def more_pages?(page_number, per_page)
  IndexPage.new(
    page_number: page_number,
    per_page: per_page,
    agent: @agent
  ).more_pages?
end
#save_editions(editions, output_file, format) ⇒ Object
Save editions to file
123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/relaton_asme/fetcher/runner.rb', line 123
# Save editions to file: serialize them in the requested format, write the
# result to +output_file+, and print a confirmation line.
#
# @param editions [Array<AsmePublicationEdition>] editions to serialize
# @param output_file [String] destination path given to File.write
# @param format [Symbol] :json or :yaml
# @raise [ArgumentError] when +format+ is neither :json nor :yaml
def save_editions(editions, output_file, format)
  serialized = case format
               when :json then serialize_to_json(editions)
               when :yaml then serialize_to_yaml(editions)
               else raise ArgumentError, "Unsupported format: #{format}"
               end

  File.write(output_file, serialized)
  puts "Saved #{editions.size} editions to #{output_file}"
end