Class: RelatonAsme::Fetcher::Runner

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_asme/fetcher/runner.rb

Overview

Orchestrates the fetching of ASME publications

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(agent: nil) ⇒ Runner

Initialize the runner



15
16
17
# File 'lib/relaton_asme/fetcher/runner.rb', line 15

# Build a runner around an agent.
#
# @param agent [Object, nil] agent handed to the page objects this runner
#   builds; when it is nil (or false) one is obtained from #create_agent
def initialize(agent: nil)
  @agent = agent
  @agent ||= create_agent
end

Instance Attribute Details

#agent ⇒ Object (readonly)

Returns the value of attribute agent.



10
11
12
# File 'lib/relaton_asme/fetcher/runner.rb', line 10

# Read-only accessor for the agent assigned in the constructor.
#
# @return [Object] the value of the +@agent+ instance variable
def agent = @agent

Instance Method Details

#fetch_all(max_pages: nil, per_page: 100) ⇒ Array<AsmePublicationEdition>

Fetch all publications from all pages



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/relaton_asme/fetcher/runner.rb', line 24

# Fetch editions from consecutive index pages, starting at page 1.
#
# Paging stops once +max_pages+ pages have been fetched (when a cap is
# given) or as soon as #more_pages? reports that no further page exists.
#
# @param max_pages [Integer, nil] maximum number of pages to fetch; nil
#   means no cap, zero or a negative number fetches nothing
# @param per_page [Integer] page size forwarded to #fetch_page and
#   #more_pages?
# @return [Array<AsmePublicationEdition>] editions from every fetched page,
#   in page order
def fetch_all(max_pages: nil, per_page: 100)
  editions = []
  # The cap is otherwise only checked after a page has been fetched, so a
  # non-positive cap would still download the first page.
  return editions if max_pages && max_pages <= 0

  page_number = 1

  loop do
    puts "Fetching page #{page_number}..."
    editions.concat(fetch_page(page_number, per_page: per_page))

    break if max_pages && page_number >= max_pages
    break unless more_pages?(page_number, per_page)

    page_number += 1
  end

  editions
end

#fetch_all_editions(output_file, format: :json, max_pages: nil) ⇒ Array<AsmePublicationEdition>

Fetch all editions for publications and save to file



110
111
112
113
114
115
116
# File 'lib/relaton_asme/fetcher/runner.rb', line 110

# Collect editions via #fetch_all and persist them with #save_editions.
#
# Only +max_pages+ is forwarded to #fetch_all, so its own +per_page+
# default applies.
#
# @param output_file [String] path handed to #save_editions
# @param format [Symbol] serialization format handed to #save_editions
# @param max_pages [Integer, nil] page cap forwarded to #fetch_all
# @return [Array<AsmePublicationEdition>] the editions that were fetched
def fetch_all_editions(output_file, format: :json, max_pages: nil)
  fetch_all(max_pages: max_pages).tap do |collected|
    save_editions(collected, output_file, format)
  end
end

#fetch_page(page_number, per_page: 100) ⇒ Array<AsmePublicationEdition>

Fetch publications from a specific page



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/relaton_asme/fetcher/runner.rb', line 47

# Fetch every publication listed on one index page.
#
# Builds an IndexPage for the requested page, prints how many publication
# URLs it lists, and passes those URLs to #fetch_publications.
#
# @param page_number [Integer] index page to read
# @param per_page [Integer] page size passed to IndexPage
# @return [Array<AsmePublicationEdition>] whatever #fetch_publications
#   returns for the listed URLs
def fetch_page(page_number, per_page: 100)
  listing = IndexPage.new(page_number: page_number, per_page: per_page, agent: @agent)
  publication_links = listing.publication_urls

  puts "Found #{publication_links.size} publications on page #{page_number}"

  fetch_publications(publication_links)
end

#fetch_publication(url) ⇒ AsmePublication?

Fetch a single publication by URL



85
86
87
88
# File 'lib/relaton_asme/fetcher/runner.rb', line 85

# Fetch one publication.
#
# Wraps +url+ in a PublicationPage that shares this runner's agent and
# returns the result of its #to_data.
#
# @param url [String] publication URL
# @return [AsmePublication, nil] the value produced by PublicationPage#to_data
def fetch_publication(url)
  PublicationPage.new(url, agent: @agent).to_data
end

#fetch_publications(urls) ⇒ Array<AsmePublicationEdition>

Fetch multiple publications by URLs



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/relaton_asme/fetcher/runner.rb', line 64

# Fetch several publications and gather all of their editions.
#
# Each URL is announced on stdout and fetched with #fetch_publication. A
# publication that is nil, or whose +editions+ is nil, adds nothing. Any
# StandardError raised while handling a URL is reported with +warn+ and that
# URL is skipped, so one failure does not abort the batch.
#
# @param urls [Array<String>] publication URLs
# @return [Array<AsmePublicationEdition>] editions of every publication that
#   was fetched successfully, in URL order
def fetch_publications(urls)
  urls.each_with_index.with_object([]) do |(url, position), collected|
    puts "Fetching publication #{position + 1}/#{urls.size}: #{url}"

    begin
      record = fetch_publication(url)
      collected.concat(record.editions) if record&.editions
    rescue StandardError => e
      warn "Failed to fetch #{url}: #{e.message}"
    end
  end
end

#more_pages?(page_number, per_page) ⇒ Boolean

Check if more pages exist



95
96
97
98
99
100
101
102
# File 'lib/relaton_asme/fetcher/runner.rb', line 95

# Ask the index page whether another page follows it.
#
# NOTE(review): this builds a fresh IndexPage instead of reusing the one
# created in #fetch_page; if IndexPage loads its content over the network
# this may request the same index page twice — confirm.
#
# @param page_number [Integer] page that was just processed
# @param per_page [Integer] page size passed to IndexPage
# @return [Boolean] result of IndexPage#more_pages?
def more_pages?(page_number, per_page)
  IndexPage.new(page_number: page_number, per_page: per_page, agent: @agent).more_pages?
end

#save_editions(editions, output_file, format) ⇒ Object

Save editions to file



123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/relaton_asme/fetcher/runner.rb', line 123

# Serialize the editions and write them to a file.
#
# +:json+ is handled by #serialize_to_json and +:yaml+ by
# #serialize_to_yaml. The format is checked before anything is written, so
# an unsupported format leaves the file system untouched.
#
# @param editions [Array<AsmePublicationEdition>] editions to persist
# @param output_file [String] destination path passed to File.write
# @param format [Symbol] either +:json+ or +:yaml+
# @return [nil]
# @raise [ArgumentError] if +format+ is neither +:json+ nor +:yaml+
def save_editions(editions, output_file, format)
  serializer = { json: :serialize_to_json, yaml: :serialize_to_yaml }[format]
  raise ArgumentError, "Unsupported format: #{format}" unless serializer

  File.write(output_file, send(serializer, editions))
  puts "Saved #{editions.size} editions to #{output_file}"
end