Class: Maltese::Sitemap
- Inherits:
-
Object
- Object
- Maltese::Sitemap
- Defined in:
- lib/maltese/sitemap.rb
Instance Attribute Summary collapse
-
#sitemap_bucket ⇒ Object
readonly
Returns the value of attribute sitemap_bucket.
Instance Method Summary collapse
- #get_data(url) ⇒ Object
- #get_query_url(options = {}) ⇒ Object
- #get_total(options = {}) ⇒ Object
-
#initialize(attributes = {}) ⇒ Sitemap
constructor
A new instance of Sitemap.
- #job_batch_size ⇒ Object
- #parse_data(result) ⇒ Object
- #process_data(options = {}) ⇒ Object
- #push_data(options = {}) ⇒ Object
- #queue_jobs(options = {}) ⇒ Object
- #s3_adapter ⇒ Object
- #search_path ⇒ Object
- #sitemap ⇒ Object
- #sitemap_url ⇒ Object
- #sitemaps_path ⇒ Object
- #timeout ⇒ Object
Constructor Details
#initialize(attributes = {}) ⇒ Sitemap
Returns a new instance of Sitemap.
20 21 22 |
# File 'lib/maltese/sitemap.rb', line 20

# Builds a Sitemap and records which bucket it should use.
#
# @param attributes [Hash] construction options; only +:sitemap_bucket+ is read here
# @return [Sitemap] a new instance of Sitemap
def initialize(attributes = {})
  # `presence` is presumably ActiveSupport's Object#presence (blank => nil) — confirm.
  requested_bucket = attributes[:sitemap_bucket].presence
  @sitemap_bucket = requested_bucket || "search.test.datacite.org"
end
Instance Attribute Details
#sitemap_bucket ⇒ Object (readonly)
Returns the value of attribute sitemap_bucket.
3 4 5 |
# File 'lib/maltese/sitemap.rb', line 3

# Read-only accessor for the bucket name stored by the constructor.
#
# @return [Object] the value of the +@sitemap_bucket+ instance variable
def sitemap_bucket
  @sitemap_bucket
end
Instance Method Details
#get_data(url) ⇒ Object
108 109 110 |
# File 'lib/maltese/sitemap.rb', line 108

# Requests +url+ through Maremma with a fixed timeout of 300.
# NOTE(review): the unit is presumably seconds, and this value is independent
# of the #timeout method (60) — confirm whether that is intentional.
#
# @param url [String] the address to fetch
# @return [Object] whatever +Maremma.get+ returns
def get_data(url)
  request_options = { timeout: 300 }
  Maremma.get(url, **request_options)
end
#get_query_url(options = {}) ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/maltese/sitemap.rb', line 80

# Builds the search URL for one page of DOI results.
#
# The query always restricts the returned fields to +doi+ and +updated+.
# Defaults are resolved into locals, so the caller's hash is not modified.
#
# @param options [Hash] paging options
# @option options [Object] :cursor value for +page[cursor]+ (defaults to 1)
# @option options [Integer] :size value for +page[size]+ (defaults to #job_batch_size;
#   0 is kept as-is because 0 is truthy in Ruby)
# @return [String] #search_path followed by the form-encoded query string
def get_query_url(options = {})
  cursor = options[:cursor] || 1
  size = options[:size] || job_batch_size

  params = {
    "fields[dois]" => "doi,updated",
    "page[cursor]" => cursor,
    "page[size]" => size
  }

  search_path + URI.encode_www_form(params)
end
#get_total(options = {}) ⇒ Object
73 74 75 76 77 78 |
# File 'lib/maltese/sitemap.rb', line 73

# Asks the search API how many records match, without fetching any of them.
#
# The query URL is built with +size: 0+, and the count is read from
# +meta.total+ in the response body.
#
# @param options [Hash] passed to #get_query_url (with +size+ forced to 0)
#   and, unchanged, as the second argument of +Maremma.get+
# @return [Object, nil] the value at +meta.total+, or nil when the body has no such key
def get_total(options = {})
  query_url = get_query_url(options.merge(size: 0))
  result = Maremma.get(query_url, options)
  result.body.dig("meta", "total")
end
#job_batch_size ⇒ Object
40 41 42 |
# File 'lib/maltese/sitemap.rb', line 40

# Page size that #get_query_url falls back to when no +:size+ option is given.
#
# @return [Integer] 1000
def job_batch_size
  1_000
end
#parse_data(result) ⇒ Object
112 113 114 115 116 117 118 119 120 |
# File 'lib/maltese/sitemap.rb', line 112

# Adds every record of one API response page to the sitemap.
#
# If the body carries a non-blank +errors+ entry, that entry is returned and
# nothing is added. Otherwise each item in +data+ becomes a +/works/<doi>+
# link with a monthly change frequency and the item's +updated+ value as
# +lastmod+.
#
# @param result [#body] response whose +body+ is a Hash
# @return [Object] the +errors+ entry when present, otherwise the sitemap's current link count
def parse_data(result)
  errors = result.body.fetch("errors", nil)
  return errors if errors.present?

  result.body.fetch("data", []).each do |item|
    loc = "/works/" + item.dig("attributes", "doi")
    # The key was previously misspelled ("attrributes"), so lastmod was always nil.
    sitemap.add loc, changefreq: "monthly", lastmod: item.dig("attributes", "updated")
  end

  sitemap.sitemap.link_count
end
#process_data(options = {}) ⇒ Object
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/maltese/sitemap.rb', line 92

# Walks the paginated API results starting at +options[:url]+, feeding each
# page to #parse_data, then hands over to #push_data.
#
# +options+ is updated in place: +:start_time+ is set to the current time and
# +:url+ is replaced by each response's +links.next+ until that is nil.
#
# @param options [Hash] must carry the first page address under +:url+
# @return [Object] the result of #push_data
def process_data(options = {})
  options[:start_time] = Time.now

  # walk through paginated results
  while options[:url]
    response = get_data(options[:url])
    parse_data(response)
    options[:url] = response.body.dig("links", "next")

    # Don't loop when testing. The rest of this file keys off RACK_ENV; the
    # legacy RACK variable is still honoured so existing setups keep working.
    break if ENV['RACK_ENV'] == "test" || ENV['RACK'] == "test"
  end

  push_data(options)
end
#push_data(options = {}) ⇒ Object
122 123 124 125 126 127 |
# File 'lib/maltese/sitemap.rb', line 122

# Finalizes the sitemap and reports how long the run took.
#
# @param options [Hash] run options
# @option options [Time] :start_time when the run began; set to the current
#   time (in place) if missing
# @return [Object] the sitemap's link count
def push_data(options = {})
  sitemap.finalize!
  options[:start_time] ||= Time.now
  sitemap.sitemap_index.stats_summary(time_taken: Time.now - options[:start_time])
  sitemap.sitemap.link_count
end
#queue_jobs(options = {}) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/maltese/sitemap.rb', line 60

# Entry point: looks up how many works exist and, if there are any, processes
# them starting from the first results page.
#
# Prints the result of #process_data, or "No works found." when the total is 0.
# NOTE(review): #get_total can return nil when the response has no
# +meta.total+; the comparison below would then raise — confirm whether
# callers rely on that.
#
# @param options [Hash] forwarded to #get_total and, merged with +:total+ and
#   +:url+, to #process_data
# @return [Object] the total reported by #get_total
def queue_jobs(options = {})
  total = get_total(options)

  if total > 0
    puts process_data(options.merge(total: total, url: get_query_url))
  else
    puts "No works found."
  end

  # return number of works queued
  total
end
#s3_adapter ⇒ Object
53 54 55 56 57 58 |
# File 'lib/maltese/sitemap.rb', line 53

# Builds the SitemapGenerator AWS SDK adapter for #sitemap_bucket.
#
# Credentials and region are read from the +AWS_ACCESS_KEY_ID+,
# +AWS_SECRET_ACCESS_KEY+ and +AWS_REGION+ environment variables.
#
# @return [SitemapGenerator::AwsSdkAdapter]
def s3_adapter
  aws_settings = {
    aws_access_key_id: ENV['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key: ENV['AWS_SECRET_ACCESS_KEY'],
    aws_region: ENV['AWS_REGION']
  }

  SitemapGenerator::AwsSdkAdapter.new(sitemap_bucket, **aws_settings)
end
#search_path ⇒ Object
32 33 34 |
# File 'lib/maltese/sitemap.rb', line 32

# Base URL (including the trailing "?") of the DOI search endpoint.
#
# @return [String] the production API when +RACK_ENV+ is "production",
#   otherwise the test API
def search_path
  return "https://api.datacite.org/dois?" if ENV['RACK_ENV'] == "production"

  "https://api.test.datacite.org/dois?"
end
#sitemap ⇒ Object
44 45 46 47 48 49 50 51 |
# File 'lib/maltese/sitemap.rb', line 44

# Memoized SitemapGenerator link set used by the other methods.
#
# Created on first call with #sitemap_url as both hosts, #sitemaps_path as the
# path, #s3_adapter as the adapter and +finalize: false+; later calls return
# the same object.
#
# @return [SitemapGenerator::LinkSet]
def sitemap
  return @sitemap if @sitemap

  @sitemap = SitemapGenerator::LinkSet.new(
    default_host: sitemap_url,
    sitemaps_host: sitemap_url,
    sitemaps_path: sitemaps_path,
    adapter: s3_adapter,
    finalize: false
  )
end
#sitemap_url ⇒ Object
24 25 26 |
# File 'lib/maltese/sitemap.rb', line 24

# Host URL passed to the link set in #sitemap.
#
# @return [String] the production search site when +RACK_ENV+ is "production",
#   otherwise the test search site
def sitemap_url
  return "https://search.datacite.org/" if ENV['RACK_ENV'] == "production"

  "https://search.test.datacite.org/"
end
#sitemaps_path ⇒ Object
28 29 30 |
# File 'lib/maltese/sitemap.rb', line 28

# Path passed to the link set in #sitemap as +sitemaps_path+.
#
# @return [String] "sitemaps/"
def sitemaps_path
  "sitemaps/"
end
#timeout ⇒ Object
36 37 38 |
# File 'lib/maltese/sitemap.rb', line 36

# Timeout value exposed by this class.
# NOTE(review): none of the methods listed alongside it call this;
# #get_data passes its own literal 300 — confirm which value is intended.
#
# @return [Integer] 60
def timeout
  60
end