Class: Aircana::Contexts::Web

Inherits:
Object
  • Object
show all
Includes:
HTTParty
Defined in:
lib/aircana/contexts/web.rb

Overview

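Web context: fetches pages over HTTP (the class includes HTTParty) and stores them as Markdown in a knowledge base. The only source comment picked up for this overview is a linter directive: rubocop:disable Metrics/ClassLength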

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Web



21
22
23
# File 'lib/aircana/contexts/web.rb', line 21

# Creates the context with a new Local instance held in @local_storage.
def initialize
  @local_storage = Local.new
end

Instance Method Details

#fetch_url_for(kb_name:, url:, kb_type: "local") ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/aircana/contexts/web.rb', line 25

# Validates and fetches a single URL, then stores the page in the knowledge base.
#
# @param kb_name [Object] knowledge base name, forwarded to store_page_as_markdown
# @param url [Object] URL to validate and fetch (presumably a String; confirm against callers)
# @param kb_type [String] knowledge base type, forwarded to store_page_as_markdown
# @return [Object, nil] as written, the fetched page data; nil when a StandardError is rescued
def fetch_url_for(kb_name:, url:, kb_type: "local")
  validate_url!(url)

  page_data = fetch_and_process_url(url)
  store_page_as_markdown(page_data, kb_name, kb_type)

  # NOTE(review): a bare parenthesized expression is unusual here; this looks like a
  # method call whose name was lost when the listing was extracted. Confirm against
  # lib/aircana/contexts/web.rb before relying on the return value.
  (page_data)
rescue StandardError => e
  # Best-effort: the error is handed to handle_fetch_error and the caller receives nil.
  handle_fetch_error(url, e)
  nil
end

#fetch_urls_for(kb_name:, urls:, kb_type: "local") ⇒ Object

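Fetches each URL in `urls` via #fetch_url_for, updates the knowledge base manifest with the URLs that succeeded, and returns a hash with `:pages_count` and `:sources`. The only source comment picked up for this method is a linter directive: rubocop:disable Metrics/MethodLength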



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/aircana/contexts/web.rb', line 37

# Fetches every URL in +urls+ into the knowledge base and records the ones that succeeded.
#
# @param kb_name [Object] knowledge base name, forwarded to fetch_url_for and the manifest update
# @param urls [Array] URLs to fetch; an empty list returns immediately
# @param kb_type [String] knowledge base type, forwarded unchanged
# @return [Hash] +{ pages_count:, sources: }+; both are empty/zero when nothing was fetched
def fetch_urls_for(kb_name:, urls:, kb_type: "local") # rubocop:disable Metrics/MethodLength
  return { pages_count: 0, sources: [] } if urls.empty?

  pages_metadata = []
  successful_urls = []

  ProgressTracker.with_batch_progress(urls, "Fetching URLs") do |url, _index|
    metadata = fetch_url_for(kb_name: kb_name, url: url, kb_type: kb_type)
    # fetch_url_for yields nil for a URL that failed; only successes are recorded.
    if metadata
      pages_metadata << metadata
      successful_urls << url
    end
  end

  if successful_urls.any?
    # NOTE(review): the callee name was missing from the extracted listing and has been
    # reconstructed; confirm it against lib/aircana/contexts/web.rb.
    sources = build_sources_metadata(successful_urls, pages_metadata)
    update_or_create_manifest(kb_name, sources, kb_type)
    { pages_count: successful_urls.size, sources: sources }
  else
    { pages_count: 0, sources: [] }
  end
end

#refresh_web_sources(kb_name:) ⇒ Object

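Re-fetches every URL listed under the "web" sources in the knowledge base's manifest by delegating to #fetch_urls_for; returns a zero-page result when there are no web sources or no URLs. The only source comment picked up for this method is a linter directive: rubocop:disable Metrics/CyclomaticComplexity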



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/aircana/contexts/web.rb', line 60

# Re-fetches all URLs recorded under the "web" sources of a knowledge base's manifest.
#
# @param kb_name [Object] knowledge base name used to look up the manifest
# @return [Object] the result of fetch_urls_for, or +{ pages_count: 0, sources: [] }+
#   when the manifest has no web sources or those sources list no URLs
def refresh_web_sources(kb_name:) # rubocop:disable Metrics/CyclomaticComplexity
  nothing_to_refresh = { pages_count: 0, sources: [] }

  manifest_sources = Manifest.sources_from_manifest(kb_name)
  kb_type = Manifest.kb_type_from_manifest(kb_name)

  web_entries = manifest_sources.select { |entry| entry["type"] == "web" }
  return nothing_to_refresh if web_entries.empty?

  # A web entry without a "urls" key contributes no URLs.
  urls = web_entries.flat_map do |entry|
    listed = entry["urls"]
    listed.nil? ? [] : listed.map { |item| item["url"] }
  end
  return nothing_to_refresh if urls.empty?

  fetch_urls_for(kb_name: kb_name, urls: urls, kb_type: kb_type)
end