Class: Retriever::Fetch

Inherits:
Object
  • Object
show all
Defined in:
lib/retriever/fetch.rb

Direct Known Subclasses

FetchFiles, FetchSEO, FetchSitemap

Constant Summary collapse

HR =
'###############################'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options) ⇒ Fetch

Given a target URL and RR options, creates a fetch object. There is no direct output; this is a parent class that the other fetch classes build on.



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/retriever/fetch.rb', line 17

# Sets up the instance state for a fetch: an empty result list, zeroed
# connection counters, the target, and the crawl bookkeeping collections.
#
# @param url passed to Retriever::Target.new as the target
# @param options passed unchanged to setup_options
def initialize(url, options)
  @result = []
  # Per-outcome connection counters, all starting at zero.
  @connection_tally = {
    success: 0,
    error: 0,
    error_client: 0,
    error_server: 0
  }
  # Runs before anything below reads @progress, @file_re, @fileharvest or
  # @output. Presumably this is where those are assigned -- setup_options is
  # not visible here; confirm.
  setup_options(options)
  setup_progress_bar if @progress
  @t = Retriever::Target.new(url, @file_re)
  # Default output name for a file harvest when none is set: "rr-" plus the
  # second dot-separated label of the host.
  # NOTE(review): for a two-label host such as "example.com" the second label
  # is "com" -- confirm this is the intended name.
  @output = "rr-#{@t.host.split('.')[1]}" if @fileharvest && !@output
  @already_crawled = setup_bloom_filter
  @page_one = crawl_page_one
  @link_stack = create_link_stack
  @temp_link_stack = []
end

Instance Attribute Details

#max_pages ⇒ Object (readonly)

Returns the value of attribute max_pages.



13
14
15
# File 'lib/retriever/fetch.rb', line 13

# Read-only accessor for @max_pages.
# NOTE(review): the assignment of @max_pages is not visible in this section;
# presumably it happens during option setup -- confirm.
#
# @return the current value of @max_pages
def max_pages
  @max_pages
end

#result ⇒ Object (readonly)

Returns the value of attribute result.



13
14
15
# File 'lib/retriever/fetch.rb', line 13

# Read-only accessor for @result, the collection that dump prints and write
# exports to CSV.
#
# @return the current value of @result
def result
  @result
end

#t ⇒ Object (readonly)

Returns the value of attribute t.



13
14
15
# File 'lib/retriever/fetch.rb', line 13

# Read-only accessor for @t, the Retriever::Target built by the constructor.
#
# @return the current value of @t
def t
  @t
end

Instance Method Details

#dump ⇒ Object

prints current data collection to STDOUT



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/retriever/fetch.rb', line 44

# Prints a report of the collected data to STDOUT.
#
# Output order: separator, connection tally (only when @verbose), target URL,
# one line naming the active mode (sitemap, file harvest or SEO -- first match
# wins, nothing if none is set), the object count, a separator, every entry
# of @result, and a trailing blank line.
#
# @return [nil]
def dump
  puts HR
  if @verbose
    puts "Connection Tally:\n#{@connection_tally}\n#{HR}"
  end
  puts "Target URL: #{@t.target}"

  # Resolve the mode label once; it stays nil when no mode flag is set.
  mode_label =
    if @sitemap
      'Sitemap'
    elsif @fileharvest
      "File harvest by type: #{@fileharvest}"
    elsif @seo
      'SEO Metrics'
    end
  puts mode_label if mode_label

  puts "Data Dump -- Object Count: #{@result.size}"
  puts HR
  # Entries are printed one call at a time so array entries expand exactly as
  # they would with a direct puts of each entry.
  @result.each { |entry| puts entry }
  puts
end

#errlog(msg) ⇒ Object



35
36
37
# File 'lib/retriever/fetch.rb', line 35

# Aborts the current operation by raising a RuntimeError whose message is
# the given text prefixed with "ERROR: ".
#
# @param msg text interpolated into the exception message
# @raise [RuntimeError] always
def errlog(msg)
  raise RuntimeError, "ERROR: #{msg}"
end

#lg(msg) ⇒ Object



39
40
41
# File 'lib/retriever/fetch.rb', line 39

# Verbose-only logger: prints msg prefixed with "### " to STDOUT when
# @verbose is truthy and is otherwise silent.
#
# @param msg text interpolated into the log line
# @return [nil]
def lg(msg)
  return unless @verbose

  puts "### #{msg}"
end

#write ⇒ Object

writes current data collection out to CSV in current directory



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/retriever/fetch.rb', line 64

# Writes the collected results to "<@output>.csv" in the current directory,
# then prints a short summary (file name and object count) to STDOUT.
#
# When @seo is truthy a header row is written before the data rows. Each
# entry of @result is appended as one CSV row.
#
# @return [false, nil] false when @output is not set (nothing is written or
#   printed); nil after a successful write
def write
  return false unless @output

  csv_path = "#{@output}.csv"
  CSV.open(csv_path, 'w') do |csv|
    # The header is emitted at most once per file, so no counter is needed.
    csv << ['URL', 'Page Title', 'Meta Description', 'H1', 'H2'] if @seo
    @result.each { |entry| csv << entry }
  end

  puts HR
  puts "File Created: #{csv_path}"
  puts "Object Count: #{@result.size}"
  puts HR
  puts
end