Class: Retriever::Fetch
- Inherits:
-
Object
- Object
- Retriever::Fetch
- Defined in:
- lib/retriever/fetch.rb
Direct Known Subclasses
Constant Summary collapse
- HR =
'###############################'
Instance Attribute Summary collapse
-
#max_pages ⇒ Object
readonly
Returns the value of attribute max_pages.
-
#result ⇒ Object
readonly
Returns the value of attribute result.
-
#t ⇒ Object
readonly
Returns the value of attribute t.
Instance Method Summary collapse
-
#dump ⇒ Object
prints current data collection to STDOUT.
- #errlog(msg) ⇒ Object
-
#initialize(url, options) ⇒ Fetch
constructor
Given a target URL and RR options, creates a fetch object.
- #lg(msg) ⇒ Object
-
#write ⇒ Object
writes current data collection out to CSV in current directory.
Constructor Details
#initialize(url, options) ⇒ Fetch
Given a target URL and RR options, creates a fetch object. There is no direct output; this is a parent class that the other fetch classes build on.
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/retriever/fetch.rb', line 17
#
# Builds a fetch object for the given target URL. The visible body only
# initializes instance state and produces no direct output.
#
# NOTE(review): this listing was damaged in extraction. The second parameter
# name was missing and is restored as `options` from the documented signature
# `#initialize(url, options)`. The listing also showed a bare
# `() if @progress` between the tally hash and the Target creation; whatever
# call name(s) stood there were lost, so that statement is kept below as a
# comment rather than guessed at. Restore it from lib/retriever/fetch.rb
# before relying on this listing.
#
# @param url [Object] target URL, handed to Retriever::Target.new
# @param options [Object] RR options; how they are consumed is not visible here
def initialize(url, options)
  @result = []
  @connection_tally = {
    success: 0,
    error: 0,
    error_client: 0,
    error_server: 0
  }
  # () if @progress   # <- damaged statement, see NOTE(review) above
  @t = Retriever::Target.new(url, @file_re)
  # Only when harvesting files with no output name set: derive one from the
  # second dot-separated label of the target host.
  @output = "rr-#{@t.host.split('.')[1]}" if @fileharvest && !@output
  @already_crawled = setup_bloom_filter
  @page_one = crawl_page_one
  @link_stack = create_link_stack
  @temp_link_stack = []
end
Instance Attribute Details
#max_pages ⇒ Object (readonly)
Returns the value of attribute max_pages.
13 14 15 |
# File 'lib/retriever/fetch.rb', line 13
#
# Read-only accessor for the max_pages attribute.
#
# @return [Object] the current value of @max_pages
def max_pages
  @max_pages
end
#result ⇒ Object (readonly)
Returns the value of attribute result.
13 14 15 |
# File 'lib/retriever/fetch.rb', line 13
#
# Read-only accessor for the result attribute.
#
# @return [Object] the current value of @result
def result
  @result
end
#t ⇒ Object (readonly)
Returns the value of attribute t.
13 14 15 |
# File 'lib/retriever/fetch.rb', line 13
#
# Read-only accessor for the t attribute.
#
# @return [Object] the current value of @t
def t
  @t
end
Instance Method Details
#dump ⇒ Object
prints current data collection to STDOUT
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/retriever/fetch.rb', line 44
#
# Prints the current data collection to STDOUT: a rule, the connection tally
# (only when @verbose), the target URL, a line naming the active mode (if
# any), the object count, and then every entry of @result.
#
# @return [nil]
def dump
  puts HR
  puts "Connection Tally:\n#{@connection_tally}\n#{HR}" if @verbose
  puts "Target URL: #{@t.target}"
  # The first mode flag that is set wins; with none set no mode line is printed.
  mode_line =
    if @sitemap
      'Sitemap'
    elsif @fileharvest
      "File harvest by type: #{@fileharvest}"
    elsif @seo
      'SEO Metrics'
    end
  puts mode_line if mode_line
  puts "Data Dump -- Object Count: #{@result.size}"
  puts HR
  @result.each { |row| puts row }
  puts
end
#errlog(msg) ⇒ Object
35 36 37 |
# File 'lib/retriever/fetch.rb', line 35
#
# Aborts the current operation by raising a RuntimeError whose message is
# msg prefixed with "ERROR: ".
#
# @param msg [#to_s] description of the failure
# @raise [RuntimeError] always
def errlog(msg)
  # `raise` is the preferred spelling; `fail` is merely an alias for it.
  raise "ERROR: #{msg}"
end
#lg(msg) ⇒ Object
39 40 41 |
# File 'lib/retriever/fetch.rb', line 39
#
# Prints msg to STDOUT prefixed with "### ", but only when @verbose is set.
#
# @param msg [#to_s] text to print
# @return [nil]
def lg(msg)
  return unless @verbose

  puts "### #{msg}"
end
#write ⇒ Object
writes current data collection out to CSV in current directory
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/retriever/fetch.rb', line 64
#
# Writes the current data collection to "<@output>.csv" and prints a short
# summary (file name and object count) to STDOUT. When @seo is set, a header
# row is written before the entries of @result.
#
# @return [false, nil] false when @output is unset (nothing is written or
#   printed); nil after a successful write
def write
  return false unless @output

  csv_path = "#{@output}.csv"
  CSV.open(csv_path, 'w') do |csv|
    # The header is written at most once per call, so no counter is needed to
    # guard it; @seo alone decides.
    csv << ['URL', 'Page Title', 'Meta Description', 'H1', 'H2'] if @seo
    @result.each { |entry| csv << entry }
  end
  puts HR
  puts "File Created: #{csv_path}"
  puts "Object Count: #{@result.size}"
  puts HR
  puts
end