Class: HashSpidey::HashUrlRecord

Inherits:
Object
  • Object
show all
Defined in:
lib/hash_spidey/hash_url_record.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, opts = {}) ⇒ HashUrlRecord

Returns a new instance of HashUrlRecord.



22
23
24
25
26
27
28
29
30
31
# File 'lib/hash_spidey/hash_url_record.rb', line 22

# Builds a record for +url+ and stamps the moment it was created.
#
# @param url [String] address to track; also parsed with Addressable::URI
# @param opts [Hash] optional settings; the :spider, :handler and
#   :handle_data entries are read, anything else is ignored
def initialize(url, opts = {})
  @url = url
  @addressable_uri = Addressable::URI.parse(@url)
  @initialized_timestamp = Time.now

  # Work on a Mash built from opts and pull the recognised keys out of it.
  options = Hashie::Mash.new(opts)
  @spider = options.delete(:spider)
  @handler = options.delete(:handler)
  @handle_data = options.delete(:handle_data) # not sure if needed?...
end

Instance Attribute Details

#code ⇒ Object (readonly)

Returns the value of attribute code.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @code (nil until assigned)
def code; @code; end

#content ⇒ Object (readonly)

Returns the value of attribute content.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @content (nil until assigned)
def content; @content; end

#crawl_metadata ⇒ Object (readonly)

Returns the value of attribute crawl_metadata.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# Reader for the crawl metadata attribute.
#
# @return [Object] the current value of @crawl_metadata (nil until assigned)
def crawl_metadata
  @crawl_metadata
end

#crawled_timestamp ⇒ Object (readonly)

Returns the value of attribute crawled_timestamp.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @crawled_timestamp (nil until assigned)
def crawled_timestamp; @crawled_timestamp; end

#handle_data ⇒ Object (readonly)

Returns the value of attribute handle_data.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @handle_data (nil until assigned)
def handle_data; @handle_data; end

#handler ⇒ Object (readonly)

Returns the value of attribute handler.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @handler (nil until assigned)
def handler; @handler; end

#initialized_timestamp ⇒ Object (readonly)

Returns the value of attribute initialized_timestamp.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @initialized_timestamp (nil until assigned)
def initialized_timestamp; @initialized_timestamp; end

#recorded_timestamp ⇒ Object (readonly)

Returns the value of attribute recorded_timestamp.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @recorded_timestamp (nil until assigned)
def recorded_timestamp; @recorded_timestamp; end

#spider ⇒ Object (readonly)

Returns the value of attribute spider.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @spider (nil until assigned)
def spider; @spider; end

#url ⇒ Object (readonly)

Returns the value of attribute url.



7
8
9
# File 'lib/hash_spidey/hash_url_record.rb', line 7

# @return [Object] the current value of @url (nil until assigned)
def url; @url; end

Class Method Details

.spidey_handle(url, handler, spider, opts) ⇒ Object

Convenience constructor for Spidey: builds a HashUrlRecord for the url, with the given handler and spider added to opts.



14
15
16
17
18
19
20
# File 'lib/hash_spidey/hash_url_record.rb', line 14

# Convenience constructor: folds +spider+ and +handler+ into the options and
# builds a HashUrlRecord from them.
#
# @param url [String] address for the new record
# @param handler [Object] stored under the :handler option
# @param spider [Object] stored under the :spider option
# @param opts [Hash] extra options, copied into a Hashie::Mash first
# @return [HashUrlRecord] the newly built record
def self.spidey_handle(url, handler, spider, opts)
  record_opts = Hashie::Mash.new(opts)
  record_opts.spider = spider
  record_opts.handler = handler

  HashUrlRecord.new(url, record_opts)
end

Instance Method Details

#collected_timestamp ⇒ Object

Obvious smell: this simply returns @recorded_timestamp, duplicating #recorded_timestamp under another name.



58
# File 'lib/hash_spidey/hash_url_record.rb', line 58

# Second name for the recorded timestamp; reads @recorded_timestamp directly.
#
# @return [Object] the current value of @recorded_timestamp (nil until assigned)
def collected_timestamp
  @recorded_timestamp
end

#crawled? ⇒ Boolean

Returns:

  • (Boolean)


50
51
52
# File 'lib/hash_spidey/hash_url_record.rb', line 50

# Tells whether a crawl timestamp is present.
#
# @return [Boolean] false while crawled_timestamp is nil, true otherwise
def crawled?
  !crawled_timestamp.nil?
end

#header ⇒ Object



59
# File 'lib/hash_spidey/hash_url_record.rb', line 59

# Delegates to the crawl metadata's header.
#
# @return [Object, nil] @crawl_metadata.header, or nil when @crawl_metadata is nil
def header
  return if @crawl_metadata.nil?

  @crawl_metadata.header
end

#mark_as_crawled(page_obj = {}) ⇒ Object

Saves data related to the crawl: records the crawl timestamp and builds a CrawlRecord from the page object.



40
41
42
43
44
# File 'lib/hash_spidey/hash_url_record.rb', line 40

# Records that the URL was crawled: stores the current time and wraps the
# page object, together with that time, in a HashSpidey::CrawlRecord.
#
# @param page_obj [Object] page object handed to CrawlRecord.new (defaults to {})
# @return [HashSpidey::CrawlRecord] the record stored in @crawl_metadata
def mark_as_crawled(page_obj = {})
  now = Time.now
  @crawled_timestamp = now
  # do something with mechanized page object
  @crawl_metadata = HashSpidey::CrawlRecord.new(page_obj, now)
end

#query_values ⇒ Object



69
70
71
# File 'lib/hash_spidey/hash_url_record.rb', line 69

# Delegates to the parsed URI held in @addressable_uri.
#
# @return [Object] whatever @addressable_uri.query_values returns
def query_values; @addressable_uri.query_values; end

#record_content(ct) ⇒ Object



34
35
36
37
# File 'lib/hash_spidey/hash_url_record.rb', line 34

# Stores the given content and stamps the moment it was recorded.
#
# @param ct [Object] content to keep in @content
# @return [Time] the timestamp written to @recorded_timestamp
def record_content(ct)
  stamp = Time.now
  @content = ct
  @recorded_timestamp = stamp
end

#recorded? ⇒ Boolean

Returns:

  • (Boolean)


46
47
48
# File 'lib/hash_spidey/hash_url_record.rb', line 46

# Tells whether a recorded timestamp is present.
#
# @return [Boolean] false while @recorded_timestamp is nil, true otherwise
def recorded?
  !@recorded_timestamp.nil?
end