Class: HashSpidey::HashUrlRecord
- Inherits:
-
Object
- Object
- HashSpidey::HashUrlRecord
- Defined in:
- lib/hash_spidey/hash_url_record.rb
Instance Attribute Summary collapse
-
#code ⇒ Object
readonly
Returns the value of attribute code.
-
#content ⇒ Object
readonly
Returns the value of attribute content.
-
#crawl_metadata ⇒ Object
readonly
Returns the value of attribute crawl_metadata.
-
#crawled_timestamp ⇒ Object
readonly
Returns the value of attribute crawled_timestamp.
-
#handle_data ⇒ Object
readonly
Returns the value of attribute handle_data.
-
#handler ⇒ Object
readonly
Returns the value of attribute handler.
-
#initialized_timestamp ⇒ Object
readonly
Returns the value of attribute initialized_timestamp.
-
#recorded_timestamp ⇒ Object
readonly
Returns the value of attribute recorded_timestamp.
-
#spider ⇒ Object
readonly
Returns the value of attribute spider.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Class Method Summary collapse
-
.spidey_handle(url, handler, spider, opts) ⇒ Object
Convenience constructor for Spidey: builds a HashUrlRecord for the URL with the given handler and spider merged into opts.
Instance Method Summary collapse
-
#collected_timestamp ⇒ Object
Alias that returns the recorded timestamp (marked in the source as an obvious smell).
- #crawled? ⇒ Boolean
- #header ⇒ Object
-
#initialize(url, opts = {}) ⇒ HashUrlRecord
constructor
A new instance of HashUrlRecord.
-
#mark_as_crawled(page_obj = {}) ⇒ Object
Saves crawl-related data: sets the crawled timestamp and builds the crawl metadata from the page object.
- #query_values ⇒ Object
- #record_content(ct) ⇒ Object
- #recorded? ⇒ Boolean
Constructor Details
#initialize(url, opts = {}) ⇒ HashUrlRecord
Returns a new instance of HashUrlRecord.
22 23 24 25 26 27 28 29 30 31 |
# File 'lib/hash_spidey/hash_url_record.rb', line 22 def initialize(url, opts={}) @url = url @addressable_uri = Addressable::URI.parse(@url) @initialized_timestamp = Time.now mash_opts = Hashie::Mash.new(opts) @spider = mash_opts.delete :spider @handler = mash_opts.delete :handler @handle_data = mash_opts.delete :handle_data # not sure if needed?... end |
Instance Attribute Details
#code ⇒ Object (readonly)
Returns the value of attribute code.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def code @code end |
#content ⇒ Object (readonly)
Returns the value of attribute content.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def content @content end |
#crawl_metadata ⇒ Object (readonly)
Returns the value of attribute crawl_metadata.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def crawl_metadata @crawl_metadata end |
#crawled_timestamp ⇒ Object (readonly)
Returns the value of attribute crawled_timestamp.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def crawled_timestamp @crawled_timestamp end |
#handle_data ⇒ Object (readonly)
Returns the value of attribute handle_data.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def handle_data @handle_data end |
#handler ⇒ Object (readonly)
Returns the value of attribute handler.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def handler @handler end |
#initialized_timestamp ⇒ Object (readonly)
Returns the value of attribute initialized_timestamp.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def initialized_timestamp @initialized_timestamp end |
#recorded_timestamp ⇒ Object (readonly)
Returns the value of attribute recorded_timestamp.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def recorded_timestamp @recorded_timestamp end |
#spider ⇒ Object (readonly)
Returns the value of attribute spider.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def spider @spider end |
#url ⇒ Object (readonly)
Returns the value of attribute url.
7 8 9 |
# File 'lib/hash_spidey/hash_url_record.rb', line 7 def url @url end |
Class Method Details
.spidey_handle(url, handler, spider, opts) ⇒ Object
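Convenience constructor for Spidey: copies opts into a Hashie::Mash, sets its spider and handler entries from the arguments, and returns a new HashUrlRecord for the URL.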
14 15 16 17 18 19 20 |
# File 'lib/hash_spidey/hash_url_record.rb', line 14 def self.spidey_handle(url, handler, spider, opts) mash_opts = Hashie::Mash.new opts mash_opts.spider = spider mash_opts.handler = handler return HashUrlRecord.new url, mash_opts end |
Instance Method Details
#collected_timestamp ⇒ Object
Alias that returns the recorded timestamp (marked in the source as an obvious smell).
58 |
# File 'lib/hash_spidey/hash_url_record.rb', line 58 def collected_timestamp; @recorded_timestamp; end |
#crawled? ⇒ Boolean
50 51 52 |
# File 'lib/hash_spidey/hash_url_record.rb', line 50 def crawled? !(crawled_timestamp.nil?) end |
#header ⇒ Object
59 |
# File 'lib/hash_spidey/hash_url_record.rb', line 59 def header; @crawl_metadata.header unless @crawl_metadata.nil? ; end |
#mark_as_crawled(page_obj = {}) ⇒ Object
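Saves crawl-related data: sets the crawled timestamp to the current time and stores a HashSpidey::CrawlRecord built from the page object and that timestamp.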
40 41 42 43 44 |
# File 'lib/hash_spidey/hash_url_record.rb', line 40 def mark_as_crawled(page_obj={}) @crawled_timestamp = Time.now # do something with mechanized page object @crawl_metadata = HashSpidey::CrawlRecord.new(page_obj, @crawled_timestamp) end |
#query_values ⇒ Object
69 70 71 |
# File 'lib/hash_spidey/hash_url_record.rb', line 69 def query_values @addressable_uri.query_values end |
#record_content(ct) ⇒ Object
34 35 36 37 |
# File 'lib/hash_spidey/hash_url_record.rb', line 34 def record_content(ct) @content = ct @recorded_timestamp = Time.now end |
#recorded? ⇒ Boolean
46 47 48 |
# File 'lib/hash_spidey/hash_url_record.rb', line 46 def recorded? !(@recorded_timestamp.nil?) end |