Class: Web2Text::Crawl
- Inherits: Object
- Inheritance hierarchy: Object → Web2Text::Crawl
- Defined in:
- lib/web2text/crawl.rb
Instance Attribute Summary collapse
- #url ⇒ Object (readonly): Returns the value of attribute url.
Instance Method Summary collapse
- #filter(urls) ⇒ Object
- #focus?(url) ⇒ Boolean
- #initialize(url, avoid = [], focus = []) ⇒ Crawl (constructor): A new instance of Crawl.
- #skip?(url) ⇒ Boolean
Constructor Details
#initialize(url, avoid = [], focus = []) ⇒ Crawl
Returns a new instance of Crawl.
6 7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/web2text/crawl.rb', line 6

# Sets up a crawl rooted at +url+ and normalises the avoid/focus lists.
#
# Every entry of +avoid+ and +focus+ is converted to a String. An entry
# that does not already begin with +url+ is resolved against it with
# URI.join, so both lists end up holding strings that can be compared
# with String#start_with?.
#
# @param url [String] root URL of the crawl; it is passed to
#   String#start_with?, so it is expected to be a String
# @param avoid [Array<#to_s>, nil] prefixes to stay away from
# @param focus [Array<#to_s>, nil] prefixes to concentrate on
# @raise [URI::Error] if URI.join cannot parse +url+ or an entry
def initialize(url, avoid = [], focus = [])
  @url = url

  # One shared normaliser instead of two copies of the same block.
  absolutize = lambda do |entry|
    entry = entry.to_s
    entry.start_with?(url) ? entry : URI.join(url, entry).to_s
  end

  # Array() tolerates nil (-> []) and a lone entry (-> [entry]).
  @avoid = Array(avoid).map(&absolutize)
  @focus = Array(focus).map(&absolutize)
end
Instance Attribute Details
#url ⇒ Object (readonly)
Returns the value of attribute url.
4 5 6 |
# File 'lib/web2text/crawl.rb', line 4

# Returns the value of attribute url.
#
# @return [Object] the value stored in @url
def url
  @url
end
Instance Method Details
#filter(urls) ⇒ Object
20 21 22 |
# File 'lib/web2text/crawl.rb', line 20

# Drops every URL that #skip? rejects.
#
# @param urls [Enumerable] candidate URLs
# @return [Array] the elements of +urls+ for which #skip? is falsy
def filter(urls)
  # Implicit receiver: works whether or not #skip? is private.
  urls.reject { |candidate| skip?(candidate) }
end
#focus?(url) ⇒ Boolean
35 36 37 38 39 40 41 42 43 44 |
# File 'lib/web2text/crawl.rb', line 35

# Tells whether +url+ falls under one of the focus prefixes.
#
# An empty focus list means nothing is singled out, so every URL counts
# as in focus.
#
# @param url [#to_s] URL to test
# @return [Boolean] true if @focus is empty or the string form of +url+
#   starts with any entry of @focus
def focus?(url)
  # String#start_with? accepts several prefixes and is true if any matches.
  @focus.empty? || url.to_s.start_with?(*@focus)
end
#skip?(url) ⇒ Boolean
24 25 26 27 28 29 30 31 32 33 |
# File 'lib/web2text/crawl.rb', line 24

# Tells whether +url+ should be left out of the crawl.
#
# @param url [#to_s] URL to test
# @return [Boolean] true if the string form of +url+ does not start with
#   @url, or starts with any entry of @avoid; false otherwise
def skip?(url)
  url_s = url.to_s

  # Anything not under the root URL is skipped outright.
  return true unless url_s.start_with?(@url)

  # With no prefixes given, start_with? is false, so an empty @avoid
  # skips nothing.
  url_s.start_with?(*@avoid)
end