Class: Newly::PageCrawler
- Inherits: Object
- Inheritance hierarchy: Object → Newly::PageCrawler
- Defined in:
- lib/newly/page_crawler.rb
Instance Method Summary
- #image(element) ⇒ Object
- #initialize(host, document) ⇒ PageCrawler (constructor): A new instance of PageCrawler.
- #link(element) ⇒ Object
- #text(element) ⇒ Object
- #titleize(element) ⇒ Object
Constructor Details
#initialize(host, document) ⇒ PageCrawler
Returns a new instance of PageCrawler.
Source (lines 3–6):
# File 'lib/newly/page_crawler.rb', line 3
# Builds a crawler bound to one host and one document.
#
# @param host [Object] stored in +@host+; +#link+ and +#image+ interpolate it
#   as the prefix of the URLs they build
# @param document [Object] stored in +@document+; no method visible in this
#   listing reads it directly (presumably the private helpers do — confirm)
def initialize(host, document)
  @host = host
  @document = document
end
Instance Method Details
#image(element) ⇒ Object
Source (lines 28–35):
# File 'lib/newly/page_crawler.rb', line 28
# Resolves the +src+ of +element+ to an image URL.
#
# The raw value comes from +find(element, 'src')+ (helper defined elsewhere in
# this class). Two rewrites are then applied, in order:
#
# 1. If the value contains "==/", everything up to and including the last
#    "==/" is dropped and the remainder is prefixed with "http://".
#    NOTE(review): this looks like unwrapping a proxied/encoded image URL —
#    confirm against the sites being crawled.
# 2. If the value (after step 1) contains "../", it is prefixed with
#    "#{@host}/" and every "../" is removed from the combined string.
#
# Any other value is returned exactly as +find+ produced it.
#
# @param element [Object] passed straight to +find+; its type is not visible here
# @return [String, nil] the resolved image URL, or the falsy value +find+ returned
def image(element)
  src = find(element, 'src')
  return src unless src

  src = "http://#{src.split('==/').last}" if src.include?('==/')
  src = "#{@host}/#{src}".gsub('../', '') if src.include?('../')
  src
end
#link(element) ⇒ Object
Source (lines 22–26):
# File 'lib/newly/page_crawler.rb', line 22
# Resolves the +href+ of +element+ to a URL.
#
# The raw value comes from +find(element, 'href')+ (helper defined elsewhere
# in this class). A value that already starts with "http://", "https://" or
# "//" is returned untouched; anything else is prefixed with "#{@host}/" and
# every "../" is removed from the combined string.
#
# @param element [Object] passed straight to +find+; its type is not visible here
# @return [String, nil] the resolved link, or the falsy value +find+ returned
def link(element)
  href = find(element, 'href')
  return href unless href

  # Check the prefix rather than the whole string: a relative path such as
  # "docs/http-guide" merely contains "http" and still needs the host.
  return href if href.start_with?('http://', 'https://', '//')

  "#{@host}/#{href}".gsub('../', '')
end
#text(element) ⇒ Object
Source (lines 15–20):
# File 'lib/newly/page_crawler.rb', line 15
# Extracts the text of +element+.
#
# +element+ is first checked with +valid?+; if it passes, +get(element)+ is
# called and +#text+ is read from the result. The text itself is checked with
# +valid?+ again before being returned. Both +valid?+ and +get+ are defined
# elsewhere in this class, so what counts as "valid" is not visible here.
#
# @param element [Object] passed to +valid?+ and +get+; its type is not visible here
# @return [Object, nil] whatever +get(element).text+ yields (presumably a
#   String — confirm), or nil when either validity check fails
def text(element)
  return unless valid?(element)

  content = get(element).text
  content if valid?(content)
end
#titleize(element) ⇒ Object
Source (lines 8–13):
# File 'lib/newly/page_crawler.rb', line 8
# Returns the text of +element+ with its first character capitalized.
#
# The text comes from +#text+. Only the first character is replaced (with the
# first character of +title.capitalize+); the rest of the string keeps its
# original casing. The string returned by +#text+ is modified in place.
#
# @param element [Object] passed straight to +#text+
# @return [String, nil] the title, or the falsy value +#text+ returned
def titleize(element)
  title = text(element)
  # Skip empty strings as well as nil: "".capitalize[0] is nil, and assigning
  # nil through String#[]= raises TypeError.
  return title if !title || title.empty?

  title[0] = title.capitalize[0]
  title
end