Class: Web2Text::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/web2text/crawler.rb

Instance Method Summary

Constructor Details

#initialize(crawl, query = "body") ⇒ Crawler

Returns a new instance of Crawler.



3
4
5
6
# File 'lib/web2text/crawler.rb', line 3

# Builds a crawler and remembers the settings it was given.
#
# @param crawl the crawl object to keep in @crawl (its role is not visible
#   in this excerpt — TODO confirm against callers)
# @param query [String] selector later handed to `doc.css` by
#   #doc_as_plaintext; defaults to "body"
def initialize(crawl, query = "body")
  @query = query
  @crawl = crawl
end

Instance Method Details

#doc_as_plaintext(doc) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
# File 'lib/web2text/crawler.rb', line 8

# Flattens the parts of +doc+ selected by @query into one plain-text string.
#
# Text nodes are gathered one by one and glued together with single spaces,
# because relying on inner_text alone does not leave enough whitespace
# between adjacent pieces of text.
#
# @param doc a document responding to `css` (presumably a Nokogiri
#   document — confirm against callers)
# @return [String] the text of every matched element, space-separated
def doc_as_plaintext(doc)
  per_element = doc.css(@query).map do |element|
    fragments = []
    # Keep only the content of nodes that report themselves as text.
    element.traverse { |node| fragments << node.content if node.text? }
    fragments.join(' ')
  end

  per_element.join(' ')
end