Module: ColRead::Core

Included in:
Text
Defined in:
lib/colread.rb

Instance Method Summary collapse

Instance Method Details

#_open(url) ⇒ Object



40
41
42
43
44
# File 'lib/colread.rb', line 40

# Fetches +url+ and returns its body transcoded to UTF-8.
#
# The raw bytes are first labelled with the encoding held in @encode (set
# outside this method) and then converted; characters that are invalid in
# that encoding or have no UTF-8 mapping are replaced with "?".
#
# Relies on open-uri being loaded by the file (the original call did too).
#
# @param url [String] location to read; anything URI.open accepts
# @return [String] the fetched body, UTF-8 encoded
def _open(url)
  # URI.open is the supported entry point for URLs (Kernel#open stopped
  # handling them in Ruby 3.0); the block form closes the handle as soon as
  # the body has been read instead of leaving it open until GC.
  source = URI.open(url, &:read)
  source.force_encoding(@encode)
  source.encode!('utf-8', undef: :replace, replace: '?', invalid: :replace)
end

#ahref(a) ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/colread.rb', line 22

# Turns the href of link +a+ into a full URL.
#
# * an href starting with "http" is returned untouched;
# * an href starting with "/" is appended to @root;
# * anything else is treated as relative to @url, after removing a trailing
#   "/" or a trailing "/index.<ext>" segment from it.
#
# @param a [#[]] object whose 'href' entry is the link target
#   (presumably a Nokogiri element -- confirm against callers)
# @return [String] the resolved URL
def ahref(a)
  href = a['href']
  if href.start_with?('http')
    href
  elsif href.start_with?('/')
    @root + href
  else
    # The dot is escaped so that only a real "index.<ext>" file name is
    # stripped; an unescaped dot also swallowed segments such as "/indexes".
    base = @url.sub(%r{/(index\.\w+)?$}, '')
    "#{base}/#{href}"
  end
end

#chapters(url) ⇒ Object



17
18
19
20
# File 'lib/colread.rb', line 17

# Picks the chapter links out of the page at +url+.
#
# Every <a> element with non-blank text is grouped by its +indent+ value
# (a method the link objects must provide; it is not defined in this block)
# and the largest group is returned. When several groups share the largest
# size, the one whose first link appears earliest in the document wins.
#
# @param url [String] page to fetch via _open
# @return [Array] links of the largest group, or [] when the page has no
#   links with visible text
def chapters(url)
  doc = Nokogiri::HTML(_open(url))
  links = doc.css('a').select { |a| a.text =~ /\S/ }
  # Without this guard an empty page ended in nil.last (NoMethodError).
  return [] if links.empty?

  links.group_by(&:indent).values.max_by(&:size)
end

#contents(chapters) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/colread.rb', line 32

# Downloads each chapter and yields its title together with its text.
#
# For every link the target URL is resolved with ahref, fetched with _open,
# passed through Readability::Document#content, and the resulting HTML is
# reduced to plain text via Nokogiri.
#
# @param chapters [Enumerable] links responding to +text+ (and usable by ahref)
# @yield [pair] a two-element array: the link text and the chapter text
# @return [Enumerator] when no block is given
# @return [Object] the +chapters+ collection itself when a block is given
def contents(chapters)
  # Without a block the original raised LocalJumpError at the first yield.
  return enum_for(:contents, chapters) unless block_given?

  chapters.each do |a|
    source = _open(ahref(a))
    content = Nokogiri::HTML(Readability::Document.new(source).content).text
    yield [a.text, content]
  end
end