Module: ColRead::Core
- Included in:
- Text
- Defined in:
- lib/colread.rb
Instance Method Summary
Instance Method Details
#_open(url) ⇒ Object
40 41 42 43 44 |
# File 'lib/colread.rb', line 40
#
# Reads the resource at +url+ and returns its body transcoded to UTF-8.
#
# The bytes read are first tagged with the encoding named by @encode
# (no transcoding happens at that step) and are then converted in place
# to UTF-8; invalid or unmappable sequences are replaced with "?".
#
# @param url [String] location to read, handed to URI.open
# @return [String] the body as a UTF-8 string
def _open(url)
  # URI.open (open-uri, Ruby >= 2.5) is used instead of Kernel#open, which
  # stopped accepting URLs in Ruby 3.0. The block form closes the
  # underlying handle as soon as the body has been read.
  source = URI.open(url, &:read)
  source.force_encoding(@encode)
  source.encode!("utf-8", :undef => :replace, :replace => "?", :invalid => :replace)
end
#ahref(a) ⇒ Object
22 23 24 25 26 27 28 29 30 |
# File 'lib/colread.rb', line 22
#
# Turns the 'href' of a link into a full URL.
#
# * An href starting with "http" is returned unchanged.
# * An href starting with "/" is appended to @root.
# * Any other href is treated as relative to @url, after a trailing "/"
#   or a trailing "/index.<ext>" has been removed from @url.
#
# @param a [#[]] link-like object whose 'href' entry is a String
# @return [String] the resolved URL
def ahref(a)
  href = a['href']
  if href.start_with?('http')
    href
  elsif href.start_with?('/')
    @root + href
  else
    # The dot is escaped so that only a literal "index.<ext>" segment is
    # stripped; unescaped it also swallowed segments such as "index_old".
    @url.sub(/\/(index\.\w+)?$/, '') + '/' + href
  end
end
#chapters(url) ⇒ Object
17 18 19 20 |
# File 'lib/colread.rb', line 17
#
# Picks the chapter links out of the page at +url+.
#
# Every <a> element whose text contains a non-whitespace character is
# grouped by the value of its +indent+ method, and the group with the
# most members is returned.
# NOTE(review): +indent+ is not defined in this view; presumably added to
# the node class elsewhere in the project - confirm.
#
# @param url [String] page to fetch via _open
# @return [Array] the links of the largest group
def chapters(url)
  page = Nokogiri::HTML(_open(url))
  labelled = page.css('a').select { |link| link.text =~ /\S/ }
  by_indent = labelled.group_by { |link| link.indent }
  # Ascending sort by group size, so the biggest group ends up last.
  ranked = by_indent.sort_by { |_key, members| members.count }
  ranked.last.last
end
#contents(chapters) ⇒ Object
32 33 34 35 36 37 38 |
# File 'lib/colread.rb', line 32
#
# Downloads every chapter link and yields its title and readable text.
#
# For each link the target URL is resolved with ahref, fetched with
# _open, passed through Readability::Document and finally reduced to
# plain text with Nokogiri.
#
# @param chapters [Enumerable] links responding to #text and usable by ahref
# @yield [Array(String, String)] a [link text, chapter text] pair per link
# @return [Enumerable, Enumerator] +chapters+ when a block is given,
#   otherwise an Enumerator over the same pairs
def contents(chapters)
  # Without a block the original raised LocalJumpError at the first yield;
  # hand back an Enumerator instead so callers can chain or iterate lazily.
  return enum_for(:contents, chapters) unless block_given?

  chapters.each do |a|
    source = _open(ahref(a))
    content = Nokogiri::HTML(Readability::Document.new(source).content).text
    yield [a.text, content]
  end
end