Module: Extraction

Included in: Blogbot

Defined in: lib/blogbot/extraction.rb

Overview

Adds the capability to extract data from a webpage in an organized format.

Instance Method Summary

#display_links ⇒ Object
#extract(url) ⇒ Object
#extract_links ⇒ Object

Extracts titles and hyperlinks from element being examined.

Instance Method Details

#display_links ⇒ `Object`

# File 'lib/blogbot/extraction.rb', line 19

# Prints every entry of +@popular_links+ to standard output, framed by a
# 50-character dashed rule above and below the listing.
#
# Each entry is iterated as key/value pairs; every pair is printed as
# "KEY: value" with the key upcased, and a blank line follows each entry.
#
# @return [Object] +@popular_links+, unchanged
def display_links
  divider = "-" * 50

  puts divider
  @popular_links.each do |entry|
    entry.each { |label, value| puts "#{label.upcase}: #{value}" }
    puts
  end
  puts divider

  @popular_links
end

#extract(url) ⇒ `Object`

# File 'lib/blogbot/extraction.rb', line 31

# Runs the extraction pipeline for +url+ and reports the outcome.
#
# Steps, in order: +reset+, print a progress line, +scan+ the url,
# +locate_popular_links+, +extract_links+. Afterwards +simple_error+ is
# called when +@popular_links+ is nil; otherwise +display_links+ is called.
#
# @param url [Object] passed straight through to +scan+
# @return [Object] the result of +simple_error+ or +display_links+
def extract(url)
  reset
  puts "\nExtracting ...\n"
  scan(url)
  locate_popular_links
  extract_links

  # Only a nil collection counts as failure; an empty one is still displayed.
  return simple_error if @popular_links.nil?

  display_links
end

#extract_links ⇒ `Object`

Extracts titles and hyperlinks from the element being examined. If the text is an empty string ('') it's an <img>. Images are typically duplicate links and ok to skip.

# File 'lib/blogbot/extraction.rb', line 7

# Collects the title and hyperlink of each usable <a> found via
# +@current_element.css('a')+ and appends it to +@popular_links+ as a
# +{title:, link:}+ hash.
#
# An anchor is skipped when:
# * its text is empty or whitespace-only. Such anchors are typically <img>
#   links that duplicate a text link; the whitespace case covers images
#   wrapped in indentation or newlines.
# * it has no href attribute, or its href is the placeholder '#'.
#
# A notice is printed when +see_multiple_links?+ is falsy, but extraction is
# still attempted afterwards.
#
# @return [Object] whatever +each+ returns for the anchor collection
def extract_links
  puts 'Not enough links to extract' unless see_multiple_links?

  @current_element.css('a').each do |a|
    title = a.text
    link = a['href']

    # Blank text means there is nothing usable as a title (image-only anchor).
    next if title.to_s.strip.empty?
    # No target, or a same-page placeholder: not a real hyperlink.
    next if link.nil? || link == '#'

    @popular_links << { title: title, link: link }
  end
end