Class: Github::Trending::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/github_trending/scraper.rb

Constant Summary collapse

BASE_HOST =
'https://github.com'
BASE_URL =
"#{BASE_HOST}/trending"

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Scraper

Returns a new instance of Scraper.



27
28
29
30
31
32
# File 'lib/github_trending/scraper.rb', line 27

# Sets up the Mechanize agent used for every request made by this scraper.
#
# The agent identifies itself as "github-trending <VERSION>". When the
# `http_proxy` environment variable is set, its URL is parsed and the agent
# is routed through that proxy (host, port and optional credentials).
def initialize
  @agent = Mechanize.new
  @agent.user_agent = "github-trending #{VERSION}"

  proxy_url = ENV['http_proxy']
  return unless proxy_url

  proxy = URI.parse(proxy_url)
  @agent.set_proxy(proxy.host, proxy.port, proxy.user, proxy.password)
end

Instance Method Details

#get(language = nil, since = nil) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/github_trending/scraper.rb', line 34

# Fetches the trending page and builds one Project per listed repository.
#
# The URL is produced by generate_url_for_get (defined elsewhere in this
# class) from the two arguments, which are passed through unchanged.
#
# @param language [String, nil] language filter — presumably a GitHub language
#   slug; confirm against generate_url_for_get
# @param since [String, nil] time-range filter — semantics are decided by
#   generate_url_for_get
# @return [Array<Project>] projects in page order, each with lang, star_count,
#   name and description populated
# @raise [ScrapeException] when the page contains no `.repo-list-item` nodes
def get(language = nil, since = nil)
  page = @agent.get(generate_url_for_get(language, since))

  projects = page.search('.repo-list-item').map do |content|
    project = Project.new
    meta = content.search('.repo-list-meta').text
    project.lang, project.star_count = extract_lang_and_star_from_meta(meta)
    # The link text is spread across whitespace and newlines; split.join
    # removes all of it so the name comes out as a single token.
    project.name        = content.search('.repo-list-name a').text.split.join
    project.description = content.search('.repo-list-description').text.delete("\n").strip
    project
  end
  # An empty result is treated as a scrape failure rather than "no projects".
  raise ScrapeException if projects.empty?

  projects
end

#list_languages ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/github_trending/scraper.rb', line 50

# Lists the languages offered by the language menu on the trending page.
#
# Fetches BASE_URL, reads the href of every `div.select-menu-item a` link and
# extracts the value following `l=` in a `github.com/trending?l=...` URL.
#
# @return [Array<String>] URL-decoded language names in page order; links
#   whose href does not match the trending-language pattern are skipped
def list_languages
  page = @agent.get(BASE_URL)

  page.search('div.select-menu-item a').each_with_object([]) do |link, languages|
    href = link.attributes['href'].value
    # The language is URL-encoded in the href (e.g. objective-c%2B%2B stands
    # for objective-c++). The host's dot is escaped so that it only matches a
    # literal '.', not an arbitrary character.
    language = href[%r{github\.com/trending\?l=(.+)}, 1]
    languages << CGI.unescape(language) if language
  end
end