Class: Archare::Crawler

Inherits:
Object
  • Object
show all
Includes:
Singleton
Defined in:
lib/archare/crawler.rb

Constant Summary collapse

# Root URL of the LeetCode site; the other URIs below are built from it.
@@LEETCODE_URI_BASE =
"https://leetcode.com/"
# URL of the algorithms problem-set page; default +uri+ of #lc_problems and #lc_tags.
@@LEETCODE_URI_PROBLEM =
@@LEETCODE_URI_BASE + "problemset/algorithms/"
# URL prefix under which tag pages live (presumably completed with a tag slug — confirm against lc_tag_uri_of).
@@LEETCODE_URI_TAG =
@@LEETCODE_URI_BASE + "tag/"

Instance Method Summary collapse

Instance Method Details

#lc_problems(update = false, uri = @@LEETCODE_URI_PROBLEM) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/archare/crawler.rb', line 19

# Returns the list of problem titles.
#
# When +update+ is false the list is taken from the "problems" key of
# whatever +read_json_file+ returns for "lc_problems". When +update+ is
# true the page at +uri+ is fetched with +get_dom_from_uri+, the text of
# every anchor inside a <table> whose href contains "/problems/" is
# collected, and the result is stored through +write_json_file+ under the
# "problems" key before being returned.
#
# The title is the anchor text as-is. (An alternative that was considered
# derives it from the href slug:
# href.split('problems/')[-1][0..-2].gsub('-', ' ').)
#
# @param update [Boolean] true to re-scrape +uri+, false to use the cache
# @param uri [String] page to scrape when +update+ is true
# @return [Array<String>] problem titles in document order (update path);
#   on the cached path, whatever is stored under "problems"
def lc_problems(update = false, uri = @@LEETCODE_URI_PROBLEM)
  if update
    anchors = get_dom_from_uri(uri).css('table').css('a')

    # Only anchors that point at a problem page contribute a title.
    problem_anchors = anchors.select { |anchor| anchor['href'].to_s.include?('/problems/') }
    titles = problem_anchors.map { |anchor| anchor.text }

    write_json_file "lc_problems", {"problems" => titles}
    titles
  else
    cached = read_json_file "lc_problems"
    cached["problems"]
  end
end

#lc_tags(update = false, uri = @@LEETCODE_URI_PROBLEM) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/archare/crawler.rb', line 46

# Returns the list of tag slugs.
#
# When +update+ is false the list is taken from the 'tags' key of whatever
# +read_json_file+ returns for "lc_tags". When +update+ is true the page at
# +uri+ is fetched with +get_dom_from_uri+, every anchor whose href
# contains "/tag/" is turned into a slug (the part of the href after the
# last "tag/"), and the result is stored through +write_json_file+ under
# the 'tags' key before being returned.
#
# @param update [Boolean] true to re-scrape +uri+, false to use the cache
# @param uri [String] page to scrape when +update+ is true
# @return [Array<String>] tag slugs in document order (update path); on
#   the cached path, whatever is stored under 'tags'
def lc_tags(update = false, uri = @@LEETCODE_URI_PROBLEM)
  unless update
    cached = read_json_file "lc_tags"
    return cached['tags']
  end

  hrefs = get_dom_from_uri(uri).css('a').map { |link| link['href'].to_s }
  tag_hrefs = hrefs.select { |href| href.include?('/tag/') }

  # Strip only a trailing "/" if one is present. Unconditionally dropping
  # the last character would truncate slugs of hrefs such as "/tag/tree".
  tags = tag_hrefs.map { |href| href.split('tag/')[-1].chomp('/') }

  write_json_file "lc_tags", { 'tags' => tags }
  tags
end

#lc_tags_problems_map(update = false) ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/archare/crawler.rb', line 69

# Returns a hash mapping each tag to the list of problems found on that
# tag's page.
#
# When +update+ is false the map is whatever +read_json_file+ returns for
# "lc_tags_problems_map". When +update+ is true the tag list is refreshed
# via +lc_tags(true)+, every tag page (located with +lc_tag_uri_of+) is
# scraped via +lc_problems(true, ...)+ while a progress line is printed,
# and the finished map is stored through +write_json_file+.
#
# The method prints nothing except the progress output of the update path;
# it does not dump the tags or the resulting map to stdout.
#
# @param update [Boolean] true to re-scrape everything, false to use the cache
# @return [Hash] tag => list of problems
def lc_tags_problems_map(update = false)
  return read_json_file("lc_tags_problems_map") unless update

  map = {}
  tags = lc_tags(true)

  puts "updating tags-problems map. It will finish in about 1 minute \n\n"
  tags.each_with_index do |tag, i|
    map[tag] = lc_problems(true, lc_tag_uri_of(tag))
    # "\033[1A" moves the cursor up one line so each progress message
    # overwrites the previous one; the trailing spaces blank out leftovers
    # of a longer earlier message.
    print "\033[1A \r #{i + 1} of #{tags.length} tags: #{tag}                                        \n"
    $stdout.flush
  end

  write_json_file "lc_tags_problems_map", map
  map
end

#update_lc_data ⇒ Object



96
97
98
99
# File 'lib/archare/crawler.rb', line 96

# Refreshes the scraped data by invoking the updating variants of
# #lc_tags_problems_map and then #lc_problems, in that order.
#
# @return [Object] the value returned by +lc_problems(true)+
def update_lc_data
  # The map is refreshed first; the problem list is refreshed last, so its
  # result is what this method returns.
  lc_tags_problems_map(true)
  lc_problems(true)
end