Class: Forki::UserScraper

Inherits:
Scraper
  • Object
show all
Defined in:
lib/forki/scrapers/user_scraper.rb

Instance Method Summary collapse

Methods inherited from Scraper

#download_image, #find_graphql_data_closure_index, #find_graphql_data_strings, #initialize

Constructor Details

This class inherits a constructor from Forki::Scraper

Instance Method Details

#extract_page_details(graphql_strings) ⇒ Object

Returns a hash of details about a Facebook page



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/forki/scrapers/user_scraper.rb', line 63

# Builds a hash of details about a Facebook page from its GraphQL payloads.
#
# Two payloads are looked up by substring:
# * the first string containing both "comet_page_cards" and "follower_count",
#   from which the "CometPageAboutCardWithoutMapRenderer" card is taken;
# * the first string containing both "profile_photo" and "is_verified",
#   which supplies the verification flag and the profile picture URI.
#
# @param graphql_strings [Array<String>] raw JSON strings to search
# @return [Hash] with keys :id, :profile, :number_of_followers, :name,
#   :verified, :profile_image_url and :number_of_likes
def extract_page_details(graphql_strings)
  cards_payload = graphql_strings.find do |candidate|
    candidate.include?("comet_page_cards") && candidate.include?("follower_count")
  end
  about_card = JSON.parse(cards_payload)["page"]["comet_page_cards"].find do |card|
    card["__typename"] == "CometPageAboutCardWithoutMapRenderer"
  end

  viewer_payload = graphql_strings.find do |candidate|
    candidate.include?("profile_photo") && candidate.include?("is_verified")
  end
  viewer_page = JSON.parse(viewer_payload)["page"]
  about_page = about_card["page"]

  {
    id: about_page["id"],
    profile: about_page["page_about_fields"]["blurb"],
    number_of_followers: about_page["follower_count"],
    name: about_page["name"],
    verified: viewer_page["is_verified"],
    profile_image_url: viewer_page["profile_picture"]["uri"],
    number_of_likes: about_page["page_likers"]["global_likers_count"],
  }
end

#extract_profile_details(graphql_strings) ⇒ Object

Returns a hash of details about a Facebook user profile



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/forki/scrapers/user_scraper.rb', line 29

# Returns a hash of details about a Facebook user profile.
#
# The follower count is first looked for in the profile header string via
# find_number_of_followers. If that yields nil, the "INTRO_CARD_FOLLOWERS"
# item of the profile tile section is used instead. A profile that exposes
# neither leaves :number_of_followers as nil rather than raising.
#
# @param graphql_strings [Array<String>] raw JSON strings; one of them must
#   contain "profile_header_renderer"
# @return [Hash] with keys :id, :number_of_followers (nil when not shown),
#   :name, :verified, :profile ("" when there is no intro card or no bio text)
#   and :profile_image_url
def extract_profile_details(graphql_strings)
  profile_header_str = graphql_strings.find { |gql| gql.include?("profile_header_renderer") }
  profile_intro_str = graphql_strings.find { |gql| gql.include?("profile_intro_card") }
  profile_header_obj = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]
  profile_intro_obj = profile_intro_str ? JSON.parse(profile_intro_str) : nil

  number_of_followers = find_number_of_followers(profile_header_str)

  # Check if the user shows followers count
  if number_of_followers.nil?
    tile_section_str = graphql_strings.find { |gql| gql.include?("profile_tile_section_type") }

    # `dig` returns nil as soon as a level is missing, so a profile without the
    # expected tile layout (or without a tile section at all) yields no items
    # instead of a TypeError/NoMethodError.
    tile_items_path = [
      "user", "profile_tile_sections", "edges", 0, "node", "profile_tile_views", "nodes", 1,
      "view_style_renderer", "view", "profile_tile_items", "nodes"
    ]
    tile_items = tile_section_str ? JSON.parse(tile_section_str).dig(*tile_items_path) : nil

    followers_node = Array(tile_items).find do |item|
      item.dig("node", "timeline_context_item", "timeline_context_list_item_type") == "INTRO_CARD_FOLLOWERS"
    end
    number_of_followers = find_number_followers_for_normal_profile(followers_node) if followers_node
  end

  header_user = profile_header_obj["user"]

  {
    id: header_user["id"],
    number_of_followers: number_of_followers,
    name: header_user["name"],
    verified: header_user["is_verified"],
    # The intro card may exist without a bio; fall back to an empty string.
    profile: profile_intro_obj&.dig("profile_intro_card", "bio", "text") || "",
    profile_image_url: header_user["profilePicLarge"]["uri"],
  }
end

#find_number_followers_for_normal_profile(profile_followers_node) ⇒ Object



21
22
23
24
25
26
# File 'lib/forki/scrapers/user_scraper.rb', line 21

# Extracts the follower count from a profile tile item.
#
# Walks node -> timeline_context_item -> renderer -> context_item -> title -> text,
# takes the first run of digits and commas from that text, and hands it to
# extract_int_from_num_element (an empty string is passed when nothing matches).
#
# @param profile_followers_node [Hash] a tile item holding the followers title text
# @return [Object] whatever extract_int_from_num_element returns for the matched text
def find_number_followers_for_normal_profile(profile_followers_node)
  title_path = %w[node timeline_context_item renderer context_item title text]
  title_text = title_path.reduce(profile_followers_node) { |current, key| current[key] }

  count_match = /[0-9,]+/.match(title_text)
  extract_int_from_num_element(count_match.to_s)
end

#find_number_of_followers(profile_details_string) ⇒ Object

Finds and returns the number of people who follow the current page



13
14
15
16
17
18
19
# File 'lib/forki/scrapers/user_scraper.rb', line 13

# Finds and returns the number of people who follow the current page.
#
# Two phrasings are recognised, tried in this order:
# * "Followed by <count> people"
# * "<count> followers" / "<count> Followers"
# where <count> is a run of digits, commas, dots, spaces and the K/M suffixes.
#
# @param profile_details_string [String] text to search for a follower count
# @return [Object, nil] nil when neither phrasing is found, otherwise the result
#   of extract_int_from_num_element for the captured count text
def find_number_of_followers(profile_details_string)
  # The count must allow more than one character ("1,234", "12.5K"), hence the `+`.
  followers_pattern = /Followed by (?<num_followers>[0-9,.KM ]+) people/
  alt_follower_pattern = /(?<num_followers>[0-9,.KM ]+) [fF]ollowers/

  number_of_followers_match = followers_pattern.match(profile_details_string) ||
                              alt_follower_pattern.match(profile_details_string)
  return nil if number_of_followers_match.nil?

  extract_int_from_num_element(number_of_followers_match[:num_followers])
end

#find_number_of_likes ⇒ Object

Finds and returns the number of people who like the current page



6
7
8
9
10
# File 'lib/forki/scrapers/user_scraper.rb', line 6

# Finds and returns the number of people who like the current page.
#
# Collects every "span" element via `all`, keeps those whose text matches
# "<char> people like this", and passes the first one (or nil when there is
# none) to extract_int_from_num_element.
#
# @return [Object] whatever extract_int_from_num_element returns
def find_number_of_likes
  likes_pattern = /[0-9,.KM ] people like this/
  matching_spans = all("span").filter do |candidate|
    likes_pattern.match?(candidate.text)
  end

  extract_int_from_num_element(matching_spans.first)
end

#parse(url) ⇒ Object

Uses GraphQL data and DOM elements to collect information about the current user page



82
83
84
85
86
87
88
89
90
91
92
# File 'lib/forki/scrapers/user_scraper.rb', line 82

# Uses GraphQL data and DOM elements to collect information about the current user page.
#
# Loads the URL, pulls the GraphQL strings out of the page HTML, and treats the
# target as a Facebook page when any parsed payload has a top-level "page" key;
# otherwise it is treated as a user profile. The downloaded profile image and
# the original URL are added to the extracted details.
#
# @param url [String] address of the page or profile to scrape
# @return [Hash] the extracted details plus :profile_image_file and :profile_link
def parse(url)
  validate_and_load_page(url)
  graphql_strings = find_graphql_data_strings(page.html)
  parsed_payloads = graphql_strings.map { |raw| JSON.parse(raw) }

  details =
    if parsed_payloads.any? { |payload| payload.key?("page") }
      extract_page_details(graphql_strings)
    else
      extract_profile_details(graphql_strings)
    end

  details[:profile_image_file] = Forki.retrieve_media(details[:profile_image_url])
  details[:profile_link] = url

  details
end