Class: Forki::UserScraper

Inherits:
Scraper
  • Object
show all
Defined in:
lib/forki/scrapers/user_scraper.rb

Instance Method Summary collapse

Methods inherited from Scraper

#download_image, #find_graphql_data_closure_index, #find_graphql_data_strings, #initialize

Constructor Details

This class inherits a constructor from Forki::Scraper

Instance Method Details

#extract_page_details(graphql_strings) ⇒ Object

Returns a hash of details about a Facebook page



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/forki/scrapers/user_scraper.rb', line 83

# Builds a hash of details about a Facebook page from its GraphQL payloads.
#
# Two payloads are looked up by substring: the one mentioning both
# "comet_page_cards" and "follower_count" (source of the about card), and the
# one mentioning both "profile_photo" and "is_verified" (source of the
# verification flag and profile picture).
#
# @param graphql_strings [Array<String>] raw JSON strings found in the page
# @return [Hash] with keys :id, :profile, :number_of_followers, :name,
#   :verified, :profile_image_url and :number_of_likes
def extract_page_details(graphql_strings)
  cards_payload = graphql_strings.find do |raw|
    %w[comet_page_cards follower_count].all? { |needle| raw.include?(needle) }
  end
  about_card = JSON.parse(cards_payload)["page"]["comet_page_cards"].find do |card|
    card["__typename"] == "CometPageAboutCardWithoutMapRenderer"
  end

  viewer_payload = graphql_strings.find do |raw|
    %w[profile_photo is_verified].all? { |needle| raw.include?(needle) }
  end
  viewer_object = JSON.parse(viewer_payload)

  # Everything except the verification flag and the picture lives on the about card.
  about_page = about_card["page"]
  {
    id: about_page["id"],
    profile: about_page["page_about_fields"]["blurb"],
    number_of_followers: about_page["follower_count"],
    name: about_page["name"],
    verified: viewer_object["page"]["is_verified"],
    profile_image_url: viewer_object["page"]["profile_picture"]["uri"],
    number_of_likes: about_page["page_likers"]["global_likers_count"]
  }
end

#extract_profile_details(graphql_strings) ⇒ Object

Returns a hash of details about a Facebook user profile



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/forki/scrapers/user_scraper.rb', line 43

# Builds a hash of details about a Facebook user profile from its GraphQL payloads.
#
# The follower count is first searched for in the profile header payload. When
# it is not found there, the intro tile section (the payload mentioning
# "profile_tile_section_type") is searched for an INTRO_CARD_FOLLOWERS item.
# If neither source yields a count, :number_of_followers is nil.
#
# @param graphql_strings [Array<String>] raw JSON strings found in the page
# @return [Hash] with keys :id, :number_of_followers, :name, :verified,
#   :profile, :profile_image_url and :number_of_likes
def extract_profile_details(graphql_strings)
  profile_header_str = graphql_strings.find { |gql| gql.include? "profile_header_renderer" }
  profile_intro_str = graphql_strings.find { |gql| gql.include? "profile_intro_card" }
  profile_header_obj = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]
  profile_intro_obj = profile_intro_str ? JSON.parse(profile_intro_str) : nil

  number_of_followers = find_number_of_followers(profile_header_str)

  # The header does not always show a follower count; fall back to the intro tiles.
  if number_of_followers.nil?
    profile_tile_section_str = graphql_strings.find { |gql| gql.include? "profile_tile_section_type" }

    followers_nodes = []
    # Guard: without a tile section payload there is nothing to parse
    # (JSON.parse(nil) would raise TypeError).
    if profile_tile_section_str
      json = JSON.parse(profile_tile_section_str)
      begin
        tile_items = json["user"]["profile_tile_sections"]["edges"].first["node"]["profile_tile_views"]["nodes"][1]["view_style_renderer"]["view"]["profile_tile_items"]["nodes"]
        followers_nodes = tile_items.select do |node|
          node["node"]["timeline_context_item"]["timeline_context_list_item_type"] == "INTRO_CARD_FOLLOWERS"
        end
      rescue NoMethodError
        # Deliberate best effort: any missing level in the tile structure means
        # the profile exposes no follower tile, which is not an error.
        followers_nodes = []
      end
    end

    number_of_followers = find_number_followers_for_normal_profile(followers_nodes.first) unless followers_nodes.empty?
  end

  # An intro card may be absent, or present without a bio; both yield an empty profile text.
  bio_text = profile_intro_obj && profile_intro_obj.dig("profile_intro_card", "bio", "text")

  {
    id: profile_header_obj["user"]["id"],
    number_of_followers: number_of_followers,
    name: profile_header_obj["user"]["name"],
    verified: profile_header_obj["user"]["is_verified"],
    profile: bio_text || "",
    profile_image_url: profile_header_obj["user"]["profilePicLarge"]["uri"],
    number_of_likes: find_number_of_likes(profile_header_str)
  }
end

#find_number_followers_for_normal_profile(profile_followers_node) ⇒ Object



35
36
37
38
39
40
# File 'lib/forki/scrapers/user_scraper.rb', line 35

# Reads the follower count out of an intro-card followers tile.
#
# Takes the first run of digits and commas from the tile's title text and hands
# it to Scraper.extract_int_from_num_element (presumably an Integer parser for
# display numbers — confirm against Scraper). An empty string is passed when
# the title contains no such run.
#
# @param profile_followers_node [Hash] tile node whose title text is located at
#   node -> timeline_context_item -> renderer -> context_item -> title -> text
# @return [Object] whatever Scraper.extract_int_from_num_element returns
def find_number_followers_for_normal_profile(profile_followers_node)
  context_item = profile_followers_node["node"]["timeline_context_item"]["renderer"]["context_item"]
  title_text = context_item["title"]["text"]

  digits_match = /[0-9,]+/.match(title_text)
  digits = digits_match ? digits_match[0] : ""

  Scraper.extract_int_from_num_element(digits)
end

#find_number_of_followers(profile_details_string) ⇒ Object

Finds and returns the number of people who follow the current page



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/forki/scrapers/user_scraper.rb', line 17

# Finds the follower count mentioned in a profile details string.
#
# Two phrasings are recognised, tried in this order:
#   "Followed by <count> people"
#   "<count> followers" / "<count> Followers"
# where <count> is a run of digits, commas, periods, spaces and the K / M
# suffixes. The captured text is handed to Scraper.extract_int_from_num_element
# (presumably an Integer parser for display numbers — confirm against Scraper).
#
# @param profile_details_string [String] text (typically raw GraphQL JSON) to search
# @return [Object, nil] the parsed count, or nil when neither phrasing is present
def find_number_of_followers(profile_details_string)
  # The count must allow more than one character ("1,234", "1.2K"), hence the `+`.
  followers_pattern = /Followed by (?<num_followers>[0-9,.KM ]+) people/
  alt_follower_pattern = /(?<num_followers>[0-9,.KM ]+) [fF]ollowers/
  number_of_followers_match = followers_pattern.match(profile_details_string) ||
                              alt_follower_pattern.match(profile_details_string)

  return nil if number_of_followers_match.nil?

  # Note, this is sticking around if we want to use it later
  # if number_of_followers.nil?
  #   number_of_followers_string = JSON.parse(profile_header_str)["user"]["profile_header_renderer"]["user"]["profile_social_context"]["content"].first["text"]["text"]
  #   number_of_followers = Scraper.extract_int_from_num_element(number_of_followers_string)
  # end

  Scraper.extract_int_from_num_element(number_of_followers_match[:num_followers])
end

#find_number_of_likes(profile_details_string) ⇒ Object

Finds and returns the number of people who like the current page



6
7
8
9
10
11
12
13
14
# File 'lib/forki/scrapers/user_scraper.rb', line 6

# Finds the like count mentioned in a profile details string.
#
# Recognises "<count> likes" / "<count> Likes", where <count> is a run of
# digits, commas, periods, spaces and the K / M suffixes. The captured text is
# handed to Scraper.extract_int_from_num_element (presumably an Integer parser
# for display numbers — confirm against Scraper).
#
# @param profile_details_string [String] text (typically raw GraphQL JSON) to search
# @return [Object, nil] the parsed count, or nil when no like count is present
def find_number_of_likes(profile_details_string)
  likes_pattern = /(?<num_likes>[0-9,.KM ]+) [lL]ikes/
  number_of_likes_match = likes_pattern.match(profile_details_string)

  return nil if number_of_likes_match.nil?

  Scraper.extract_int_from_num_element(number_of_likes_match[:num_likes])
end

#parse(url) ⇒ Object

Uses GraphQL data and DOM elements to collect information about the current user page



102
103
104
105
106
107
108
109
110
111
112
# File 'lib/forki/scrapers/user_scraper.rb', line 102

# Collects information about the user or page at the given URL.
#
# Loads the page, pulls the GraphQL JSON strings out of its HTML, and treats the
# target as a page when any parsed payload has a top-level "page" key; otherwise
# it is treated as a user profile. The result is then extended with the
# retrieved profile image and the original URL.
#
# @param url [String] address of the profile or page to scrape
# @return [Hash] the extracted details plus :profile_image_file and :profile_link
def parse(url)
  validate_and_load_page(url)
  graphql_strings = find_graphql_data_strings(page.html)

  parsed_payloads = graphql_strings.map { |raw| JSON.parse(raw) }
  details =
    if parsed_payloads.any? { |payload| payload.key?("page") }
      extract_page_details(graphql_strings)
    else
      extract_profile_details(graphql_strings)
    end

  details[:profile_image_file] = Forki.retrieve_media(details[:profile_image_url])
  details[:profile_link] = url
  details
end