Class: Zorki::UserScraper

Inherits:
Scraper
  • Object
show all
Defined in:
lib/zorki/scrapers/user_scraper.rb

Instance Method Summary collapse

Methods inherited from Scraper

#get_content_of_subpage_from_url, #initialize

Constructor Details

This class inherits a constructor from Zorki::Scraper

Instance Method Details

#parse(username) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/zorki/scrapers/user_scraper.rb', line 7

# Scrapes the profile details of a single Instagram user.
#
# The profile URL is handed to +get_content_of_subpage_from_url+, first with
# "?username=" and, if that yields nothing, with "web_profile_info". Either
# result may arrive wrapped in an Array, in which case its first element is
# used. Two payload shapes are understood:
#
# * a hash with a non-nil "author" entry plus schema.org-style
#   "interactionStatistic" entries
# * a hash carrying the user under "data" => "user"
#
# @param username [String] the handle to look up
# @return [Hash] with keys :name, :username, :number_of_posts,
#   :number_of_followers, :verified, :profile, :profile_link, :profile_image
#   and :profile_image_url. :number_of_following is only present for the
#   "data" => "user" shape. Counts are nil when the payload omits them.
# @raise [Zorki::Error] if neither lookup returns a hash, if the payload has no
#   user, or if the scraped username differs from +username+
def parse(username)
  profile_url = "https://instagram.com/#{username}/"

  graphql_script = get_content_of_subpage_from_url(profile_url, "?username=")
  graphql_script = graphql_script.first if graphql_script.is_a?(Array)

  if graphql_script.nil?
    graphql_script = get_content_of_subpage_from_url(profile_url, "web_profile_info")
    # Unwrap the fallback exactly like the primary lookup.
    graphql_script = graphql_script.first if graphql_script.is_a?(Array)
  end

  # Fail with the scraper's own error rather than a NoMethodError on nil.
  raise Zorki::Error unless graphql_script.is_a?(Hash)

  if graphql_script["author"].nil?
    user_from_web_profile_info(graphql_script, username)
  else
    user_from_author_payload(graphql_script, username)
  end
end

# Builds the user hash from a payload that carries an "author" object.
private def user_from_author_payload(graphql_script, username)
  user = graphql_script["author"]

  # Get the username (to verify we're on the right page here)
  identifier = user["identifier"]
  scraped_username = identifier && identifier["value"]
  raise Zorki::Error unless username == scraped_username

  statistics = graphql_script["interactionStatistic"] || []
  profile_image_url = user["image"]

  {
    name: user["name"],
    username: username,
    # NOTE(review): FilmAction is matched with an https:// prefix while the
    # other interaction types use http:// -- kept as-is, confirm it is intended.
    number_of_posts: interaction_count(
      statistics,
      ["https://schema.org/FilmAction", "http://schema.org/WriteAction"]
    ),
    number_of_followers: interaction_count(statistics, ["http://schema.org/FollowAction"]),
    # number_of_following is not read from this payload shape.
    verified: user["is_verified"], # TODO: carried over from the original code
    profile: graphql_script["description"],
    profile_link: user["sameAs"],
    profile_image: Zorki.retrieve_media(profile_image_url),
    profile_image_url: profile_image_url
  }
end

# Builds the user hash from a payload that carries the user under "data" => "user".
private def user_from_web_profile_info(graphql_script, username)
  user = graphql_script.dig("data", "user")
  raise Zorki::Error if user.nil?

  # Get the username (to verify we're on the right page here)
  raise Zorki::Error unless username == user["username"]

  profile_image_url = user["profile_pic_url_hd"]

  {
    name: user["full_name"],
    username: username,
    number_of_posts: user.dig("edge_owner_to_timeline_media", "count"),
    number_of_followers: user.dig("edge_followed_by", "count"),
    number_of_following: user.dig("edge_follow", "count"),
    verified: user["is_verified"],
    profile: user["biography"],
    profile_link: user["external_url"],
    profile_image: Zorki.retrieve_media(profile_image_url),
    profile_image_url: profile_image_url
  }
end

# Returns the Integer count of the first statistic whose type is listed, or nil if none matches.
private def interaction_count(statistics, interaction_types)
  statistic = statistics.find { |stat| interaction_types.include?(stat["interactionType"]) }
  statistic && Integer(statistic["userInteractionCount"])
end