Class: Zorki::UserScraper

Inherits:
Scraper
  • Object
show all
Defined in:
lib/zorki/scrapers/user_scraper.rb

Instance Method Summary collapse

Methods inherited from Scraper

#get_content_of_subpage_from_url, #initialize

Constructor Details

This class inherits a constructor from Zorki::Scraper

Instance Method Details

#parse(username) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/zorki/scrapers/user_scraper.rb', line 7

# Scrapes the Instagram profile page for +username+ and returns the profile
# details as a Hash.
#
# The payload returned by +get_content_of_subpage_from_url+ is handled in one
# of two shapes:
#
# * a payload with a non-nil "author" entry, where counts are read from the
#   "interactionStatistic" list, and
# * otherwise, a payload where the profile lives under "data" -> "user".
#
# @param username [String] the handle to look up; it must equal the username
#   found in the payload exactly
# @return [Hash] with the keys :name, :username, :number_of_posts,
#   :number_of_followers, :verified, :profile, :profile_link, :profile_image
#   and :profile_image_url; :number_of_following is only present for the
#   "data" -> "user" payload shape
# @raise [Zorki::Error] if the payload describes a different username, or if
#   the user node or a required interaction statistic is missing
def parse(username)
  # Stuff we need to get from the DOM (implemented is starred):
  # - *Name
  # - *Username
  # - *No. of posts
  # - *Verified
  # - *No. of followers
  # - *No. of people they follow
  # - *Profile
  #   - *description
  #   - *links
  # - *Profile image

  graphql_script = get_content_of_subpage_from_url("https://instagram.com/#{username}/", "?username=")
  author = graphql_script["author"]

  if author.nil?
    user = graphql_script.dig("data", "user")
    # A payload without a user node is reported with the library's own error
    # type rather than a NoMethodError on nil.
    raise Zorki::Error if user.nil?

    # Get the username (to verify we're on the right page here)
    raise Zorki::Error unless username == user["username"]

    profile_image_url = user["profile_pic_url_hd"]
    {
      name: user["full_name"],
      username: username,
      number_of_posts: user["edge_owner_to_timeline_media"]["count"],
      number_of_followers: user["edge_followed_by"]["count"],
      number_of_following: user["edge_follow"]["count"],
      verified: user["is_verified"],
      profile: user["biography"],
      profile_link: user["external_url"],
      profile_image: Zorki.retrieve_media(profile_image_url),
      profile_image_url: profile_image_url
    }
  else
    # Get the username (to verify we're on the right page here). `dig` makes a
    # missing "identifier" surface as a mismatch instead of a NoMethodError.
    raise Zorki::Error unless username == author.dig("identifier", "value")

    statistics = graphql_script["interactionStatistic"] || []

    # Looks up one statistic by its interactionType and returns its count as
    # an Integer; an absent statistic is an unusable payload.
    interaction_count = lambda do |interaction_type|
      statistic = statistics.find { |candidate| candidate["interactionType"] == interaction_type }
      raise Zorki::Error if statistic.nil?

      Integer(statistic["userInteractionCount"])
    end

    profile_image_url = author["image"]
    {
      name: author["name"],
      username: username,
      # The scheme differs between the two types (https vs http) exactly as in
      # the original lookups; the values are matched verbatim.
      number_of_posts: interaction_count.call("https://schema.org/FilmAction"),
      number_of_followers: interaction_count.call("http://schema.org/FollowAction"),
      # number_of_following is not populated for this payload shape.
      verified: author["is_verified"], # TODO: unchanged from the original lookup, which was marked "todo"
      profile: graphql_script["description"],
      profile_link: author["sameAs"],
      profile_image: Zorki.retrieve_media(profile_image_url),
      profile_image_url: profile_image_url
    }
  end
end