Class: InstagramCrawler::Parser::Html

Inherits:
Base
  • Object
show all
Defined in:
lib/instagram_crawler/parser/html.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Html

Returns a new instance of Html.



6
7
8
# File 'lib/instagram_crawler/parser/html.rb', line 6

# Builds a parser for the page at +url+.
#
# The page is obtained through +get_html+, which is not defined in this
# class (presumably inherited from Base — confirm), and the result is kept
# in @html for the parsing methods to read.
#
# @param url [String] address of the page to load (type assumed — confirm
#   against +get_html+)
def initialize(url)
  @html = get_html(url)
end

Instance Attribute Details

#html ⇒ Object (readonly)

Returns the value of attribute html.



4
5
6
# File 'lib/instagram_crawler/parser/html.rb', line 4

# Reader for the value the constructor stored in @html.
#
# @return [Object] whatever +get_html+ returned; presumably the raw page
#   markup as a String (confirm)
def html
  @html
end

Instance Method Details

#parsing ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/instagram_crawler/parser/html.rb', line 10

# Parses the profile page held in +html+ and passes its timeline edges to
# +loop_edges+.
#
# The data is read from the <script> element whose text mentions
# `window._sharedData`; the JSON object is sliced out of that script text
# and parsed.
#
# @return [Array] a two-element array: the timeline's "page_info" value and
#   the "logging_page_id" value with its leading "profilePage_" removed
# @raise [NoMethodError] if the script element or an expected key is missing
# @raise [JSON::ParserError] if the sliced script text is not valid JSON
def parsing
  doc     = Nokogiri::HTML(html)
  js_data = doc.at_xpath("//script[contains(text(),'window._sharedData')]")
  # Drops the first 21 characters and the final one; presumably the
  # "window._sharedData = " lead-in (21 characters) and a trailing ";".
  json    = JSON.parse(js_data.text[21..-2])
  profile = json["entry_data"]["ProfilePage"][0]

  # Both page_info and edges live under the same node, so fetch it once.
  timeline  = profile["graphql"]["user"]["edge_owner_to_timeline_media"]
  page_info = timeline["page_info"]

  # String#delete treats its argument as a set of characters and would strip
  # every "p", "r", "o", ... from anywhere in the id. Only the literal
  # leading prefix must go.
  user_id = profile["logging_page_id"].sub(/\AprofilePage_/, "")

  loop_edges(timeline["edges"])

  [page_info, user_id]
end

#parsing_photo_page ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/instagram_crawler/parser/html.rb', line 30

# Extracts the media description of the single-post page held in +html+.
#
# The data is read from the <script> element whose text mentions
# `window._sharedData`, sliced out of the script text and parsed as JSON.
#
# @return [Object] the "edges" value under "edge_sidecar_to_children" when
#   that key holds a truthy value, otherwise the post's "display_url" value
def parsing_photo_page
  script = Nokogiri::HTML(html).at_xpath("//script[contains(text(),'window._sharedData')]")
  # Drops the first 21 characters and the final one of the script text
  # before parsing.
  payload = JSON.parse(script.text[21..-2])
  media   = payload["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"]
  sidecar = media["edge_sidecar_to_children"]

  # No sidecar entry: fall back to the post's own display URL.
  return media["display_url"] unless sidecar

  sidecar["edges"]
end

#parsing_video_page ⇒ Object



24
25
26
27
28
# File 'lib/instagram_crawler/parser/html.rb', line 24

# Reads the video address from the <meta property="og:video"> element of
# the page held in +html+.
#
# @return [String, nil] the element's "content" attribute, or nil when the
#   element carries no such attribute
# @raise [NoMethodError] if the page has no og:video <meta> element
def parsing_video_page
  doc    = Nokogiri::HTML(html)
  meta_v = doc.at_xpath("//meta[@property='og:video']")
  # Read the attribute by name. Taking the last attribute node only works
  # when "content" happens to be written last in the tag.
  meta_v["content"]
end