Class: Mosquito::UserScraper
- Defined in:
- lib/mosquito/scrapers/user_scraper.rb
Instance Method Summary
Methods inherited from Scraper
#get_content_of_page_from_url_curl, #get_content_of_subpage_from_url, #initialize
Constructor Details
This class inherits a constructor from Mosquito::Scraper
Instance Method Details
#parse(username) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/mosquito/scrapers/user_scraper.rb', line 10

# Scrapes a user's public profile page from the Nitter instance named by the
# NITTER_URL environment variable and returns its fields as a Hash.
#
# Side effect: sets Capybara.app_host to the value of NITTER_URL.
#
# @param username [String] handle to look up; any "/" characters are removed
# @return [Hash] with the keys :id, :name, :username, :sign_up_date, :location,
#   :profile_image_url, :description, :followers_count, :following_count,
#   :tweet_count, :listed_count, :verified, :url and :profile_image_file_name.
#   Fields whose node is absent from the page are nil (counts are 0,
#   :verified is false).
# @raise [Mosquito::NoTweetFoundError] when the page contains an error panel
# @raise [KeyError] when NITTER_URL is not set
def parse(username)
  # Fail loudly when the instance URL is missing instead of requesting "/<username>".
  nitter_url = ENV.fetch("NITTER_URL")
  Capybara.app_host = nitter_url

  username = username.delete("/")
  doc = Nokogiri::HTML(URI.open("#{nitter_url}/#{username}"))

  raise Mosquito::NoTweetFoundError unless doc.xpath("//div[contains(@class, 'error-panel')]").empty?

  full_name = doc.xpath("//a[contains(@class, 'profile-card-fullname')]/@title").first&.value

  # DateTime.parse(nil) raises TypeError, so only parse when the node exists.
  joined_on = doc.xpath("//div[contains(@class, 'profile-joindate')]/span/@title").first&.value
  sign_up_date = joined_on && DateTime.parse(joined_on)

  location = doc.xpath("//div[contains(@class, 'profile-location')]/span[last()]").first&.content
  description = doc.xpath("//div[contains(@class, 'profile-bio')]/p").first&.content
  url = doc.xpath("//div[contains(@class, 'profile-website')]/span[last()]/a/@href").first&.content

  # Without an avatar link there is nothing to download; do not hand the bare
  # host URL to the media retriever.
  avatar_path = doc.xpath("//a[contains(@class, 'profile-card-avatar')]/@href").first&.value
  profile_image_url = avatar_path && "#{nitter_url}#{avatar_path}"
  profile_image_file_name = profile_image_url && Mosquito.retrieve_media(profile_image_url)

  verified = !doc.xpath(
    "//a[contains(@class, 'profile-card-fullname')]/div/span[contains(@title, 'Verified account')]"
  ).empty?

  {
    id: username, # the handle doubles as the id
    name: full_name,
    username: username,
    sign_up_date: sign_up_date,
    location: location,
    profile_image_url: profile_image_url,
    description: description,
    followers_count: profile_stat_count(doc, "followers"),
    following_count: profile_stat_count(doc, "following"),
    tweet_count: profile_stat_count(doc, "posts"),
    listed_count: 0, # We can't get this from nitter, and it's not a big deal
    verified: verified,
    url: url,
    profile_image_file_name: profile_image_file_name
  }
end

# Reads one profile statistic (e.g. "followers") from the parsed page as an
# Integer, stripping thousands separators; returns 0 when the node is absent.
private def profile_stat_count(doc, stat_class)
  node = doc.xpath(
    "//li[contains(@class, '#{stat_class}')]/span[contains(@class, 'profile-stat-num')]"
  ).first
  node&.content.to_s.delete(",").to_i
end
#take_screenshot ⇒ Object
72 73 74 75 76 77 |
# File 'lib/mosquito/scrapers/user_scraper.rb', line 72

# Saves a screenshot under Mosquito.temp_storage_location using a unique,
# UUID-based PNG file name, and returns whatever save_screenshot returns.
#
# NOTE(review): no overlay detection or clearing happens here; the method only
# takes the screenshot. The "instagram_" file-name prefix looks inherited from
# another scraper; it is kept as-is so existing file names do not change.
def take_screenshot
  directory = Mosquito.temp_storage_location
  file_name = "instagram_screenshot_#{SecureRandom.uuid}.png"

  # save_screenshot is not defined in this block; presumably provided by the
  # browser-driver DSL mixed into the parent scraper — confirm.
  save_screenshot("#{directory}/#{file_name}")
end