Class: ImagesLink

Inherits:
Object
  • Object
show all
Defined in:
lib/ImagesLink.rb

Instance Method Summary collapse

Constructor Details

#initialize(link) ⇒ ImagesLink

Returns a new instance of ImagesLink.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/ImagesLink.rb', line 8

# Downloads the page at +link+, parses it and prepares the state used by the
# image-collecting methods.
#
# If the connection is refused, a message is printed and the process is
# terminated via Kernel#abort; no other network error is rescued here.
#
# @param link [String] URL of the page to scan (handed as-is to HTTParty.get)
def initialize(link)
  @link = link

  @response = begin
    HTTParty.get(@link)
  rescue Errno::ECONNREFUSED => e
    puts "Неправильный урл"
    abort e.message
  end

  @doc = Nokogiri::HTML(@response.body)
  @doc.search('//noscript').each(&:remove) # drop the <noscript> nodes that get in the way
  @arra_links = [] # every collected image URL is stored here
  @handler_link = HandlerLink.new # URL post-processor
  @link_host_name = @handler_link.get_host_link(@link) # domain name of the page
end

Instance Method Details

#get_images_from_url ⇒ Object

Возвращает все найденные урлы картинок.



26
27
28
29
30
# File 'lib/ImagesLink.rb', line 26

# Returns every image URL found on the page.
#
# Merges the links gathered from tag attributes with the links extracted from
# the raw document text, drops duplicates, stores the list in @arra_links and
# passes it to HandlerLink#remove_global_unless_symbols (return value ignored,
# so any clean-up it does must happen in place).
#
# @return [Array] the list stored in @arra_links
def get_images_from_url
  combined = get_url_with_attr_img_link + get_url_with_other_attr
  @arra_links = combined.uniq
  @handler_link.remove_global_unless_symbols(@arra_links)
  @arra_links
end


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/ImagesLink.rb', line 32

# Collects image URLs from <img src="..."> tags and from <link> tags whose
# +type+ attribute mentions "image".
#
# Each URL is passed through HandlerLink#handler_prefix_link together with the
# page's host name. The list built here is then de-duplicated and handed to
# HandlerLink#remove_unless_link before it is returned.
#
# @return [Array] the collected links
def get_url_with_attr_img_link
  img_arra = []

  # Walk over the <img> tags and grab each src.
  @doc.xpath('//img').each do |img|
    next if img['src'].nil?

    src = @handler_link.remove_unless_symbols([img['src'].to_s])
    # The helper's result is stringified and the `[`, `"` and `]` characters
    # (as produced by Array#to_s) are stripped. The non-bang `delete` matters:
    # `delete!` returns nil when nothing was removed, which would pass nil on
    # as the link.
    img_arra << @handler_link.handler_prefix_link(@link_host_name, src.to_s.delete("[\"]"))
  end

  # <link> tags are taken only when both href and an image-like type are present.
  @doc.xpath('//link').each do |link|
    next if link['href'].nil? || link['type'].nil?
    next unless link['type'].include?("image")

    img_arra << @handler_link.handler_prefix_link(@link_host_name, link['href'])
  end

  # Clean the list that is actually returned (not the unrelated @arra_links).
  # remove_unless_link is assumed to filter in place: its return value is
  # ignored here, as it is in get_url_with_other_attr.
  img_arra.uniq!
  @handler_link.remove_unless_link(img_arra)
  img_arra
end

#get_url_with_other_attr ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/ImagesLink.rb', line 59

# Finds every URL in the serialized document that contains a .gif, .png,
# .jpg or .jpeg extension and stores the list in @images_links.
#
# The document text is round-tripped through UTF-16BE with invalid bytes
# replaced by "?" so that URI.extract receives valid UTF-8. The list is then
# passed to three HandlerLink helpers whose return values are ignored, so any
# clean-up they do must happen in place.
#
# @return [Array<String>] the list stored in @images_links
def get_url_with_other_attr
  utf16_text = @doc.to_s.encode("UTF-16be", :invalid => :replace, :replace => "?")
  clean_text = utf16_text.encode('UTF-8')
  candidates = URI.extract(clean_text)

  @images_links = candidates.select do |url|
    url =~ /\.(?:gif|png|jpe?g)\b/
  end

  @handler_link.handler_links(@images_links, @link) # post-process the URLs
  @handler_link.remove_unless_symbols(@images_links)
  @handler_link.remove_unless_link(@images_links)
  @images_links
end