Class: LinkFinder::Finder

Inherits:
Object
  • Object
show all
Defined in:
lib/link_finder.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Finder

Returns a new instance of Finder.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/link_finder.rb', line 11

# Builds the main window from the Glade description, restores the options
# stored in options.dat (next to this file) and enters the GTK main loop.
#
# The call does not return until Gtk.main returns.
def initialize
  # exit! terminates immediately on Ctrl-C without running at_exit handlers.
  trap("SIGINT") { exit! }
  @mutex = Mutex.new

  Gtk.init
  builder = Gtk::Builder.new
  builder.add_from_file(File.join(File.dirname(__FILE__), 'link_finder.glade'))
  # Signal handler names from the Glade file are resolved to methods of this object.
  builder.connect_signals {|handler| method(handler) }

  @wnd_main       = builder.get_object('wnd_main')
  @fcb_links      = builder.get_object('fcb_links')
  @fcb_sites      = builder.get_object('fcb_sites')
  @tbtn_startstop = builder.get_object('tbtn_startstop')
  @sb_threads     = builder.get_object('sb_threads')
  @adj_threads    = builder.get_object('adj_threads')
  @sb_recurse     = builder.get_object('sb_recurse')
  @adj_recurse    = builder.get_object('adj_recurse')
  @pb_progress    = builder.get_object('pb_progress')
  @img_diagram    = builder.get_object('img_diagram')

  @options = {}
  begin
    # NOTE(review): Marshal.load can instantiate arbitrary objects; options.dat
    # must only ever be a file written by this application.
    loaded = File.open(File.join(File.dirname(__FILE__), "options.dat"), "rb") {|f| Marshal.load(f)}
    # Anything but a Hash (truncated or foreign file) would break the lookups below.
    @options = loaded if loaded.is_a?(Hash)
  rescue StandardError => e
    # Best effort: an unreadable or missing file just means default options.
    # StandardError (not Exception) so that signals and exit requests still propagate.
    puts e
  end

  @fcb_links.filename = @options[:links_filename] unless @options[:links_filename].nil?
  @fcb_sites.filename = @options[:sites_filename] unless @options[:sites_filename].nil?
  @options[:threads]  = 5 if @options[:threads].nil?
  @adj_threads.value  = @options[:threads]
  @options[:recurse]  = 2 if @options[:recurse].nil?
  @adj_recurse.value  = @options[:recurse]

  sensitive_state

  @wnd_main.show
  Gtk.main
end

Instance Method Details

#gtk_main_quit ⇒ Object



55
56
57
58
# File 'lib/link_finder.rb', line 55

# Closes the browser, if one is set, and leaves the GTK main loop.
#
# NOTE(review): @browser is not assigned anywhere in the visible code.
#
# @return [Object] whatever Gtk.main_quit returns
def gtk_main_quit
  if !@browser.nil?
    @browser.close
  end
  Gtk.main_quit
end


69
70
71
72
# File 'lib/link_finder.rb', line 69

# Remembers the file chosen for the link list and refreshes which
# controls are enabled.
#
# @param widget [#filename] the widget that reported the selection
# @return [Object] the result of sensitive_state
def on_fcb_links_file_set(widget)
  @options.store(:links_filename, widget.filename)
  sensitive_state
end

#on_fcb_sites_file_set(widget) ⇒ Object



74
75
76
77
# File 'lib/link_finder.rb', line 74

# Remembers the file chosen for the site list and refreshes which
# controls are enabled.
#
# @param widget [#filename] the widget that reported the selection
# @return [Object] the result of sensitive_state
def on_fcb_sites_file_set(widget)
  @options.store(:sites_filename, widget.filename)
  sensitive_state
end

#on_sb_recurse_value_changed(widget) ⇒ Object



83
84
85
# File 'lib/link_finder.rb', line 83

# Copies the current value of the recursion-depth adjustment into the options.
#
# @param widget [Object] unused; the value is read from @adj_recurse instead
# @return [Object] the stored value
def on_sb_recurse_value_changed(widget)
  @options.store(:recurse, @adj_recurse.value)
end

#on_sb_threads_value_changed(widget) ⇒ Object



79
80
81
# File 'lib/link_finder.rb', line 79

# Copies the current value of the thread-count adjustment into the options.
#
# @param widget [Object] unused; the value is read from @adj_threads instead
# @return [Object] the stored value
def on_sb_threads_value_changed(widget)
  @options.store(:threads, @adj_threads.value)
end

#on_tbtn_startstop_toggled(widget) ⇒ Object



87
88
89
90
91
92
93
# File 'lib/link_finder.rb', line 87

# Starts a run when the toggle button becomes active and stops it otherwise.
#
# @param widget [#active?] the toggle button that changed state
# @return [Object] the result of start_process or stop_process
def on_tbtn_startstop_toggled(widget)
  widget.active? ? start_process : stop_process
end

#on_wnd_main_delete_event(widget, event) ⇒ Object



60
61
62
63
64
65
66
67
# File 'lib/link_finder.rb', line 60

# Saves the current options to options.dat (next to this file) when the main
# window is about to close. Saving is best effort: a failure is printed and
# otherwise ignored.
#
# @param widget [Object] unused
# @param event [Object] unused
# @return [false] always; presumably so that GTK carries on closing the window
def on_wnd_main_delete_event(widget, event)
  begin
    # Serialize before opening the file: opening in "wb" truncates it, so a
    # failing dump would otherwise wipe the previously saved options.
    data = Marshal.dump(@options)
    File.open(File.join(File.dirname(__FILE__), "options.dat"), "wb") {|f| f.write(data)}
  rescue StandardError => e
    # StandardError (not Exception) so that signals and exit requests still propagate.
    puts e
  end
  return false
end

#on_wnd_main_destroy ⇒ Object



51
52
53
# File 'lib/link_finder.rb', line 51

# Delegates to gtk_main_quit.
#
# NOTE(review): presumably wired to the main window's "destroy" signal through
# builder.connect_signals in the constructor — confirm against the Glade file.
def on_wnd_main_destroy
  gtk_main_quit
end

#progress_reset ⇒ Object



102
103
104
105
# File 'lib/link_finder.rb', line 102

# Puts the progress bar back into its idle state: a single-space caption
# and a fraction of zero.
#
# @return [Integer] 0
def progress_reset
  bar = @pb_progress
  bar.text = " "
  bar.fraction = 0
end

#progress_set(max, value) ⇒ Object



107
108
109
110
111
112
# File 'lib/link_finder.rb', line 107

# Shows "value of max" on the progress bar and processes pending GTK events
# so that the change becomes visible.
#
# The fraction is derived from the whole-number percentage and kept inside
# 0.0..1.0; a non-positive +max+ yields an empty bar instead of a
# ZeroDivisionError.
#
# @param max [Integer] total number of steps
# @param value [Integer] number of steps completed so far
# @return [nil]
def progress_set(max, value)
  perc = max > 0 ? value * 100 / max : 0
  @pb_progress.text = "#{value} из #{max}"
  @pb_progress.fraction = [[perc * 0.01, 0.0].max, 1.0].min
  # Non-blocking iterations: drain whatever is queued, never wait for new events.
  Gtk.main_iteration_do(false) while Gtk.events_pending?
end

#recurse_select_refs(host, url, links, result, recurse) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/link_finder.rb', line 216

# Fetches +url+, records every anchor whose target is one of +links+ and
# follows anchors that stay on the host of +host+, up to +recurse+ levels.
#
# Relative hrefs are resolved against +url+. Hrefs that cannot be parsed are
# printed and skipped. Pages are not de-duplicated, so a page linked several
# times is fetched (and reported) several times.
#
# @param host [String] URL of the site being crawled; only its host is used
# @param url [String] URL of the page to fetch
# @param links [Array<String>] link targets to look for
# @param result [Array<Hash>] receives <tt>{ :page => url, :link => target }</tt> for every hit
# @param recurse [Numeric] remaining depth; nothing is fetched when it is <= 0
# @return [Object, nil] nil when the depth is exhausted or the page could not be loaded
def recurse_select_refs(host, url, links, result, recurse)
  return if recurse <= 0

  begin
    doc = Timeout.timeout(10) do
      # Kernel#open stopped fetching URLs in Ruby 3.0, so prefer URI.open where
      # open-uri provides it. The block form closes the handle.
      if URI.respond_to?(:open)
        URI.open(url) { |io| Nokogiri::HTML(io) }
      else
        open(url) { |io| Nokogiri::HTML(io) }
      end
    end
  rescue StandardError => e
    puts e
    return nil
  end

  site_host = URI.parse(host).host
  doc.xpath('//a').each do |r|
    begin
      # URI.join resolves relative references against the page and leaves
      # absolute ones (including mailto: and the like) untouched.
      uri = URI.join(url, r['href'])
    rescue StandardError => e
      puts e.to_s + '-' + r['href'].to_s
      next
    end

    target = uri.to_s
    result << { :page => url, :link => target } if links.include?(target)

    recurse_select_refs(host, target, links, result, recurse - 1) if uri.host == site_host
  end
end

#sensitive_state ⇒ Object



95
96
97
98
99
100
# File 'lib/link_finder.rb', line 95

# Enables the start/stop button and both spin buttons only when a links file
# and a sites file have been chosen; disables them otherwise.
#
# @return [Boolean] whether the controls were enabled
def sensitive_state
  ready = !(@options[:links_filename].nil? || @options[:sites_filename].nil?)
  [@tbtn_startstop, @sb_threads, @sb_recurse].each { |control| control.sensitive = ready }
  ready
end

#start_process ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/link_finder.rb', line 115

# Starts a crawl in a background thread and writes a text report when it ends.
#
# Disables the input widgets, reads the link and site lists named in @options,
# runs recurse_select_refs for every site in its own worker thread (throttled
# by @options[:threads]) and finally writes "<@report_filename>.txt" relative
# to the current working directory.
#
# NOTE(review): progress_set is called from the worker threads — confirm that
# the GTK binding tolerates calls from outside the main thread.
#
# @return [Thread] the coordinating thread (also stored in @main_thread)
def start_process
  @report_filename = "report_#{Time.now.to_s.gsub(/[\s\+]/, "_").gsub(/\:/,'-')}"

  progress_reset
  @progress_value = 0

  # The inputs stay locked while a run is in progress.
  [@fcb_links, @fcb_sites, @sb_threads, @sb_recurse].each { |w| w.sensitive = false }

  @threads = []
  @result = []
  @main_thread = Thread.new do
    @links = read_uri_list(@options[:links_filename]) { |line| URI.parse(line).to_s }
    # Sites are reduced to "http://<host>", whatever scheme or path the line had.
    @sites = read_uri_list(@options[:sites_filename]) do |line|
      URI::HTTP.new('http', nil, URI.parse(line).host, nil, nil, nil, nil, nil, nil).to_s
    end

    @sites.each do |site|
      # Thread.list only contains live threads; wait until a worker has finished.
      sleep 0.1 while Thread.list.length - 1 > @options[:threads]
      @threads << Thread.new(site, @links, @options[:recurse]) do |target, links, recurse|
        found = []
        recurse_select_refs(target, target, links.clone, found, recurse)
        @mutex.synchronize do
          @result.concat(found)
          @progress_value += 1
          progress_set(@sites.length, @progress_value)
        end
      end
    end
    @threads.each { |t| t.join }

    write_report
  end
end

# Reads +path+ as UTF-8 and converts every non-blank line with the given block.
# Blank lines (which CRLF line endings would otherwise produce) are skipped;
# lines whose conversion raises are printed and dropped.
def read_uri_list(path)
  lines = File.open(path, "rb:UTF-8") { |f| f.read }.split(/[\r\n]+/)
  lines.map { |line| line.strip }.reject { |line| line.empty? }.map do |line|
    begin
      yield line
    rescue StandardError => e
      puts e
      nil
    end
  end.compact
end

# Writes the report for the finished run. Removes every site that produced a
# hit from @sites, so afterwards @sites holds only the sites without links.
def write_report
  File.open("#{@report_filename}.txt", "wb:UTF-8") do |f|
    f.write Time.now.to_s + "\n"
    all = @sites.length
    @result.each do |r|
      page_host = URI.parse(r[:page]).host
      index = @sites.index(URI::HTTP.new('http', nil, page_host, nil, nil, nil, nil, nil, nil).to_s)
      @sites.delete_at(index) unless index.nil?
    end
    no = @sites.length
    yes = all - no
    # Without any site the ratio is undefined; report 0.0 rather than NaN.
    efficiency = all.zero? ? 0.0 : (yes.to_f * 100 / all.to_f).round(2)

    f.write "Обработано сайтов #{all}\n"
    f.write "Ссылки найдены на #{yes} сайтах\n"
    f.write "Ссылки не обнаружены на #{no} сайтах\n"
    f.write "Эффективность размещения #{efficiency}%\n"

    f.write "\n\nНайденные ссылки:\n"
    f.write "-----------------------\n"
    @links.each do |l|
      f.write "#{l}:\n---\n"
      @result.each { |r| f.write "#{r[:page]}\n" if r[:link] == l }
    end

    f.write "\n\nНе найденные ссылки:\n"
    f.write "-----------------------\n"
    @sites.each { |s| f.write "#{s}\n" }
  end
end

private :read_uri_list, :write_report

#stop_process ⇒ Object



206
207
208
209
210
211
212
213
214
# File 'lib/link_finder.rb', line 206

# Aborts a running crawl: kills the coordinating thread and all worker
# threads, resets the progress bar and re-enables the input widgets.
# Safe to call when no run was ever started.
#
# @return [true]
def stop_process
  @main_thread.kill unless @main_thread.nil?
  # @threads is only assigned by start_process, so it can still be nil here.
  (@threads || []).each { |t| t.kill }
  progress_reset
  @fcb_links.sensitive  =
  @fcb_sites.sensitive  =
  @sb_threads.sensitive =
  @sb_recurse.sensitive = true
end