Module: Searchy

Included in:
Bing, Google, GoogleGroups, Linkedin, PGP, Yahoo
Defined in:
lib/esearchy/searchy.rb

Instance Method Summary collapse

Instance Method Details

#clean(&block) ⇒ Object



232
233
234
# File 'lib/esearchy/searchy.rb', line 232

# Removes from @emails every address for which the given block returns a
# truthy value.
#
# block - called once per stored address with that address as its argument.
#
# Returns @emails (the receiver of delete_if).
def clean(&block)
  # Forward the block itself. The previous `&block.call` invoked the block
  # once with no arguments and tried to use its *result* as the predicate.
  @emails.delete_if(&block)
end

#fix(list) ⇒ Object



224
225
226
227
228
229
230
# File 'lib/esearchy/searchy.rb', line 224

# Rewrites obfuscated addresses in place into the canonical user@host.tld form.
#
# Handles every separator spelling that the search_emails scan pattern
# accepts: "_at_", " at ", " @ " and " dot ", where the surrounding blank may
# be any single whitespace character (the scan pattern uses \s, not a
# literal space).
#
# list - Array of mutable Strings; each element is modified with gsub!.
#
# Returns the same list object.
def fix(list)
  list.each do |address|
    address.gsub!(/\s@\s|\sat\s|_at_/, "@")
    address.gsub!(/\sdot\s/, ".")
  end
end

#hash_url(url) ⇒ Object



220
221
222
# File 'lib/esearchy/searchy.rb', line 220

# Builds a SHA2 hex digest from the current time and +url+.
#
# The current time (as a float) is part of the digested text, so two calls
# with the same url yield different digests.
#
# url - the value to digest; its string form is used.
#
# Returns the hex digest String.
def hash_url(url)
  timestamp = Time.now.to_f.to_s
  Digest::SHA2.hexdigest(timestamp + "--" + url.to_s)
end

#maxhits=(value) ⇒ Object



236
237
238
# File 'lib/esearchy/searchy.rb', line 236

# Writer for the hit limit: stores +value+ in @totalhits.
#
# value - the new limit; stored as given, without validation.
def maxhits=(value)
  @totalhits = value
end

#print_emails(list) ⇒ Object

HELPER METHODS ———————————————————————————



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/esearchy/searchy.rb', line 196

# Prints every address in +list+ that is not already stored in @emails.
#
# Addresses containing the first dot-separated label of @query (with any "@"
# removed) are highlighted in red, all others in green. ANSI escape codes
# are used except on mingw/mswin platforms, where Wcol::color is called
# around a plain puts instead.
#
# list - Array of address Strings.
#
# Returns list.
def print_emails(list)
  # The keyword is the same for the whole list, so build the pattern once.
  # It is escaped so a query containing regexp metacharacters is matched
  # literally instead of raising RegexpError or matching something else.
  keyword = @query.to_s.gsub("@", "").split(".")[0].to_s
  keyword_pattern = Regexp.new(Regexp.escape(keyword))
  on_windows = RUBY_PLATFORM =~ /mingw|mswin/

  list.each do |email|
    next if @emails.include?(email)

    in_target_domain = email =~ keyword_pattern
    if on_windows
      Wcol::color(in_target_domain ? 12 : 2)
      puts email
      Wcol::color(7)
    else
      colour = in_target_domain ? "\033[31m" : "\033[32m"
      puts colour + email + "\033[0m"
    end
  end
end

#search_depth ⇒ Object



240
241
242
243
244
245
246
247
# File 'lib/esearchy/searchy.rb', line 240

# Runs the document searches for every URL list that has been collected.
#
# Each of @r_pdfs, @r_txts and @r_officexs is handed to its matching search
# method when set. @r_docs is only searched on mingw/mswin platforms.
def search_depth
  on_windows = RUBY_PLATFORM =~ /mingw|mswin/

  search_pdfs(@r_pdfs) if @r_pdfs
  search_txts(@r_txts) if @r_txts
  search_office_xml(@r_officexs) if @r_officexs
  search_docs(@r_docs) if on_windows && @r_docs
end

#search_docs(urls) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/esearchy/searchy.rb', line 79

# Downloads each Word document in +urls+ on its own thread, extracts its text
# through Word's OLE automation interface (WIN32OLE) and passes that text to
# search_emails.
#
# urls - Array of URL strings; it is emptied as the work is handed out.
#
# Returns @threads after every thread in it has been joined (nil when
# @threads is unset and urls is empty).
def search_docs(urls)
  until urls.empty?
    # Take the URL on the calling thread. Popping inside the worker raced
    # with this loop: it kept spawning threads until a worker got scheduled,
    # and the surplus workers then crashed on URI.parse(nil).
    url = urls.pop
    @threads << Thread.new(url) do |target|
      begin
        web = URI.parse(target)
        puts "Searching in DOC: #{web.to_s}\n"
        # Honour the port and scheme of the URL instead of assuming port 80.
        http = Net::HTTP.new(web.host, web.port)
        http.use_ssl = (web.scheme == "https")
        http.start do |session|
          # request_uri keeps the "?" between path and query string.
          response = session.request(Net::HTTP::Get.new(web.request_uri))
          case response
          when Net::HTTPSuccess, Net::HTTPRedirection
            name = Searchy::TEMP + "#{hash_url(web.to_s)}.doc"
            begin
              File.open(name, "wb") { |file| file.write(response.body) }
              word = nil
              begin
                word = WIN32OLE.new('word.application')
                word.documents.open(name)
                word.selection.wholestory
                search_emails(word.selection.text.chomp)
                word.activedocument.close(false)
              rescue
                puts "Something went wrong parsing the .doc\n"
              ensure
                # Always shut Word down, even when parsing failed.
                word.quit if word
              end
            ensure
              # Portable cleanup that also runs on errors (this method is
              # only reached on Windows, where `rm` is not available).
              File.delete(name) if File.exist?(name)
            end
          else
            # Raises the exception matching the response class; handled below.
            response.error!
          end
        end
      rescue URI::InvalidURIError
        puts "Error: Malformed URL, skipping: #{target}\n"
      rescue Net::HTTPFatalError
        puts "Error: Something went wrong with the HTTP request.\n"
      rescue Net::HTTPServerException
        puts "Error: Not longer there. 404 Not Found.\n"
      rescue
        puts "Error: < .. SocketError .. >\n"
      end
    end
  end
  @threads.each { |t| t.join } if @threads
end

#search_emails(string) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/esearchy/searchy.rb', line 22

# Scans +string+ for e-mail addresses, prints the new ones and merges them
# into @emails.
#
# Recognised forms, tried in this order at each position:
#   user_at_host.tld, user at host.tld, user@host.tld, user @ host.tld,
#   user at host dot tld
#
# string - the text to scan.
#
# The printing and the update of @emails happen while holding @lock.
def search_emails(string)
  # Strip both the opening and the closing highlight tag for Google results.
  string = string.gsub(/<\/?em>/, "") if self.class == Google

  # The pieces are kept in single-quoted strings on purpose: inside a regexp
  # literal `#$&` is interpolation shorthand for the last-match global, so
  # the former literal never actually had '#', '$' or '&' in its character
  # class (and picked up the text matched by the gsub above instead).
  # Upper-case letters are accepted so that "John@Example.com" is no longer
  # truncated to a lower-case tail.
  atom  = '[a-zA-Z0-9!#$&\'*+=?^_`{|}~-]+'
  label = '[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?'

  dotted_local  = "#{atom}(?:\\.#{atom})*"
  dotted_domain = "(?:#{label}\\.)+#{label}"
  spoken_local  = "#{atom}(?:\\sdot\\s#{atom})*"
  spoken_domain = "(?:#{label}\\sdot\\s)+#{label}"

  # Alternatives are kept separate and in the original order, because the
  # first alternative that matches at a position decides the result.
  alternatives = ['_at_', '\sat\s', '@', '\s@\s'].map do |separator|
    dotted_local + separator + dotted_domain
  end
  alternatives << spoken_local + '\sat\s' + spoken_domain

  list = string.scan(Regexp.new(alternatives.join("|")))
  @lock.synchronize do
    print_emails(list)
    @emails.concat(fix(list)).uniq!
  end
end

#search_office_xml(urls) ⇒ Object



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/esearchy/searchy.rb', line 123

# Downloads each Office Open XML document (docx/xlsx/pptx) in +urls+ on its
# own thread, reads every .xml entry of the archive and passes the combined
# text to search_emails.
#
# urls - Array of URL strings; it is emptied as the work is handed out.
#
# Returns @threads after every thread in it has been joined (nil when
# @threads is unset and urls is empty).
def search_office_xml(urls)
  until urls.empty?
    # Take the URL on the calling thread. Popping inside the worker raced
    # with this loop: it kept spawning threads until a worker got scheduled,
    # and the surplus workers then crashed on URI.parse(nil).
    url = urls.pop
    @threads << Thread.new(url) do |target|
      begin
        web = URI.parse(target)
        # Scan the URL text; a URI object has no #scan. An unrecognised
        # extension yields an empty label rather than a crash.
        format = web.to_s[/docx|xlsx|pptx/i].to_s
        puts "Searching in #{format.upcase}: #{web.to_s}\n"
        # Honour the port and scheme of the URL instead of assuming port 80.
        http = Net::HTTP.new(web.host, web.port)
        http.use_ssl = (web.scheme == "https")
        http.start do |session|
          # request_uri keeps the "?" between path and query string.
          response = session.request(Net::HTTP::Get.new(web.request_uri))
          case response
          when Net::HTTPSuccess, Net::HTTPRedirection
            name = Searchy::TEMP + "#{hash_url(web.to_s)}." + format
            begin
              File.open(name, "wb") { |file| file.write(response.body) }
              begin
                Zip::ZipFile.open(name) do |zip|
                  xml_entries = zip.entries.select { |entry| entry.name =~ /\.xml\z/ }
                  # Collect the contents (not the entry objects) and keep
                  # the parts separated so tokens from two files cannot fuse.
                  text = xml_entries.map { |entry| zip.file.read(entry.name) }.join("\n")
                  search_emails(text)
                end
              rescue
                puts "Something went wrong parsing the .#{format.downcase}\n"
              end
            ensure
              # Portable cleanup that also runs when an error is raised.
              File.delete(name) if File.exist?(name)
            end
          else
            # Raises the exception matching the response class; handled below.
            response.error!
          end
        end
      rescue URI::InvalidURIError
        puts "Error: Malformed URL, skipping: #{target}\n"
      rescue Net::HTTPFatalError
        puts "Error: Something went wrong with the HTTP request.\n"
      rescue Net::HTTPServerException
        puts "Error: Not longer there. 404 Not Found.\n"
      rescue
        puts "Error: < .. SocketError .. >\n"
      end
    end
  end
  @threads.each { |t| t.join } if @threads
end

#search_pdfs(urls) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/esearchy/searchy.rb', line 36

# Downloads each PDF in +urls+ on its own thread, extracts its text with
# PDF::Reader and passes the inspected content to search_emails.
#
# urls - Array of URL strings; it is emptied as the work is handed out.
#
# Returns @threads after every thread in it has been joined (nil when
# @threads is unset and urls is empty).
def search_pdfs(urls)
  until urls.empty?
    # Take the URL on the calling thread. Popping inside the worker raced
    # with this loop: it kept spawning threads until a worker got scheduled,
    # and the surplus workers then crashed on URI.parse(nil).
    url = urls.pop
    @threads << Thread.new(url) do |target|
      begin
        web = URI.parse(target)
        puts "Searching in PDF: #{web.to_s}\n"
        # Honour the port and scheme of the URL instead of assuming port 80.
        http = Net::HTTP.new(web.host, web.port)
        http.use_ssl = (web.scheme == "https")
        http.start do |session|
          # request_uri keeps the "?" between path and query string.
          response = session.request(Net::HTTP::Get.new(web.request_uri))
          case response
          when Net::HTTPSuccess, Net::HTTPRedirection
            name = Searchy::TEMP + "#{hash_url(web.to_s)}.pdf"
            begin
              File.open(name, "wb") { |file| file.write(response.body) }
              begin
                receiver = PageTextReceiver.new
                PDF::Reader.file(name, receiver)
                search_emails(receiver.content.inspect)
              rescue PDF::Reader::UnsupportedFeatureError
                puts "Encrypted PDF: Unable to parse it.\n"
              rescue PDF::Reader::MalformedPDFError
                puts "Malformed PDF: Unable to parse it.\n"
              end
            ensure
              # Portable cleanup that also runs when an unexpected error
              # escapes the parser, so no temp file is left behind.
              File.delete(name) if File.exist?(name)
            end
          else
            # Raises the exception matching the response class; handled below.
            response.error!
          end
        end
      rescue URI::InvalidURIError
        puts "Error: Malformed URL, skipping: #{target}\n"
      rescue Net::HTTPFatalError
        puts "Error: Something went wrong with the HTTP request.\n"
      rescue Net::HTTPServerException
        puts "Error: Not longer there. 404 Not Found.\n"
      rescue
        puts "Error: < .. SocketError .. >\n"
      end
    end
  end
  @threads.each { |t| t.join } if @threads
end

#search_txts(urls) ⇒ Object



165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/esearchy/searchy.rb', line 165

# Downloads each plain-text style document (txt/rtf/ans) in +urls+ on its own
# thread and passes the response body to search_emails.
#
# urls - Array of URL strings; it is emptied as the work is handed out.
#
# Returns @threads after every thread in it has been joined (nil when
# @threads is unset and urls is empty).
def search_txts(urls)
  until urls.empty?
    # Take the URL on the calling thread. Popping inside the worker raced
    # with this loop: it kept spawning threads until a worker got scheduled,
    # and the surplus workers then crashed on URI.parse(nil).
    url = urls.pop
    @threads << Thread.new(url) do |target|
      begin
        web = URI.parse(target)
        # An unrecognised extension yields an empty label rather than a crash.
        label = web.to_s[/txt|rtf|ans/i].to_s.upcase
        puts "Searching in #{label}: #{web.to_s}\n"
        # Honour the port and scheme of the URL instead of assuming port 80.
        http = Net::HTTP.new(web.host, web.port)
        http.use_ssl = (web.scheme == "https")
        http.start do |session|
          # request_uri keeps the "?" between path and query string.
          response = session.request(Net::HTTP::Get.new(web.request_uri))
          case response
          when Net::HTTPSuccess, Net::HTTPRedirection
            search_emails(response.body)
          else
            # Raises the exception matching the response class; handled below.
            response.error!
          end
        end
      rescue URI::InvalidURIError
        puts "Error: Malformed URL, skipping: #{target}\n"
      rescue Net::HTTPFatalError
        puts "Error: Something went wrong with the HTTP request\n"
      rescue Net::HTTPServerException
        puts "Error: Not longer there. 404 Not Found.\n"
      rescue
        puts "Error: < .... >"
      end
    end
  end
  @threads.each { |t| t.join } if @threads
end