Module: EmailCollector

Defined in:
lib/email_collector.rb

Class Method Summary collapse

Class Method Details

.collect(searchReq, domain = nil) ⇒ Object



19
20
21
# File 'lib/email_collector.rb', line 19

# Runs one search per configured keyword and merges the e-mail addresses found.
#
# Each keyword from @keywords is appended to +searchReq+ and handed to
# collect_plain; the nested results are flattened and de-duplicated.
#
# @param searchReq [String] base search request
# @param domain [String, nil] passed through to collect_plain unchanged
# @return [Array] flat list of unique, non-nil matches
def self.collect(searchReq, domain = nil)
  per_keyword = @keywords.map do |keyword|
    collect_plain("#{searchReq} #{keyword}", domain)
  end

  per_keyword.flatten.uniq.compact
end

.collect_plain(searchReq, domain = nil) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/email_collector.rb', line 23

# Searches for +searchReq+ and extracts e-mail addresses from every result
# snippet returned by google_search.
#
# Without a domain, a single search is made, each snippet is logged at debug
# level and any address with a dotted host name is extracted.
#
# With a domain, two searches are made (one quoting the domain, one quoting
# "at <domain>"); each snippet is passed through filter_at_domain and only
# addresses at that domain are extracted.
#
# @param searchReq [String] search request
# @param domain [String, nil] restricts matches to this domain when truthy
# @return [Array<Array<String>>] one array of matches per snippet (not flattened)
def self.collect_plain(searchReq, domain = nil)
  unless domain
    snippets = google_search(searchReq)
    found = snippets.map do |snippet|
      @logger.debug("context = #{snippet}")
      snippet.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@(?:[a-z0-9.-]+\.)+[a-z0-9]{2,}/i)
    end
    return found
  end

  quoted_hits = google_search("#{searchReq} \"#{domain}\"")
  spelled_hits = google_search("#{searchReq} \"at #{domain}\"")

  (quoted_hits + spelled_hits).map do |snippet|
    normalized = filter_at_domain(snippet, domain)
    normalized.scan(/[a-z0-9._%+-]*[a-z0-9_%+]@#{Regexp.quote(domain)}/i)
  end
end

.filter_at(s) ⇒ Object

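Collapses whitespace, then replaces an obfuscated “at”/“et” separator (e.g. “john at example.com”, “john_at_example.com”) with “@”.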



60
61
62
# File 'lib/email_collector.rb', line 60

# Turns spelled-out "at" separators into "@".
#
# Whitespace runs are first collapsed to a single space. Then "at" or "et"
# (any case) is replaced by "@" when it is either flanked by separator
# characters (e.g. " at ", " [at] ") or wrapped in the same run of "_", "+"
# or "-" on both sides (e.g. "_at_").
#
# @param s [String] text to normalise
# @return [String] a new string with the substitutions applied
def self.filter_at(s)
  single_spaced = s.gsub(/\s+/, ' ')
  single_spaced.gsub(/[^a-z0-9_.%+-]+[ae]t[^a-z0-9.@-]+|([_+-]+)[ae]t\1/i, '@')
end

.filter_at_domain(s, domain) ⇒ Object

Replaces the filler characters in front of the given domain with “@”, e.g. “john***gmail.com” becomes “john@gmail.com”.



65
66
67
# File 'lib/email_collector.rb', line 65

# Rewrites the filler in front of +domain+ as "@", e.g.
# "john***gmail.com" becomes "john@gmail.com".
#
# Matching is case-insensitive so that upper-case letters count as part of
# the local part. With a case-sensitive class they were treated as filler
# and swallowed: "JOHN@gmail.com" was reduced to "@gmail.com".
#
# @param s [String] text to clean up
# @param domain [String] mail domain to look for; matched literally
# @return [String] a new string with each match replaced by "@" + domain
def self.filter_at_domain(s, domain)
  # Block form: the replacement is inserted verbatim, so a backslash in
  # +domain+ is never interpreted as a back-reference.
  s.gsub(/[^a-z0-9_%+-]+#{Regexp.quote(domain)}/i) { "@#{domain}" }
end

.filter_b(s) ⇒ Object

Strips the <b>…</b> bold markup that the google-search gem leaves in result text.



75
76
77
# File 'lib/email_collector.rb', line 75

# Removes every opening and closing lower-case <b> tag from the text.
#
# @param s [String] text that may contain <b>…</b> markup
# @return [String] a new string without the tags; other content is untouched
def self.filter_b(s)
  bold_tag = /<\/?b>/
  s.gsub(bold_tag, '')
end

.filter_exclam(s) ⇒ Object

Replaces every “!” and “:” with “.”, so an obfuscated “gmail!com” becomes “gmail.com”.



70
71
72
# File 'lib/email_collector.rb', line 70

# Replaces each "!" and ":" with ".", e.g. "gmail!com" becomes "gmail.com".
#
# @param s [String] text to normalise
# @return [String] a new string with the characters replaced
def self.filter_exclam(s)
  # Both source characters map onto the single target character.
  s.tr('!:', '.')
end

.google_search(searchReq) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/email_collector.rb', line 44

# Runs a web search through Google::Search::Web and returns cleaned-up
# text fragments from the results.
#
# The request is logged at debug level. Each result's content is passed
# through filter_b, filter_at and filter_exclam (in that order) and then
# split on the literal "..." separator.
#
# @param searchReq [String] query string
# @return [Array<String>] flat list of text fragments from all results
def self.google_search(searchReq)
  @logger.debug("searching for #{searchReq}")

  search = Google::Search::Web.new do |web|
    web.query = searchReq
    web.size = @size
  end

  fragments = search.map do |item|
    cleaned = filter_exclam(filter_at(filter_b(item.content)))
    cleaned.split('...')
  end

  fragments.flatten
end

.keywords=(k) ⇒ Object



15
16
17
# File 'lib/email_collector.rb', line 15

# Sets the keywords that .collect appends to each search request.
#
# The value is normalised with Kernel#Array, so a single keyword or nil is
# accepted as well as a list. Previously a lone String was stored as-is and
# .collect then failed when calling `map` on it. An Array is stored
# unchanged (same object).
#
# @param k [Array<String>, String, nil] keyword list, single keyword, or nil
def self.keywords=(k)
  @keywords = Array(k)
end

.size=(s) ⇒ Object



9
10
11
# File 'lib/email_collector.rb', line 9

# Stores the value that google_search assigns to the search object's +size+.
#
# @param s [Object] stored as given, without validation
def self.size=(s)
  instance_variable_set(:@size, s)
end