Class: WordCounter

Inherits:
Object
  • Object
show all
Defined in:
lib/word_counter.rb,
lib/word_counter/version.rb

Constant Summary collapse

VERSION =
"0.3.0"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg, show_sentences = false, colorize = false) ⇒ WordCounter

WordCounter!

Parameters:

  • arg (String)

    The path and filename of the file to analyze, or the URL of a website to analyze if no such file exists

  • show_sentences (Boolean) (defaults to: false)

    (default: false) If true, WordCounter will print out the sentences which contain the counted word in question

Raises:

  • (ArgumentError)


17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/word_counter.rb', line 17

# Builds a WordCounter for the given source and stores the analysis result.
#
# The argument is first handed to WordCounter.analyze_file; when that raises
# NoFileError the same argument is handed to WordCounter.analyze_website.
#
# @param arg [String] a file path or a website address
# @param show_sentences [Boolean] stored for later use by #show_sentences?
# @param colorize [Boolean] stored for later use by #colorize
# @raise [ArgumentError] when arg is nil or false
def initialize(arg, show_sentences = false, colorize = false)
  raise ArgumentError, "Please supply a URL or file path." unless arg

  @show_sentences = show_sentences
  @colorize = colorize
  @hashified_words =
    begin
      # First attempt: treat the argument as a path to a local file.
      WordCounter.analyze_file(arg)
    rescue NoFileError
      # No such file: treat the argument as a website address instead.
      WordCounter.analyze_website(arg)
    end
end

Class Method Details

.analyze_file(file) ⇒ Object

Opens a file and analyzes it

Parameters:

  • file (String)

    A path to a file

Raises:



129
130
131
132
133
134
135
136
137
138
139
# File 'lib/word_counter.rb', line 129

# Opens the file at the given path and runs hashify_words over it.
#
# @param file [String] a path to a file
# @return [Object] whatever hashify_words returns for the opened file
# @raise [NoFileError] when nothing exists at the given path
def self.analyze_file(file)
  raise NoFileError, "File does not exist!" unless File.exist?(file)

  word_index = nil
  # The block form of File.open closes the handle once the block finishes.
  @file = File.open(file) { |io| word_index = hashify_words(io) }
  word_index
end

.analyze_website(arg) ⇒ Object

Visits a website and analyzes it

Parameters:

  • arg (String)

    A website URL

Raises:



110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/word_counter.rb', line 110

# Downloads a web page and runs hashify_words over its text content.
#
# @param arg [String] a website URL (passed through WordCounter.urlize first)
# @return [Object] whatever hashify_words returns for the page text
# @raise [NoWebsiteError] when the response code is not '200'
def self.analyze_website(arg)
  response = WordCounter.fetch(WordCounter.urlize(arg))
  raise NoWebsiteError unless response.code == '200'

  page = Nokogiri::HTML(response.body)
  # Drop script, meta and style nodes before extracting the text.
  %w[script meta style].each { |tag| page.search(tag).remove }
  hashify_words(page.text)
end

.fetch(uri_str, limit = 10) ⇒ Object

Fetch a url

Parameters:

  • uri_str (String)

    A URI

Raises:

  • (ArgumentError)


74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/word_counter.rb', line 74

# Fetches a URL with Net::HTTP, following redirects.
#
# @param uri_str [String] the URI to request
# @param limit [Integer] how many requests may still be made while following
#   redirects
# @return [Net::HTTPResponse] the response, when it is a Net::HTTPSuccess
# @raise [ArgumentError] when the redirect budget is exhausted, or when a
#   redirect response carries no Location header
def self.fetch(uri_str, limit = 10)
  # `<= 0` rather than `== 0` so a negative limit cannot recurse without bound.
  raise ArgumentError, 'too many HTTP redirects' if limit <= 0

  uri = URI(uri_str)
  response = Net::HTTP.get_response(uri)

  case response
  when Net::HTTPSuccess
    response
  when Net::HTTPRedirection
    location = response['location']
    raise ArgumentError, 'redirect response has no Location header' unless location

    # A Location header may be relative (e.g. "/login"); resolve it against
    # the URI that was just requested before following it.
    target = uri.merge(location).to_s
    warn "redirected to #{target}"
    fetch(target, limit - 1)
  else
    # Net::HTTPResponse#value raises an HTTP error for non-2xx responses.
    response.value
  end
end

.hashify_words(string) ⇒ Object

Builds the data structures we use for our analysis.

Parameters:

  • string (File)

    The string we’re analyzing (notice: can also be a File object, because each_line also works with Files.)



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/word_counter.rb', line 145

# Builds the per-word index used for the analysis.
#
# Each word found in the input becomes a Symbol key whose value is a Hash with
# :count (total occurrences) and :lines (the stripped lines containing the
# word, de-duplicated and sorted).
#
# @param string [#each_line] the text to analyze; anything that responds to
#   each_line works (a String or an open File, for example)
# @return [Hash{Symbol => Hash}] the word index described above
def self.hashify_words(string)
  index = {}

  string.each_line do |line|
    sentence = line.strip

    # [[:word:]] is Unicode-aware, whereas \w / \W only know ASCII word
    # characters and would cut a word such as "café" at the accented letter.
    line.scan(/[[:word:]]+/) do |word|
      entry = (index[word.to_sym] ||= { count: 0, lines: [] })
      entry[:count] += 1
      entry[:lines] << sentence
    end
  end

  # De-duplicate and sort once per word instead of after every occurrence.
  index.each_value do |entry|
    entry[:lines].uniq!
    entry[:lines].sort!
  end

  index
end

.urlize(arg) ⇒ Object

Prepends an http:// if there isn’t one.

Parameters:

  • arg (String)


97
98
99
100
101
102
103
# File 'lib/word_counter.rb', line 97

# Prepends "http://" unless the argument already starts with an http or https
# scheme.
#
# The check is anchored to the start of the whole string (\A, not ^, which
# would also match after any embedded newline) and ignores case, because URI
# schemes are case-insensitive.
#
# @param arg [String] a website address, with or without a scheme
# @return [String] the argument itself, or the argument prefixed with "http://"
def self.urlize(arg)
  arg =~ %r{\Ahttps?://}i ? arg : "http://#{arg}"
end

Instance Method Details

#colorize(str, options) ⇒ Object



39
40
41
# File 'lib/word_counter.rb', line 39

# Applies colour to a value when colouring is enabled for this instance.
#
# @param str [Object] the value to colour
# @param options [Hash] options forwarded to String#colorize
# @return [Object] str unchanged when @colorize is falsy; otherwise the result
#   of calling colorize(options) on str.to_s
def colorize(str, options)
  return str unless @colorize

  str.to_s.colorize(options)
end

#report ⇒ Object

Prints a report to stdout



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/word_counter.rb', line 46

# Prints a report to stdout.
#
# Words are listed by descending count, ties broken by the word itself. When
# show_sentences? is truthy, each word is followed by a numbered, indented
# list of the lines stored for it.
#
# @return [Array] the sorted [word, data] pairs that were printed
def report
  ranked = @hashified_words.sort_by { |word, data| [-data[:count], word] }

  puts 'Results:'
  ranked.each do |word, data|
    puts colorize("#{data[:count]} #{word}", color: :green, mode: :bold)

    # Only build the sentence listing when it is actually going to be printed.
    next unless show_sentences?

    numbered = data[:lines].each_with_index.map do |sentence, position|
      "#{colorize(position + 1, color: :red)}: #{sentence}"
    end
    listing = numbered.join("\n    ")
    puts "    #{listing}"
  end
end

#show_sentences? ⇒ Boolean

Helper method

Returns:

  • (Boolean)


34
35
36
# File 'lib/word_counter.rb', line 34

# Tells whether the report should list the lines in which each word occurs.
#
# @return [Boolean] the value held in @show_sentences, returned as-is
def show_sentences?
  @show_sentences
end