Module: Scotchit

Defined in:
lib/scotchit.rb,
lib/scotchit/version.rb

Constant Summary collapse

DB =

seeding the hashes

{}
Stats =
{}
Price =
{}
VERSION =
"1.0.0"

Class Method Summary collapse

Class Method Details

.ci_lower_bound(pos, n, confidence) ⇒ Object

In keeping with the philosophy of reddit, this program evaluates the ratings that redditors have given to all of the whiskies submitted for review by computing a Wilson score confidence interval (lower bound). This is the same algorithm that reddit uses internally for ratings, and thus what you see when you visit the site.

Evan Miller’s Wilson Score Interval fn



33
34
35
36
37
38
39
40
# File 'lib/scotchit.rb', line 33

# Lower bound of the Wilson score confidence interval for a proportion
# (after Evan Miller's Wilson Score Interval function).
#
# @param pos [Numeric] number of positive ratings
# @param n [Numeric] total number of ratings
# @param confidence [Float] confidence level; it is turned into the
#   probability 1 - (1 - confidence) / 2 before being handed to
#   Statistics2.pnormaldist (presumably the inverse normal CDF -- confirm
#   against the statistics2 gem)
# @return [Numeric] 0 when there are no ratings, otherwise the lower bound
def ci_lower_bound(pos, n, confidence)
    # Nothing rated yet: avoid dividing by zero below.
    return 0 if n == 0

    z = Statistics2.pnormaldist(1 - (1 - confidence) / 2)
    z_squared = z * z
    # Observed fraction of positive ratings (forced to floating point).
    phat = 1.0 * pos / n
    spread = z * Math.sqrt((phat * (1 - phat) + z_squared / (4 * n)) / n)
    (phat + z_squared / (2 * n) - spread) / (1 + z_squared / n)
end

.parse_csv(file_name) ⇒ Object

grok the ratings.csv file



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/scotchit.rb', line 44

# Grok the ratings CSV and accumulate, per whisky name, every rating into DB
# and every usable price into Price.
#
# Rows are skipped when their 'Region' is one of the non-Scotch categories or
# when they carry no whisky name (such a row cannot be attributed to any
# whisky, and previously aborted the whole import with a NoMethodError).
#
# @param file_name [String] path of a CSV file with a header row providing
#   the 'Whisky Name', 'Rating', 'Price' and 'Region' columns
# @return whatever CSV.foreach returns; callers use this for its side effects
def parse_csv(file_name)
    non_scotch = %w[Bourbon Rye Grain Tennessee Liqueur Wheat]
    foreign_markers = %w[£ CDN CAD AUD GBP NZD EUR CAN]

    CSV.foreach(file_name, headers: true) do |row|
        # ignore non-Scotch things
        next if non_scotch.include?(row['Region'])

        # clean up key name; a blank or missing name is not a usable key
        name = row['Whisky Name'].to_s.strip
        next if name.empty?

        # get the rating as an integer (a missing rating becomes 0)
        rating = row['Rating'].to_i

        # get the cost as a floating point by removing non-digits; prices
        # quoted in a non-US currency are recorded as 0.0 (i.e. unknown)
        price_text = row['Price'].to_s
        shouted = price_text.upcase
        cost = if foreign_markers.any? { |marker| shouted.include?(marker) }
                   0.0
               else
                   price_text.gsub(/[^\d.]/, '').to_f
               end

        # seed key:val
        unless DB.key?(name)
            DB[name] = []
            Price[name] = []
        end

        # append score; only prices above 20.0 are kept
        DB[name] << rating
        Price[name] << cost if cost > 20.0
    end
end

.run ⇒ Object

reverse sort by confidence score and print to term

We display only whole-number percentages in order to avoid the perception that the score is accurate to some decimal place. We’re trying to select scotches that, judging by the sample of redditors, can be confidently expected to be yummy and deserving of their price.



102
103
104
105
106
# File 'lib/scotchit.rb', line 102

# Load the ratings, score every whisky, then print the results to the
# terminal in descending order of confidence score.
#
# Only entries whose score is above 20 are printed, one per line, as
# "name: score% (positive+, total#) price-indicator".
#
# @return [Array] the Stats entries as [name, stat] pairs, highest score first
def run
    parse_csv("lib/ratings.csv")
    score
    # Negating the score turns the ascending sort into a descending one.
    ranked = Stats.sort_by { |_name, stat| -stat[0] }
    ranked.each do |name, stat|
        next unless stat[0] > 20.0

        puts "#{name}: #{stat[0]}% (#{stat[1]}+, #{stat[2]}#) #{stat[3]}"
    end
end

.score ⇒ Object

do the confidence interval computations for all scotches meeting our thresholds.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/scotchit.rb', line 76

# Do the confidence interval computation for every whisky in DB that meets
# the review-count threshold, and store the outcome in Stats.
#
# Each Stats entry is [score, positives, reviews, indicator] where score is
# the ci_lower_bound result as a whole-number percentage, positives is the
# number of ratings above 90, reviews is the total number of ratings and
# indicator is a price band derived from the average recorded price.
#
# @return [Array] the keys of DB
def score
    DB.keys.each do |name|
        reviews = DB[name].count
        # only eval whisky that has 6 or more reviews on reddit
        next unless reviews > 5

        # threshold as positive vote: 91+% rating
        positives = DB[name].count { |rating| rating > 90 }

        # calculate the Price indicator from the mean recorded price
        prices = Price[name]
        mean_price = prices == [] ? 0.0 : prices.reduce(:+) / prices.count
        rounded = mean_price.round
        indicator =
            if rounded == 0
                "?" # no price on record
            elsif rounded.between?(1, 39)
                "$"
            elsif rounded.between?(40, 69)
                "$$"
            elsif rounded.between?(70, 89)
                "$$$"
            elsif rounded.between?(90, 120)
                "$$$$"
            else
                "$$$$$"
            end

        percent = (ci_lower_bound(positives, reviews, 0.975) * 100.0).round
        Stats[name] = [percent, positives, reviews, indicator]
    end
end