Module: BLEU
- Defined in:
- lib/zipf/bleu.rb
Defined Under Namespace
Classes: NgramCounts, Ngrams
Class Method Summary
collapse
-
.bleu(counts, n, debug = false) ⇒ Object
-
.brevity_penalty(c, r, smooth = 0.0) ⇒ Object
-
.get_counts(hypothesis, reference, n, times = 1) ⇒ Object
-
.hbleu(counts, n, debug = false) ⇒ Object
-
.per_sentence_bleu(hypothesis, reference, n = 4, smooth = 0.0) ⇒ Object
Class Method Details
.bleu(counts, n, debug = false) ⇒ Object
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
# File 'lib/zipf/bleu.rb', line 85
# Computes a corpus-level BLEU score from per-segment n-gram statistics.
#
# All entries of +counts+ are accumulated into a single NgramCounts, the
# log of clipped/total n-gram counts is averaged over the orders 1..n, the
# brevity penalty (a value <= 0 that is added in log space) is applied, and
# the result is exponentiated.
#
# @param counts [Enumerable] per-segment statistics, each accepted by
#   NgramCounts#plus_eq (presumably the objects returned by get_counts)
# @param n [Integer] highest n-gram order to score
# @param debug [Boolean] when true, writes the per-order counts, the brevity
#   penalty and the pre-penalty score to STDERR
# @return [Float] the BLEU score; 0.0 as soon as one order has a zero
#   clipped or total count
def BLEU::bleu(counts, n, debug=false)
  corpus_stats = NgramCounts.new n
  counts.each { |i| corpus_stats.plus_eq i }
  logbleu = 0.0
  0.upto(n-1) { |m|
    STDERR.write "#{m+1} #{corpus_stats.clipped[m]} / #{corpus_stats.sum[m]}\n" if debug
    # Math.log(0) is -Infinity, so a zero count at any order means a score
    # of 0. The check must look at this order's entry, not the whole array.
    return 0.0 if corpus_stats.clipped[m] == 0 || corpus_stats.sum[m] == 0
    logbleu += Math.log(corpus_stats.clipped[m]) - Math.log(corpus_stats.sum[m])
  }
  logbleu /= n
  if debug
    STDERR.write "BP #{brevity_penalty(corpus_stats.hyp_len, corpus_stats.ref_len)}\n"
    # Geometric mean of the precisions before the brevity penalty is applied.
    STDERR.write "sum #{Math.exp(logbleu)}\n"
  end
  logbleu += brevity_penalty corpus_stats.hyp_len, corpus_stats.ref_len
  Math.exp logbleu
end
|
.brevity_penalty(c, r, smooth = 0.0) ⇒ Object
81
82
83
|
# File 'lib/zipf/bleu.rb', line 81
# Brevity penalty in log space.
#
# Returns 0.0 when the hypothesis length +c+ is at least the smoothed
# reference length, otherwise the negative value 1 - (r + smooth) / c.
# Callers add the result to a log score before exponentiating.
#
# @param c [Numeric] hypothesis length
# @param r [Numeric] reference length
# @param smooth [Numeric] amount added to the reference length
# @return [Float] a value <= 0.0
def BLEU::brevity_penalty(c, r, smooth=0.0)
  # Force float division: with an Integer +smooth+ the plain `/` would
  # truncate the ratio and understate (or entirely drop) the penalty.
  ratio = (r + smooth).to_f / c
  [0.0, 1.0 - ratio].min
end
|
.get_counts(hypothesis, reference, n, times = 1) ⇒ Object
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
|
# File 'lib/zipf/bleu.rb', line 65
# Collects the n-gram statistics of one hypothesis/reference pair.
#
# For every distinct hypothesis n-gram the total count and the count
# clipped by the reference frequency are added to the slot of its order
# (index = n-gram length - 1).
#
# @param hypothesis [String] hypothesis segment
# @param reference [String] reference segment
# @param n [Integer] highest n-gram order, handed to NgramCounts.new and ngrams
# @param times [Integer] factor every count and both lengths are multiplied by
# @return [NgramCounts] with sum, clipped, ref_len and hyp_len filled in
def BLEU::get_counts(hypothesis, reference, n, times=1)
  stats = NgramCounts.new n

  ref_ngrams = Ngrams.new
  ngrams(reference, n) do |gram|
    ref_ngrams.add gram
  end

  hyp_ngrams = Ngrams.new
  ngrams(hypothesis, n) do |gram|
    hyp_ngrams.add gram
  end

  hyp_ngrams.each do |gram, hyp_count|
    order = gram.size - 1
    stats.sum[order] += hyp_count * times
    # A hypothesis n-gram is credited at most as often as the reference has it.
    matched = [ref_ngrams.get_count(gram), hyp_count].min
    stats.clipped[order] += matched * times
  end

  stats.ref_len = tokenize(reference.strip).size * times
  stats.hyp_len = tokenize(hypothesis.strip).size * times
  stats
end
|
.hbleu(counts, n, debug = false) ⇒ Object
103
104
105
|
# File 'lib/zipf/bleu.rb', line 103
# Human-readable BLEU: the score from bleu scaled by 100 and rounded to
# three decimal places.
#
# @param counts passed through to bleu
# @param n passed through to bleu
# @param debug passed through to bleu
# @return [Float] 100 * BLEU, rounded to 3 digits
def BLEU::hbleu(counts, n, debug=false)
  score = bleu(counts, n, debug)
  (100 * score).round(3)
end
|
.per_sentence_bleu(hypothesis, reference, n = 4, smooth = 0.0) ⇒ Object
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# File 'lib/zipf/bleu.rb', line 107
# Smoothed sentence-level BLEU for a single hypothesis/reference pair.
#
# N-gram orders 1..min(n, reference length) are scored. Orders >= 2 use
# add-one smoothing on both the clipped and the total count, so a missing
# higher-order match does not zero the score; unigrams are not smoothed.
# The brevity penalty is added in log space before exponentiating.
#
# @param hypothesis [String] whitespace-separated hypothesis
# @param reference [String] whitespace-separated reference
# @param n [Integer] highest n-gram order
# @param smooth [Numeric] smoothing term forwarded to brevity_penalty
# @return [Float] the sentence score; 0.0 when the hypothesis or the
#   reference contains no tokens
def BLEU::per_sentence_bleu(hypothesis, reference, n=4, smooth=0.0)
  hyp_len = hypothesis.strip.split.size
  ref_len = reference.strip.split.size
  # With no tokens on either side the precisions are 0/0 (NaN); report 0.
  return 0.0 if hyp_len.zero? || ref_len.zero?

  h_ng = {}; r_ng = {}
  1.upto(n) { |i| h_ng[i] = []; r_ng[i] = [] }
  ngrams(hypothesis, n) { |i| h_ng[i.size] << i }
  ngrams(reference, n) { |i| r_ng[i.size] << i }

  m = [n, ref_len].min
  logbleu = 0.0
  1.upto(m) { |i|
    # Frequency tables, built once per order instead of rescanning the
    # reference list for every hypothesis n-gram.
    hyp_freq = h_ng[i].each_with_object(Hash.new(0)) { |ng, acc| acc[ng] += 1 }
    ref_freq = r_ng[i].each_with_object(Hash.new(0)) { |ng, acc| acc[ng] += 1 }

    counts_sum = h_ng[i].size
    counts_clipped = 0
    # Clip: an n-gram is credited at most as often as it occurs in the
    # hypothesis AND at most as often as it occurs in the reference, which
    # keeps the precision <= 1 (same rule as get_counts).
    hyp_freq.each { |ng, count| counts_clipped += [count, ref_freq[ng]].min }

    add = i >= 2 ? 1.0 : 0.0
    logbleu += Math.log(counts_clipped+add) - Math.log(counts_sum+add)
  }
  logbleu /= m
  logbleu += brevity_penalty hyp_len, ref_len, smooth
  Math.exp logbleu
end
|