Module: BLEU

Defined in:
lib/zipf/bleu.rb

Defined Under Namespace

Classes: NgramCounts, Ngrams

Class Method Summary collapse

Class Method Details

.bleu(counts, n, debug = false) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/zipf/bleu.rb', line 85

# Corpus-level BLEU: merges per-segment n-gram statistics and returns the
# exponential of (mean log n-gram precision + log-domain brevity penalty).
#
# @param counts [Enumerable] per-segment statistics; each element is merged
#   into a fresh NgramCounts via +plus_eq+ (presumably the objects produced by
#   get_counts -- confirm against callers)
# @param n [Integer] highest n-gram order to score
# @param debug [Boolean] when true, the per-order counts, the brevity penalty
#   and the un-penalised score are written to STDERR
# @return [Float] the BLEU score; 0.0 as soon as some order has no clipped
#   matches or no n-grams at all
def BLEU::bleu counts, n, debug=false
  corpus_stats = NgramCounts.new n
  counts.each { |i| corpus_stats.plus_eq i }
  logbleu = 0.0
  0.upto(n-1) { |m|
    clipped = corpus_stats.clipped[m]
    total = corpus_stats.sum[m]
    STDERR.write "#{m+1} #{clipped} / #{total}\n" if debug
    # Bail out before Math.log sees a zero; the check must look at the
    # per-order entry, not at the whole sum collection.
    return 0.0 if clipped == 0 || total == 0
    logbleu += Math.log(clipped) - Math.log(total)
  }
  logbleu /= n
  # brevity_penalty is already in the log domain, hence added rather than multiplied.
  bp = brevity_penalty corpus_stats.hyp_len, corpus_stats.ref_len
  if debug
    STDERR.write "BP #{bp}\n"
    # Score before the brevity penalty is applied.
    STDERR.write "sum #{Math.exp(logbleu)}\n"
  end
  Math.exp(logbleu + bp)
end

.brevity_penalty(c, r, smooth = 0.0) ⇒ Object



81
82
83
# File 'lib/zipf/bleu.rb', line 81

# Log-domain brevity penalty: min(0, 1 - (r + smooth) / c).
# Callers in this file add the result to a log score before exponentiating.
#
# @param c [Numeric] hypothesis length (callers here pass the hypothesis
#   token count)
# @param r [Numeric] reference length (callers here pass the reference
#   token count)
# @param smooth [Numeric] value added to the reference length before dividing
# @return [Float] 0.0 when the ratio (r + smooth) / c is at most 1, a
#   negative value otherwise
def BLEU::brevity_penalty c, r, smooth=0.0
  # to_f forces real division: with Integer lengths and an Integer smooth the
  # quotient would otherwise be truncated by Integer#/.
  [0.0, 1.0 - ((r + smooth).to_f / c)].min
end

.get_counts(hypothesis, reference, n, times = 1) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/zipf/bleu.rb', line 65

# Collects the n-gram statistics of a single hypothesis/reference pair.
#
# For every distinct hypothesis n-gram the total count and the clipped count
# (limited by the n-gram's count in the reference) are accumulated per order,
# each multiplied by +times+; the token lengths are scaled the same way.
#
# @param hypothesis [String] hypothesis text
# @param reference [String] reference text
# @param n [Integer] highest n-gram order, passed to NgramCounts.new and ngrams
# @param times [Numeric] multiplier applied to every count and length
# @return [NgramCounts] the populated statistics object
def BLEU::get_counts(hypothesis, reference, n, times = 1)
  stats = NgramCounts.new(n)

  ref_ngrams = Ngrams.new
  ngrams(reference, n) { |gram| ref_ngrams.add(gram) }

  hyp_ngrams = Ngrams.new
  ngrams(hypothesis, n) { |gram| hyp_ngrams.add(gram) }

  hyp_ngrams.each do |gram, occurrences|
    # Statistics are indexed by order - 1 (unigrams live at index 0).
    order = gram.size - 1
    stats.sum[order] += occurrences * times
    matched = [ref_ngrams.get_count(gram), occurrences].min
    stats.clipped[order] += matched * times
  end

  stats.ref_len = tokenize(reference.strip).size * times
  stats.hyp_len = tokenize(hypothesis.strip).size * times
  stats
end

.hbleu(counts, n, debug = false) ⇒ Object



103
104
105
# File 'lib/zipf/bleu.rb', line 103

# BLEU scaled by 100 and rounded to three decimal places.
#
# @param counts per-segment statistics, forwarded unchanged to bleu
# @param n [Integer] highest n-gram order, forwarded to bleu
# @param debug [Boolean] forwarded to bleu
# @return [Float] 100 * bleu(counts, n, debug), rounded to 3 decimals
def BLEU::hbleu(counts, n, debug = false)
  score = bleu(counts, n, debug)
  (100 * score).round(3)
end

.per_sentence_bleu(hypothesis, reference, n = 4, smooth = 0.0) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/zipf/bleu.rb', line 107

# Sentence-level BLEU with add-one smoothing on the precisions of order 2 and
# above.
#
# The number of orders averaged is min(n, reference length in tokens). Clipped
# counts take, per distinct hypothesis n-gram, the smaller of its count in the
# hypothesis and its count in the reference, so a precision never exceeds 1.
#
# @param hypothesis [String] hypothesis text (whitespace-split for lengths)
# @param reference [String] reference text (whitespace-split for lengths)
# @param n [Integer] highest n-gram order
# @param smooth [Numeric] forwarded to brevity_penalty
# @return [Float] the score; 0.0 when either side has no tokens
def BLEU::per_sentence_bleu hypothesis, reference, n=4, smooth=0.0
  hyp_len = hypothesis.strip.split.size
  ref_len = reference.strip.split.size
  # With no tokens on one side the log terms below degenerate
  # (log 0 - log 0, or a division by zero orders), which yields NaN.
  return 0.0 if hyp_len == 0 || ref_len == 0

  h_ng = {}; r_ng = {}
  1.upto(n) { |i| h_ng[i] = []; r_ng[i] = [] }
  ngrams(hypothesis, n) { |ng| h_ng[ng.size] << ng }
  ngrams(reference, n) { |ng| r_ng[ng.size] << ng }
  m = [n, ref_len].min
  logbleu = 0.0
  1.upto(m) { |i|
    hyp_counts = Hash.new(0)
    h_ng[i].each { |ng| hyp_counts[ng] += 1 }
    ref_counts = Hash.new(0)
    r_ng[i].each { |ng| ref_counts[ng] += 1 }
    counts_clipped = 0
    # Clip each n-gram's credit to how often the hypothesis actually uses it.
    hyp_counts.each { |ng, c| counts_clipped += [c, ref_counts[ng]].min }
    counts_sum = h_ng[i].size
    # Unigram precision is left unsmoothed; higher orders get add-one smoothing.
    add = i >= 2 ? 1.0 : 0.0
    logbleu += Math.log(counts_clipped + add) - Math.log(counts_sum + add)
  }
  logbleu /= m
  logbleu += brevity_penalty hyp_len, ref_len, smooth
  Math.exp logbleu
end