Class: Statsample::Graph::Histogram

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/graph/histogram.rb

Overview

In statistics, a histogram is a graphical representation, showing a visual impression of the distribution of experimental data. It is an estimate of the probability distribution of a continuous variable and was first introduced by Karl Pearson [1]. A histogram consists of tabular frequencies, shown as adjacent rectangles, erected over discrete intervals (bins), with an area equal to the frequency of the observations in the interval. The height of a rectangle is also equal to the frequency density of the interval, i.e., the frequency divided by the width of the interval. The total area of the histogram is equal to the number of data.

Usage

Svg output

a = Daru::Vector.new()
puts Statsample::Graph::Histogram.new(a).to_svg

Using ReportBuilder

a = Daru::Vector.new([1,2,3,4])
rb=ReportBuilder.new
rb.add(Statsample::Graph::Histogram.new(a))
rb.save_html('histogram.html')

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(data, opts = Hash.new) ⇒ Histogram

data can be either a Daru::Vector or a Statsample::Histogram


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/statsample/graph/histogram.rb', line 47

# Creates a histogram graph for +data+.
#
# data:: the object to plot; #pre_vis handles a Statsample::Histogram or a
#        Daru::Vector. If it responds to +name+, that name is interpolated
#        into the default graph name.
# opts:: Hash overriding any of the defaults below. Only the keys listed in
#        the defaults are forwarded to their "<key>=" writers.
def initialize(data, opts = {})
  source_name = data.respond_to?(:name) ? data.name : ""
  defaults = {
    :name                     => _("Histograma (%s)") % source_name,
    :width                    => 400,
    :height                   => 300,
    :margin_top               => 10,
    :margin_bottom            => 20,
    :margin_left              => 30,
    :margin_right             => 20,
    :minimum_x                => nil,
    :maximum_x                => nil,
    :minimum_y                => nil,
    :maximum_y                => nil,
    :bins                     => nil,
    :line_normal_distribution => false
  }
  @opts = defaults.merge(opts)
  # Push every known option through its writer so the readers see it.
  defaults.each_key { |key| send("#{key}=", @opts[key]) }
  @data = data
end

Instance Attribute Details

#binsObject

Could be an array of ranges or number of bins


35
36
37
# File 'lib/statsample/graph/histogram.rb', line 35

# Reader for the bin specification, which may be an array of ranges or a
# number of bins. Returns the current value of @bins.
def bins
  @bins
end

#heightObject

Total height


24
25
26
# File 'lib/statsample/graph/histogram.rb', line 24

# Reader for the total height of the graph. Returns the current value of
# @height (the constructor's default is 300).
def height
  @height
end

#histObject (readonly)

Returns the value of attribute hist


33
34
35
# File 'lib/statsample/graph/histogram.rb', line 33

# Reader for the histogram object the graph is drawn from. @hist is assigned
# by #pre_vis, so it is nil until that has run.
def hist
  @hist
end

#line_normal_distributionObject

Add a line showing normal distribution


45
46
47
# File 'lib/statsample/graph/histogram.rb', line 45

# Reader for the flag that asks for a normal-distribution line to be added to
# the graph. Returns the current value of @line_normal_distribution
# (the constructor's default is false).
def line_normal_distribution
  @line_normal_distribution
end

#margin_bottomObject

Bottom margin


28
29
30
# File 'lib/statsample/graph/histogram.rb', line 28

# Reader for the bottom margin. Returns the current value of @margin_bottom
# (the constructor's default is 20).
def margin_bottom
  @margin_bottom
end

#margin_leftObject

Left margin


30
31
32
# File 'lib/statsample/graph/histogram.rb', line 30

# Reader for the left margin. Returns the current value of @margin_left
# (the constructor's default is 30).
def margin_left
  @margin_left
end

#margin_rightObject

Right margin


32
33
34
# File 'lib/statsample/graph/histogram.rb', line 32

# Reader for the right margin. Returns the current value of @margin_right
# (the constructor's default is 20).
def margin_right
  @margin_right
end

#margin_topObject

Top margin


26
27
28
# File 'lib/statsample/graph/histogram.rb', line 26

# Reader for the top margin. Returns the current value of @margin_top
# (the constructor's default is 10).
def margin_top
  @margin_top
end

#maximum_xObject

Maximum value on x axis. Calculated automatically from data if not set.


39
40
41
# File 'lib/statsample/graph/histogram.rb', line 39

# Reader for the maximum value on the x axis. Returns the current value of
# @maximum_x; when it is nil, #rubyvis_panel fills it from @hist.max.
def maximum_x
  @maximum_x
end

#maximum_yObject

Maximum value on y axis. Calculated automatically from data if not set.


43
44
45
# File 'lib/statsample/graph/histogram.rb', line 43

# Reader for the maximum value on the y axis. Returns the current value of
# @maximum_y; when it is nil, #rubyvis_panel fills it from @hist.max_val.
def maximum_y
  @maximum_y
end

#minimum_xObject

Minimum value on x axis. Calculated automatically from data if not set.


37
38
39
# File 'lib/statsample/graph/histogram.rb', line 37

# Reader for the minimum value on the x axis. Returns the current value of
# @minimum_x; when it is nil, #rubyvis_panel fills it from @hist.min.
def minimum_x
  @minimum_x
end

#minimum_yObject

Minimum value on y axis. Set to 0 if not set


41
42
43
# File 'lib/statsample/graph/histogram.rb', line 41

# Reader for the minimum value on the y axis. Returns the current value of
# @minimum_y; when it is nil, #rubyvis_panel sets it to 0.
def minimum_y
  @minimum_y
end

#nameObject

Histogram name


20
21
22
# File 'lib/statsample/graph/histogram.rb', line 20

# Reader for the histogram's name. Returns the current value of @name;
# #report_building uses it as the section name.
def name
  @name
end

#widthObject

Total width


22
23
24
# File 'lib/statsample/graph/histogram.rb', line 22

# Reader for the total width of the graph. Returns the current value of
# @width (the constructor's default is 400).
def width
  @width
end

Instance Method Details

#pre_visObject

:nodoc:


68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/statsample/graph/histogram.rb', line 68

# Prepares @hist, @mean and @sd from @data before anything is drawn.
#
# * Statsample::Histogram: used directly as @hist; mean and standard
#   deviation come from its estimated_mean / estimated_standard_deviation.
# * Daru::Vector: mean and sd are read from the vector, @bins defaults to
#   floor(sqrt(size)) when unset, and @hist is built with
#   @data.histogram(@bins).
#
# Any other kind of @data leaves the instance variables untouched.
def pre_vis # :nodoc:
  case @data
  when Statsample::Histogram
    @hist = @data
    @mean = @hist.estimated_mean
    @sd   = @hist.estimated_standard_deviation
  when Daru::Vector
    @mean = @data.mean
    @sd   = @data.sd
    @bins ||= Math.sqrt(@data.size).floor
    @hist = @data.histogram(@bins)
  end
end

#report_building(builder) ⇒ Object

:nodoc:


181
182
183
184
185
# File 'lib/statsample/graph/histogram.rb', line 181

# Adds the histogram to a report: opens a section titled with #name on
# +builder+ and, inside it, inserts the graph produced by #to_svg as an
# 'svg' image sized with #width and #height.
def report_building(builder) # :nodoc:
  builder.section(:name => name) do |section|
    section.image(
      to_svg,
      :type   => 'svg',
      :width  => width,
      :height => height
    )
  end
end

#report_building_text(generator) ⇒ Object


186
187
188
189
190
191
192
193
194
195
# File 'lib/statsample/graph/histogram.rb', line 186

# Writes a plain-text rendering of the histogram to +generator+.
#
# One line is emitted per bin through generator.text, formatted as
# "<lower bound> : <stars>". Each star stands for +step+ observations, where
# +step+ is chosen so that the tallest bin is drawn with about 40 stars at
# most.
#
# generator:: any object responding to +text(String)+.
#
# Returns whatever @hist.range.each_with_index returns.
def report_building_text(generator)
  pre_vis
  #anchor=generator.toc_entry(_("Histogram %s") % [@name])
  tallest = @hist.max_val
  # Divide by a Float: with Integer counts, `tallest / 40` would truncate
  # before #ceil ran, leaving the step too small and the rows too wide.
  step = tallest > 40 ? (tallest / 40.0).ceil : 1

  @hist.range.each_with_index do |lower_bound, i|
    # The range is iterated with one more entry than there are bins; the
    # entry at index @hist.bins has no bin of its own, so skip it.
    next if i == @hist.bins
    stars = "*" * (@hist.bin[i] / step).floor
    generator.text(sprintf("%5.2f : %s", lower_bound, stars))
  end
end

#rubyvis_normal_distribution(pan) ⇒ Object


80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/statsample/graph/histogram.rb', line 80

# Adds a normal-distribution curve to the panel +pan+.
#
# The x axis from @minimum_x to @maximum_x is cut into slices as wide as the
# histogram's first bin. For each slice the expected count is the normal
# probability mass between its edges (using @mean and @sd) multiplied by
# @hist.sum, plotted at the slice midpoint. The points are drawn as a black
# line with "cardinal" interpolation, positioned through @x_scale and
# @y_scale, which #rubyvis_panel sets before calling this method.
#
# Returns the result of pan.line.
def rubyvis_normal_distribution(pan)
  x_scale = @x_scale
  y_scale = @y_scale

  first_range = @hist.get_range(0)
  bin_width   = first_range[1] - first_range[0]
  slice_count = ((@maximum_x - @minimum_x) / bin_width.to_f).floor
  total       = @hist.sum

  points = (0...slice_count).map do |i|
    left_edge  = @minimum_x + i * bin_width
    right_edge = @minimum_x + (i + 1) * bin_width
    upper = Distribution::Normal.cdf((right_edge - @mean) / @sd)
    lower = Distribution::Normal.cdf((left_edge - @mean) / @sd)
    { :x => (left_edge + right_edge) / 2.0, :y => (upper - lower) * total }
  end

  pan.line do |curve|
    curve.data points
    curve.interpolate "cardinal"
    curve.stroke_style "black"
    curve.bottom { |point| y_scale[point[:y]] }
    curve.left { |point| x_scale[point[:x]] }
  end
end

#rubyvis_panelObject

Returns a Rubyvis panel containing the histogram


106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/statsample/graph/histogram.rb', line 106

# Builds and returns the Rubyvis panel that draws the histogram.
#
# Steps, in order:
# 1. #pre_vis populates @hist (and @mean / @sd) from @data.
# 2. Axis limits still set to nil are filled in: x from @hist.min / @hist.max,
#    y from 0 to @hist.max_val.
# 3. Linear x and y scales are built over the drawing area (total size minus
#    margins) and stored in @x_scale / @y_scale, which
#    #rubyvis_normal_distribution reads.
# 4. The panel gets a rule per y tick, a rule per x tick, one bar per bin and,
#    when @line_normal_distribution is truthy, the normal curve.
#
# The panel is returned without being rendered; #to_svg calls render on it.
def rubyvis_panel # :nodoc:
  pre_vis
  #that=self
  
  # Only fill in limits the caller left unset.
  @minimum_x||=@hist.min
  @maximum_x||=@hist.max
  @minimum_y||=0
  @maximum_y||=@hist.max_val
  
  margin_hor=margin_left + margin_right
  margin_vert=margin_top  + margin_bottom

  # NOTE(review): `pv` is not defined in this method; presumably it is
  # Rubyvis's shorthand for the Rubyvis module, which would make this the
  # same call as Rubyvis::Scale.linear below -- confirm.
  x_scale = pv.Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)

  y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
  
  y_scale.nice
  
  # One hash per bin with its lower edge, upper edge and count.
  # Note: this local shadows the #bins reader for the rest of the method.
  bins=@hist.bins.times.map {|i|
    {
     :low =>@hist.get_range(i)[0],
     :high=>@hist.get_range(i)[1],
     :value=>@hist.bin[i]
    }
  }
  # Keep the scales on the instance so #rubyvis_normal_distribution can use them.
  @x_scale=x_scale
  @y_scale=y_scale
  # The blocks below close over the x_scale / y_scale / bins locals.
  vis=Rubyvis::Panel.new do |pan| 
    pan.width  width  - margin_hor
    pan.height height - margin_vert
    pan.bottom margin_bottom
    pan.left   margin_left
    pan.right  margin_right
    pan.top    margin_top
     # Y axis: one rule per tick; the rule at 0 is black, the others light grey.
     # NOTE(review): this block takes no argument and calls data / bottom /
     # label without a receiver -- presumably Rubyvis evaluates it in the
     # rule's own context; confirm.
    pan.rule do
      data y_scale.ticks
      bottom y_scale
      stroke_style {|d| d!=0 ? "#eee" : "#000"}
      label(:anchor=>'left') do
        text y_scale.tick_format
      end
    end
    # X axis: a 5-unit black tick per x tick, placed 5 units below the
    # panel's bottom edge, labelled underneath.
    pan.rule do
      data x_scale.ticks
      left x_scale
      stroke_style "black"
      height 5
      bottom(-5)
      label(:anchor=>'bottom') do
        text x_scale.tick_format
      end
    end
   
    # Bars: positioned and sized on x by the bin edges, on y by the bin count.
    pan.bar do |bar|
      bar.data(bins)
      bar.left {|v| x_scale[v[:low]]}
      bar.width {|v| x_scale[v[:high]] - x_scale[v[:low]]}
      bar.bottom 0
      bar.height {|v| y_scale[v[:value]]}
      bar.stroke_style "black"
      bar.line_width 1
    end
     # Optional normal curve, added after the bars.
     rubyvis_normal_distribution(pan) if @line_normal_distribution
  end
  vis
end

#to_svgObject

Returns the histogram rendered as SVG


176
177
178
179
180
# File 'lib/statsample/graph/histogram.rb', line 176

# Renders the histogram and returns it as SVG: builds the panel with
# #rubyvis_panel, calls render on it, then returns the panel's to_svg output.
def to_svg
  rubyvis_panel.tap { |panel| panel.render }.to_svg
end