Class: Statsample::Graph::Boxplot

Inherits:
Object
  • Object
show all
Includes:
Summarizable
Defined in:
lib/statsample/graph/boxplot.rb

Overview

Boxplot

From Wikipedia: In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.

Usage

Svg output

a = Daru::Vector.new([1,2,3,4])
b = Daru::Vector.new([3,4,5,6])

puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg

Using ReportBuilder

a = Daru::Vector.new([1,2,3,4])
b = Daru::Vector.new([3,4,5,6])
rb=ReportBuilder.new
rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
rb.save_html('boxplot.html')

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Summarizable

#summary

Constructor Details

#initialize(opts = Hash.new) ⇒ Boxplot

Create a new Boxplot. Parameters: Hash of options

  • :vectors: Array of vectors

  • :groups: Array of same size as :vectors:, with name of groups

    to colorize vectors
    


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/statsample/graph/boxplot.rb', line 58

# Create a new Boxplot.
#
# +opts+ is a Hash of options:
#
# * :vectors       - vectors to plot (required)
# * :groups        - group assignment per vector, used for coloring (default nil)
# * :name          - plot name (default: translated "Boxplot")
# * :width         - total width (default 400)
# * :height        - total height (default 300)
# * :margin_top    - top margin (default 10)
# * :margin_bottom - bottom margin (default 20)
# * :margin_left   - left margin (default 20)
# * :margin_right  - right margin (default 20)
# * :minimum       - lower bound of the y-axis (default nil)
# * :maximum       - upper bound of the y-axis (default nil)
# * :label_angle   - rotation of the vector labels (default 0)
#
# Raises RuntimeError when :vectors is missing or nil.
def initialize(opts=Hash.new)
  # Work on a copy: removing :vectors from the caller's own hash would
  # surprise callers that reuse it, and would fail on a frozen hash.
  opts = opts.dup
  @vectors = opts.delete(:vectors)
  raise "You should define vectors" if @vectors.nil?

  defaults = {
    :name          => _("Boxplot"),
    :groups        => nil,
    :width         => 400,
    :height        => 300,
    :margin_top    => 10,
    :margin_bottom => 20,
    :margin_left   => 20,
    :margin_right  => 20,
    :minimum       => nil,
    :maximum       => nil,
    :label_angle   => 0
  }
  @opts = defaults.merge(opts)
  # Only the known option names are pushed through their writers; any extra
  # keys supplied by the caller stay in @opts but are otherwise ignored.
  defaults.each_key { |key| send("#{key}=", @opts[key]) }
end

Instance Attribute Details

#groupsObject

Array assigning each vector to a group. For example, for four vectors,

boxplot.groups=[1,2,1,3]

assigns the same color to the first and third elements, and different colors to the second and fourth.



41
42
43
# File 'lib/statsample/graph/boxplot.rb', line 41

# Group assignment used to color the boxes: returns the value held in
# @groups. rubyvis_panel indexes it by vector position to pick a color, so
# vectors sharing a group value share a color.
def groups
  @groups
end

#heightObject

Total height of Boxplot



27
28
29
# File 'lib/statsample/graph/boxplot.rb', line 27

# Total height of the boxplot, margins included: returns the value held in
# @height. rubyvis_panel subtracts the top and bottom margins from it to
# size the drawing area.
def height
  @height
end

#label_angleObject

The rotation angle of the vector labels, in radians. Text is rotated clockwise relative to the anchor location. For example, with the default left alignment, an angle of Math::PI / 2 causes text to proceed downwards. The default angle is zero.



51
52
53
# File 'lib/statsample/graph/boxplot.rb', line 51

# Rotation applied to the vector labels: returns the value held in
# @label_angle. rubyvis_panel passes it to the label mark's text_angle.
def label_angle
  @label_angle
end

#margin_bottomObject

Bottom margin



31
32
33
# File 'lib/statsample/graph/boxplot.rb', line 31

# Bottom margin: returns the value held in @margin_bottom. rubyvis_panel
# uses it as the panel's bottom offset and as part of the vertical margin.
def margin_bottom
  @margin_bottom
end

#margin_leftObject

Left margin



33
34
35
# File 'lib/statsample/graph/boxplot.rb', line 33

# Left margin: returns the value held in @margin_left. rubyvis_panel uses it
# as the panel's left offset and as part of the horizontal margin.
def margin_left
  @margin_left
end

#margin_rightObject

Right margin



35
36
37
# File 'lib/statsample/graph/boxplot.rb', line 35

# Right margin: returns the value held in @margin_right. rubyvis_panel uses
# it as the panel's right offset and as part of the horizontal margin.
def margin_right
  @margin_right
end

#margin_topObject

Top margin



29
30
31
# File 'lib/statsample/graph/boxplot.rb', line 29

# Top margin: returns the value held in @margin_top. rubyvis_panel uses it
# as the panel's top offset and as part of the vertical margin.
def margin_top
  @margin_top
end

#maximumObject

Maximum value on the y-axis. Automatically derived from the data when not set.



45
46
47
# File 'lib/statsample/graph/boxplot.rb', line 45

# Upper bound of the y-axis: returns the value held in @maximum. When it is
# nil, rubyvis_panel falls back to the largest value across all vectors.
def maximum
  @maximum
end

#minimumObject

Minimum value on the y-axis. Automatically derived from the data when not set.



43
44
45
# File 'lib/statsample/graph/boxplot.rb', line 43

# Lower bound of the y-axis: returns the value held in @minimum. When it is
# nil, rubyvis_panel falls back to the smallest value across all vectors.
def minimum
  @minimum
end

#nameObject

Returns the value of attribute name.



23
24
25
# File 'lib/statsample/graph/boxplot.rb', line 23

# Name of the plot: returns the value held in @name. report_building uses it
# as the name of the report section that contains the image.
def name
  @name
end

#vectorsObject

Vectors to draw as boxplots



47
48
49
# File 'lib/statsample/graph/boxplot.rb', line 47

# Vectors to plot, one box per vector: returns the value held in @vectors,
# which initialize takes from the :vectors option.
def vectors
  @vectors
end

#widthObject

Total width of Boxplot



25
26
27
# File 'lib/statsample/graph/boxplot.rb', line 25

# Total width of the boxplot, margins included: returns the value held in
# @width. rubyvis_panel subtracts the left and right margins from it to size
# the drawing area.
def width
  @width
end

#x_scaleObject (readonly)

Returns the value of attribute x_scale.



52
53
54
# File 'lib/statsample/graph/boxplot.rb', line 52

# Returns the value held in @x_scale; nil until something assigns it.
# NOTE(review): presumably the horizontal scale built while drawing the
# panel -- confirm where it is populated.
def x_scale
  @x_scale
end

#y_scaleObject (readonly)

Returns the value of attribute y_scale.



52
53
54
# File 'lib/statsample/graph/boxplot.rb', line 52

# Returns the value held in @y_scale; nil until something assigns it.
# NOTE(review): presumably the vertical scale built while drawing the
# panel -- confirm where it is populated.
def y_scale
  @y_scale
end

Instance Method Details

#report_building(builder) ⇒ Object

:nodoc:



229
230
231
232
233
# File 'lib/statsample/graph/boxplot.rb', line 229

# Adds this plot to a report: opens a section named after the plot and
# embeds the rendered SVG in it, sized with the plot's width and height.
def report_building(builder) # :nodoc:
  builder.section(:name=>name) { |section|
    svg = to_svg
    section.image(svg, :type=>'svg', :width=>width, :height=>height)
  }
end

#rubyvis_panelObject

Returns a Rubyvis panel with the boxplot



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/statsample/graph/boxplot.rb', line 80

# Builds the Rubyvis panel that draws one box-and-whisker glyph per vector.
#
# The y-axis runs from @minimum to @maximum when those are set; otherwise it
# uses the smallest and largest values found across all vectors. For every
# vector:
#
# * the box spans the 25th to the 75th percentile,
# * a thicker rule marks the median,
# * each whisker reaches the most extreme observation lying strictly within
#   one interquartile range of the box edge,
# * observations beyond the whiskers are drawn as individual dots.
#
# Side effect: the scales are stored in @x_scale and @y_scale.
#
# Returns the Rubyvis::Panel; this method does not call render on it.
def rubyvis_panel # :nodoc:
  that = self

  y_min = @minimum || @vectors.map { |v| v.min }.min
  y_max = @maximum || @vectors.map { |v| v.max }.max

  margin_hor  = margin_left + margin_right
  margin_vert = margin_top + margin_bottom
  x_scale = Rubyvis::Scale.ordinal((0...@vectors.size).to_a).split_banded(0, width - margin_hor, 4.0 / 5)
  y_scale = Rubyvis::Scale.linear(y_min, y_max).range(0, height - margin_vert)
  y_scale.nice
  # Publish the scales through the x_scale / y_scale readers.
  @x_scale = x_scale
  @y_scale = y_scale

  colors = Rubyvis::Colors.category10

  # Pre-compute the summary of every vector once, so the drawing blocks below
  # only look values up.
  boxes = @vectors.map do |v|
    box = {:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
    box[:iqr] = box[:percentil_75] - box[:percentil_25]

    # Fences sit one IQR away from the box edges.
    fence_high = box[:percentil_75] + box[:iqr]
    fence_low  = box[:percentil_25] - box[:iqr]

    # Most extreme observations that still fall strictly inside the fences.
    low_whisker  = box[:percentil_25]
    high_whisker = box[:percentil_75]
    v.each do |d|
      low_whisker  = d if d < low_whisker && d > fence_low
      high_whisker = d if d > high_whisker && d < fence_high
    end

    box[:low_whisker]  = low_whisker
    box[:high_whisker] = high_whisker
    # Everything beyond the whiskers is an outlier.
    box[:outliers] = v.to_a.select { |d| d < low_whisker || d > high_whisker }
    box
  end

  Rubyvis::Panel.new do |pan|
    pan.width  width  - margin_hor
    pan.height height - margin_vert
    pan.bottom margin_bottom
    pan.left   margin_left
    pan.right  margin_right
    pan.top    margin_top

    # Y axis: one rule per tick, black at zero and light grey elsewhere.
    pan.rule do
      data y_scale.ticks
      bottom y_scale
      stroke_style {|d| d!=0 ? "#eee" : "#000"}
      label(:anchor=>'left') do
        text y_scale.tick_format
      end
    end
    # Baseline
    pan.rule do
      bottom 0
      stroke_style 'black'
    end

    # Vector names under each box
    pan.label do |l|
      l.data boxes
      l.text_angle that.label_angle
      l.left {|v| x_scale[index] }
      l.bottom(-15)
      l.text {|v,x| v[:name]}
    end

    # One sub-panel per vector, positioned on its band of the x scale.
    pan.panel do |bp|
      bp.data boxes
      bp.left {|v| x_scale[index] }
      bp.width x_scale.range_band

      # Box from the 25th to the 75th percentile.
      # NOTE(review): the ungrouped branches use the bar's own +index+ while
      # the grouped branches use +parent.index+ -- confirm whether ungrouped
      # boxes are meant to share one color.
      bp.bar do |b|
        b.bottom {|v| y_scale.scale(v[:percentil_25]) }
        b.height {|v| y_scale.scale(v[:percentil_75]) - y_scale.scale(v[:percentil_25]) }
        b.line_width 1
        b.stroke_style {|v|
          if that.groups
            colors.scale(that.groups[parent.index]).darker
          else
            colors.scale(index).darker
          end
        }
        b.fill_style {|v|
          if that.groups
            colors.scale(that.groups[parent.index])
          else
            colors.scale(index)
          end
        }
      end

      # Median
      bp.rule do |r|
        r.bottom {|v| y_scale.scale(v[:median]) }
        r.width x_scale.range_band
        r.line_width 2
      end

      ##
      # Whiskers
      ##
      # Low whisker: horizontal cap ...
      bp.rule do |r|
        r.visible {|v| v[:percentil_25] > v[:low_whisker] }
        r.bottom {|v| y_scale.scale(v[:low_whisker]) }
      end
      # ... and the vertical stem joining it to the box.
      bp.rule do |r|
        r.visible {|v| v[:percentil_25] > v[:low_whisker] }
        r.bottom {|v| y_scale.scale(v[:low_whisker]) }
        r.left {|v| x_scale.range_band / 2.0 }
        r.height {|v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker]) }
      end

      # High whisker: horizontal cap ...
      bp.rule do |r|
        r.visible {|v| v[:percentil_75] < v[:high_whisker] }
        r.bottom {|v| y_scale.scale(v[:high_whisker]) }
      end
      # ... and the vertical stem joining it to the box.
      bp.rule do |r|
        r.visible {|v| v[:percentil_75] < v[:high_whisker] }
        r.bottom {|v| y_scale.scale(v[:percentil_75]) }
        r.left {|v| x_scale.range_band / 2.0 }
        r.height {|v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75]) }
      end

      # Outliers, centered on the box.
      bp.dot do |dot|
        dot.shape_size 4
        dot.data {|v| v[:outliers] }
        dot.left {|v| x_scale.range_band / 2.0 }
        dot.bottom {|v| y_scale.scale(v) }
        dot.title {|v| v }
      end
    end
  end
end

#to_svgObject

Returns SVG with the boxplot



224
225
226
227
228
# File 'lib/statsample/graph/boxplot.rb', line 224

# Builds the Rubyvis panel, renders it, and returns what the panel's to_svg
# produces.
def to_svg
  rubyvis_panel.tap(&:render).to_svg
end