Class: CooCoo::DataSources::Xournal::TrainingDocument::DocumentReader

Inherits:
Object
  • Object
show all
Defined in:
lib/coo-coo/data_sources/xournal/training_document/document_reader.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ DocumentReader

Returns a new instance of DocumentReader.



9
10
# File 'lib/coo-coo/data_sources/xournal/training_document/document_reader.rb', line 9

# Creates a reader. The constructor takes no arguments and sets no state.
#
# @return [DocumentReader]
def initialize; end

Instance Method Details

#load(xournal) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/coo-coo/data_sources/xournal/training_document/document_reader.rb', line 12

# Builds a TrainingDocument by running #process_layer over every layer of
# every page in +xournal+.
#
# The column and row counts come from #read_meta_label; the version and
# cells-per-example values it also returns are not used here.
#
# @param xournal [#each_page] source whose pages respond to #each_layer
# @return [TrainingDocument] the document handed to #process_layer
# @raise [ArgumentError] when the column or row count is nil
def load(xournal)
  _version, columns, rows, _cells_per_example = read_meta_label(xournal)

  if columns.nil? || rows.nil?
    raise ArgumentError, "Xournal lacks a Text element with '#{META_LABEL} VERSION: COLS ROWS CELLS_PER_EXAMPLE'"
  end

  TrainingDocument.new.tap do |doc|
    xournal.each_page do |page|
      page.each_layer { |layer| process_layer(doc, page, layer, columns, rows) }
    end
  end
end

#process_layer(doc, page, layer, columns, rows) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/coo-coo/data_sources/xournal/training_document/document_reader.rb', line 58

# Splits one layer into grid cells and adds an example to +doc+ for every cell
# that holds a label or at least one stroke.
#
# The page is divided into +columns+ x +rows+ equal cells. Text elements are
# assigned to the cell whose index is the *rounded* quotient of their
# position by the cell size; strokes are assigned by the *truncated* quotient
# of the first point returned by +stroke.minmax+. Text matching
# /^META_LABEL/ and strokes whose colour is at distance 0.0 from
# PARSED_GRID_COLOR are ignored.
#
# @param doc [#add_example] receives (label text or nil, normalized strokes)
# @param page [#width, #height] page that owns the layer
# @param layer [#each_text, #each_stroke] layer to read
# @param columns [Integer] number of grid columns
# @param rows [Integer] number of grid rows
# @return [Integer] +rows+ (the value of the final +rows.times+ loop)
def process_layer(doc, page, layer, columns, rows)
  cell_width = page.width / columns.to_f
  cell_height = page.height / rows.to_f

  # row index => column index => list, created on first access.
  new_grid = lambda do
    Hash.new { |by_row, r| by_row[r] = Hash.new { |by_col, c| by_col[c] = [] } }
  end
  labels = new_grid.call
  strokes = new_grid.call

  layer.each_text do |txt|
    next if txt.text =~ /^#{META_LABEL}/
    # Labels are rounded to the nearest cell index rather than truncated.
    label_row = (txt.y / cell_height).round
    label_column = (txt.x / cell_width).round
    labels[label_row.to_i][label_column.to_i] << txt
  end

  layer.each_stroke do |stroke|
    parsed = ChunkyPNG::Color.parse(stroke.color)
    # Strokes drawn in exactly the grid colour are not example data.
    next if ChunkyPNG::Color.euclidean_distance_rgba(parsed, PARSED_GRID_COLOR) == 0.0
    lower_corner, _upper_corner = stroke.minmax
    stroke_row = lower_corner[1] / cell_height
    stroke_column = lower_corner[0] / cell_width
    strokes[stroke_row.to_i][stroke_column.to_i] << stroke
  end

  inverse_width = 1.0 / cell_width
  inverse_height = 1.0 / cell_height

  rows.times do |r|
    origin_y = (r * cell_height).floor

    columns.times do |c|
      origin_x = (c * cell_width).floor
      label = labels[r][c].first
      cell_strokes = strokes[r][c]
      next if cell_strokes.empty? && label.nil?

      label_text = label && label.text
      # Move each stroke to the cell origin, then scale by the cell size; the
      # third scale factor reuses the horizontal one.
      normalized = cell_strokes.map do |s|
        s.translate(-origin_x, -origin_y).scale(inverse_width, inverse_height, inverse_width)
      end
      doc.add_example(label_text, normalized)
    end
  end
end

#read_meta_label(xournal) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/coo-coo/data_sources/xournal/training_document/document_reader.rb', line 30

# Finds the meta label text element and parses the grid description from it.
#
# A text element is a candidate when its text matches /^META_LABEL/. The
# candidate is then matched against META_LABEL_REGEX, whose captures are read
# as version, columns, rows and an optional cells-per-example count
# (defaulting to 1 when the fourth capture is absent).
#
# When no candidate exists, or the candidate does not satisfy
# META_LABEL_REGEX, four nils are returned instead of raising, so the caller
# can report the problem itself.
#
# @param xournal [#each_page] source whose pages respond to #each_layer and
#   whose layers respond to #each_text
# @return [Array] +[version, columns, rows, cells_per_example]+ as
#   (Float, Integer, Integer, Integer), or four nils when no usable label
#   was found
def read_meta_label(xournal)
  meta = nil

  xournal.each_page do |page|
    page.each_layer do |layer|
      layer.each_text do |txt|
        next unless txt.text =~ /^#{META_LABEL}/
        meta = txt.text
        # NOTE(review): this break leaves only the current layer's text loop.
        # Later layers and pages are still scanned, so a label found later
        # replaces this one. Confirm whether first-match was intended.
        break
      end
    end
  end

  # A label that starts with META_LABEL but is otherwise malformed yields no
  # match; treat it the same as a missing label rather than calling [] on nil.
  m = meta && meta.match(META_LABEL_REGEX)
  return [nil, nil, nil, nil] unless m

  [m[1].to_f, m[2].to_i, m[3].to_i, (m[4] || 1).to_i]
end