Class: OCRElement

Inherits:
HOCRBox show all
Includes:
Enumerable
Defined in:
lib/ocr_element.rb

Direct Known Subclasses

OCRBlock, OCRLine, OCRPage, OCRParagraph, OCRWord

Instance Attribute Summary collapse

Attributes inherited from HOCRBox

#bottom, #coordinates, #height, #left, #right, #top, #width

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from HOCRBox

#bottom_distance_to, #coordinates_to_s, #enclosed_by?, #encloses?, #left_distance_to, #left_of?, #right_distance_to, #right_of?, #to_css_style, #top_distance_to

Constructor Details

#initialize(ocr_class, children, coordinates) ⇒ OCRElement

Returns a new instance of OCRElement.



68
69
70
71
72
73
# File 'lib/ocr_element.rb', line 68

# Builds an element from its OCR class name, its children and its
# bounding-box coordinates.
#
# ocr_class   - stored in @ocr_class.
# children    - stored in @children.
# coordinates - handed unchanged to the superclass constructor.
#
# The feature list (@features) always starts out as an empty array.
def initialize(ocr_class, children, coordinates)
  @ocr_class = ocr_class
  @children  = children
  @features  = []
  # Own state is set up before the superclass processes the coordinates.
  super(coordinates)
end

Instance Attribute Details

#children ⇒ Object (readonly)

Returns the value of attribute children.



8
9
10
# File 'lib/ocr_element.rb', line 8

# Reader for @children, the collection that was passed to the constructor.
def children
  @children
end

#features ⇒ Object

Returns the value of attribute features.



9
10
11
# File 'lib/ocr_element.rb', line 9

# Reader for @features; the constructor initialises it to an empty array.
def features
  @features
end

#ocr_class ⇒ Object (readonly)

Returns the value of attribute ocr_class.



8
9
10
# File 'lib/ocr_element.rb', line 8

# Reader for @ocr_class, the OCR class name that was passed to the constructor.
def ocr_class
  @ocr_class
end

Class Method Details

.create(ocr_element_html) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/ocr_element.rb', line 16

# Factory: builds the element object that matches an HTML node.
#
# The node's OCR class (see extract_ocr_class) selects the concrete type:
#   'ocrx_block' -> OCRBlock      'ocr_par'   -> OCRParagraph
#   'ocr_line'   -> OCRLine       'ocrx_word' -> OCRWord
#   anything else (including nil) -> OCRElement
#
# Word nodes get the result of extract_word_children as children; every
# other node gets the result of extract_children.
#
# Returns the newly constructed element.
def create(ocr_element_html)
  ocr_class   = extract_ocr_class(ocr_element_html)
  coordinates = extract_coordinates(ocr_element_html)

  children =
    if ocr_class == 'ocrx_word'
      extract_word_children(ocr_element_html)
    else
      extract_children(ocr_element_html)
    end

  # Pick the class first, then instantiate once with the shared arguments.
  element_type =
    case ocr_class
    when 'ocrx_block' then OCRBlock
    when 'ocr_par'    then OCRParagraph
    when 'ocr_line'   then OCRLine
    when 'ocrx_word'  then OCRWord
    else OCRElement
    end

  element_type.new(ocr_class, children, coordinates)
end

.create_from_html(ocr_element_html) ⇒ Object



12
13
14
# File 'lib/ocr_element.rb', line 12

# Convenience entry point: forwards the HTML node to create and returns
# whatever create returns.
def create_from_html(ocr_element_html)
  create(ocr_element_html)
end

.extract_children(ocr_element_html) ⇒ Object



44
45
46
47
48
49
50
51
# File 'lib/ocr_element.rb', line 44

# Builds one element per entry of ocr_element_html.elements by passing
# each entry to OCRElement.create.
#
# Returns an array of the created elements, without those whose ocr_class
# is nil.
def extract_children(ocr_element_html)
  built = []
  ocr_element_html.elements.each do |fragment|
    built << OCRElement.create(fragment)
  end
  # Filter out <br> elements (they end up with no ocr_class).
  built.reject { |element| element.ocr_class.nil? }
end

.extract_coordinates(ocr_element_html) ⇒ Object



54
55
56
# File 'lib/ocr_element.rb', line 54

# Reads the node's 'title' attribute and parses the bounding box out of it
# via extract_coordinates_from_string.
#
# Returns whatever extract_coordinates_from_string returns for that title.
def extract_coordinates(ocr_element_html)
  title = ocr_element_html['title']
  extract_coordinates_from_string(title)
end

.extract_coordinates_from_string(s) ⇒ Object



58
59
60
61
# File 'lib/ocr_element.rb', line 58

# Pulls the four numbers of the first "bbox a b c d" occurrence out of s.
#
# Returns an array of four digit strings (not converted to integers), or
# four nils when s is nil or contains no such bbox entry.
def extract_coordinates_from_string(s)
  bbox = /bbox (\d+) (\d+) (\d+) (\d+)/.match(s)
  bbox ? bbox.captures : [nil, nil, nil, nil]
end

.extract_ocr_class(ocr_element_html) ⇒ Object



63
64
65
# File 'lib/ocr_element.rb', line 63

# Returns the result of looking up 'class' on the HTML node; create uses
# this value to decide which element type to build.
# NOTE(review): presumably nil when the node has no class attribute — confirm
# against the HTML parser in use.
def extract_ocr_class(ocr_element_html)
   ocr_element_html['class']
end

.extract_word_children(ocr_element_html) ⇒ Object



40
41
42
# File 'lib/ocr_element.rb', line 40

# Children of a word node: a one-element array holding the node's text
# (the result of ocr_element_html.text).
def extract_word_children(ocr_element_html)
    [ocr_element_html.text]
end

Instance Method Details

#css_class_string ⇒ Object



89
90
91
92
93
94
95
# File 'lib/ocr_element.rb', line 89

# CSS class name for this element.
#
# Returns the OCR class on its own when no features are set, otherwise
# "<ocr_class>-<features_to_css_class>".
def css_class_string
  return "#{@ocr_class}" if @features.empty?

  "#{@ocr_class}-#{features_to_css_class}"
end

#each ⇒ Object



75
76
77
78
79
# File 'lib/ocr_element.rb', line 75

# Yields every child of this element in order. This is the iteration
# primitive used by the Enumerable mixin.
#
# With a block: returns the result of children.each.
# Without a block: returns an Enumerator over the children instead of
# failing with LocalJumpError on the first yield.
def each
  return enum_for(:each) unless block_given?

  children.each { |child| yield child }
end

#features_to_css_class ⇒ Object



109
110
111
# File 'lib/ocr_element.rb', line 109

# Turns the feature list into a CSS class suffix: duplicates removed,
# sorted, joined with underscores. Returns "" when there are no features.
def features_to_css_class
  distinct_features = @features.uniq
  distinct_features.sort.join('_')
end

#mark_in_rspec(color) ⇒ Object



85
86
87
# File 'lib/ocr_element.rb', line 85

# Wraps this element's string form (to_s) in a <span> whose inline style
# sets the given text color.
#
# color - inserted verbatim into the style attribute (not escaped).
def mark_in_rspec(color)
  "<span style='color: #{color}'>#{self}</span>"
end

#to_html(display_class = css_class_string, style = nil) ⇒ Object



104
105
106
107
# File 'lib/ocr_element.rb', line 104

# Renders this element as a <span> that wraps the concatenated to_html
# output of its children.
#
# display_class - value of the span's class attribute; defaults to
#                 css_class_string.
# style         - accepted but not used by this implementation.
def to_html(display_class = css_class_string, style = nil)
  children_html = @children.map(&:to_html).join('')
  "<span class='#{ display_class }'> #{ children_html } </span>"
end

#to_image_html(options = {}) ⇒ Object



97
98
99
100
101
102
# File 'lib/ocr_element.rb', line 97

# Renders an empty, styled <span> for this element followed by the
# to_image_html output of all children (siblings in the markup, not nested).
#
# options - Hash:
#   :zoom      - passed to to_css_style and on to every child; defaults to 1.
#   :css_class - class attribute for this element's span only; defaults to
#                css_class_string. It is not forwarded to the children.
def to_image_html(options = {})
  zoom          = options[:zoom] || 1
  display_class = options[:css_class] || css_class_string
  nested_html   = @children.map { |child| child.to_image_html(:zoom => zoom) }.join('')
  own_html      = "<span class='#{ display_class }' style='#{ to_css_style(zoom) }' ></span>"
  "#{own_html}#{nested_html}"
end

#to_s ⇒ Object



81
82
83
# File 'lib/ocr_element.rb', line 81

# Multi-line debug representation: a header line made of the class name,
# the feature list and the coordinates, followed by one tab-indented line
# per child (each child's own string form).
def to_s
  header      = "#{self.class}:#{@features}#{ coordinates_to_s }->\n"
  child_lines = children.map { |child| "\t#{child}" }
  header + child_lines.join("\n")
end