Class: HocrReader::Reader

Inherits:
Object
  • Object
show all
Defined in:
lib/hocr_reader/reader.rb

Overview

Reader parses an hOCR string as HTML and extracts its tagged parts.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Reader

Returns a new instance of Reader.



13
14
15
16
# File 'lib/hocr_reader/reader.rb', line 13

# Builds a reader around one document string.
#
# The raw input is kept in @string and its Nokogiri::HTML parse in @html.
#
# @param str [String] markup handed to Nokogiri::HTML
#   (presumably hOCR output -- confirm with callers)
def initialize(str)
  @string = str
  @html = Nokogiri::HTML(str)
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args, &block) ⇒ Object



18
19
20
21
22
23
24
# File 'lib/hocr_reader/reader.rb', line 18

# Routes calls to undefined methods whose name has a truthy entry in TAGS
# to extract_parts; every other name falls through to the default
# handling via super.
#
# @param name [Symbol] name of the method that was called
# @return [Object] the result of extract_parts(name) for a known tag
def method_missing(name, *args, &block)
  return super unless TAGS[name]

  extract_parts(name)
end

Instance Attribute Details

#parts ⇒ Object

Returns the value of attribute parts.



11
12
13
# File 'lib/hocr_reader/reader.rb', line 11

# Reader for the @parts instance variable.
#
# @return [Object] the current value of @parts; extract_parts assigns it
#   the Array of parts it builds
def parts
  @parts
end

Instance Method Details

#convert_to_string ⇒ Object

rubocop:enable Metrics/MethodLength, Metrics/AbcSize



52
53
54
55
56
# File 'lib/hocr_reader/reader.rb', line 52

# Concatenates the text of every element of @parts into a single string.
#
# Each part's text is followed by one space, so a non-empty result always
# ends with a trailing space.
#
# @return [String] the joined text, or an empty string when @parts is empty
def convert_to_string
  @parts.inject('') { |joined, part| joined + (part.text + ' ') }
end

#extract_parts(part_name) ⇒ Object

rubocop:disable Metrics/MethodLength, Metrics/AbcSize



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/hocr_reader/reader.rb', line 34

# Rebuilds @parts from the elements of @html that match the selector
# registered in TAGS for part_name.
#
# Elements whose text is blank after stripping are skipped. Every remaining
# element is wrapped in a Part together with its 'title' attribute split on
# ';' and its 'lang' attribute value (nil when the element has no 'lang').
#
# @param part_name [Symbol] key into TAGS
# @return [Array] the freshly built @parts
def extract_parts(part_name)
  @parts = []
  selector = TAGS[part_name]
  # example selector list: 'span.ocrx_word, span.ocrx_word'
  selector_list = selector + ', ' + selector
  nodes_with_text = @html.css(selector_list).reject { |node| node.text.strip.empty? }
  nodes_with_text.each do |node|
    attrs = node.attributes
    title_attributes = attrs['title'].value.to_s.split(';')
    lang_attr = attrs['lang']
    language_attribute = lang_attr ? lang_attr.value.to_s : nil
    @parts << Part.new(part_name, node, title_attributes, language_attribute)
  end
  @parts
end

#respond_to_missing?(name) ⇒ Boolean

Returns:



26
27
28
29
30
31
# File 'lib/hocr_reader/reader.rb', line 26

# Tells respond_to? which undefined method names this object still handles:
# any name with a truthy entry in TAGS, matching the check in method_missing.
#
# @param name [Symbol] method name being queried
# @return [Boolean] true for a name found in TAGS, otherwise the inherited
#   answer from super
def respond_to_missing?(name, *)
  # Answer true explicitly: an empty branch here would evaluate to nil and
  # make respond_to? report false for tag names that method_missing handles.
  return true if TAGS[name]

  super
end