Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary collapse

SPACE =
" "

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#optionsObject (readonly)

Returns the value of attribute options.



17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 17

# Reader for the +@options+ instance variable.
#
# @return [Object] whatever is currently stored in +@options+; where it is
#   assigned is not visible in this excerpt
def options
  @options
end

#stateObject (readonly)

Returns the value of attribute state.



17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 17

# Reader for the +@state+ instance variable.
#
# @return [PageState, nil] the state object assigned by {#page=}
#   (a +PageState.new(page)+); nil if no page has been set yet
def state
  @state
end

Instance Method Details

#contentObject



48
49
50
# File 'lib/pdf/reader/page_text_receiver.rb', line 48

# Renders the characters gathered for the current page as a single string.
#
# A fresh PageLayout is built from the collected characters and the page's
# media box on every call, and its string form is returned.
#
# @return [String] the result of PageLayout#to_s
def content
  layout = PageLayout.new(@characters, @mediabox)
  layout.to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



84
85
86
87
88
89
90
91
# File 'lib/pdf/reader/page_text_receiver.rb', line 84

# Handles an XObject invocation by delegating the lookup to the page state.
#
# The state yields the XObject registered under +label+. Only form XObjects
# are processed further: they are walked with this receiver, so any
# operators they contain are dispatched back to this object. Every other
# kind of XObject is ignored.
#
# @param label [Object] the XObject name, passed through to the state
# @return [Object] whatever +@state.invoke_xobject+ returns
def invoke_xobject(label)
  @state.invoke_xobject(label) do |xobject|
    xobject.walk(self) if xobject.is_a?(PDF::Reader::FormXObject)
  end
end

#move_to_next_line_and_show_text(str) ⇒ Object



70
71
72
73
# File 'lib/pdf/reader/page_text_receiver.rb', line 70

# Handler for the single-quote (') text operator, as noted by the trailing
# comment on the def line: advance the state to the start of the next line,
# then show +str+ exactly as {#show_text} would.
#
# @param str [String] the string operand to show
# @return [Object] whatever {#show_text} returns
def move_to_next_line_and_show_text(str) # '
  @state.move_to_start_of_next_line
  show_text(str)
end

#page=(page) ⇒ Object

Starts a new page: resets the per-page state and the collected characters, and looks up the page's MediaBox.



41
42
43
44
45
46
# File 'lib/pdf/reader/page_text_receiver.rb', line 41

# Resets the receiver so it can start collecting text for +page+.
#
# A new PageState is created for the page, the content and character
# buffers are replaced with fresh empty arrays, and the page's :MediaBox
# attribute is dereferenced through the page's object store.
#
# @param page [#objects, #attributes] the page about to be processed
def page=(page)
  @state = PageState.new(page)
  @content, @characters = [], []
  object_store = page.objects
  @mediabox = object_store.deref(page.attributes[:MediaBox])
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



75
76
77
78
79
# File 'lib/pdf/reader/page_text_receiver.rb', line 75

# Handler for the double-quote (") text operator, as noted by the trailing
# comment on the def line: set word spacing to +aw+ and character spacing to
# +ac+ on the state, then move to the next line and show +string+.
#
# @param aw [Object] value passed to +@state.set_word_spacing+
# @param ac [Object] value passed to +@state.set_character_spacing+
# @param string [String] the string operand to show
# @return [Object] whatever {#move_to_next_line_and_show_text} returns
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#show_text(string) ⇒ Object

Text Showing Operators

record text that is drawn on the page



56
57
58
# File 'lib/pdf/reader/page_text_receiver.rb', line 56

# Handler for the Tj operator (see the trailing comment for an example
# operand). Delegates directly to +internal_show_text+, which is defined
# outside this excerpt.
#
# @param string [String] the string operand to show
# @return [Object] whatever +internal_show_text+ returns
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



60
61
62
63
64
65
66
67
68
# File 'lib/pdf/reader/page_text_receiver.rb', line 60

# Handler for the TJ operator, whose operand mixes strings with other
# values, e.g. [(A) 120 (WA) 20 (Y)].
#
# String elements are shown via +internal_show_text+. Every other element
# is handed to the state as a glyph displacement, with a width of 0 and the
# final argument set to false.
#
# @param params [Array] the elements of the TJ operand, in order
# @return [Array] +params+ itself, as returned by +each+
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String
      internal_show_text(element)
    else
      @state.process_glyph_displacement(0, element, false)
    end
  end
end