Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary collapse

SPACE =
" "

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



18
19
20
# File 'lib/pdf/reader/page_text_receiver.rb', line 18

# Read-only accessor: returns the current value of the @options instance
# variable unchanged.
def options
  @options
end

#state ⇒ Object (readonly)

Returns the value of attribute state.



18
19
20
# File 'lib/pdf/reader/page_text_receiver.rb', line 18

# Read-only accessor: returns the current value of the @state instance
# variable unchanged.
def state
  @state
end

Instance Method Details

#content ⇒ Object



53
54
55
# File 'lib/pdf/reader/page_text_receiver.rb', line 53

# Returns the text gathered for the current page.
#
# The collected @characters and the @device_mediabox are handed to a new
# PageLayout, and the value of that layout's #to_s is returned.
def content
  layout = PageLayout.new(@characters, @device_mediabox)
  layout.to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



89
90
91
92
93
94
95
96
# File 'lib/pdf/reader/page_text_receiver.rb', line 89

# Passes +label+ to the page state's invoke_xobject and inspects every
# object it yields back.
#
# Only PDF::Reader::FormXObject instances are acted on: they are walked with
# this receiver as the argument. For any other yielded object the block does
# nothing and evaluates to nil.
#
# Returns whatever @state.invoke_xobject returns.
def invoke_xobject(label)
  @state.invoke_xobject(label) do |xobject|
    xobject.walk(self) if xobject.is_a?(PDF::Reader::FormXObject)
  end
end

#move_to_next_line_and_show_text(str) ⇒ Object



75
76
77
78
# File 'lib/pdf/reader/page_text_receiver.rb', line 75

# Handler for the operator named in the trailing comment ('): first asks the
# page state to move to the start of the next line, then shows +str+ through
# show_text. Returns whatever show_text returns.
def move_to_next_line_and_show_text(str) # '
  # Order matters: the line move happens before the text is shown.
  @state.move_to_start_of_next_line
  show_text(str)
end

#page=(page) ⇒ Object

starting a new page



42
43
44
45
46
47
48
49
50
51
# File 'lib/pdf/reader/page_text_receiver.rb', line 42

# Prepares the receiver for a new page.
#
# Builds a fresh PageState for +page+, remembers the page, empties the
# @content and @characters collections, and looks up the page's MediaBox by
# dereferencing page.attributes[:MediaBox] through page.objects. The two
# MediaBox coordinate pairs (indices 0-1 and 2-3) are then passed through the
# state's ctm_transform and stored together as @device_mediabox.
def page=(page)
  # The state must exist before ctm_transform can be used below.
  @state = PageState.new(page)
  @page = page
  @content = []
  @characters = []
  @mediabox = page.objects.deref(page.attributes[:MediaBox])

  bottom_left = @state.ctm_transform(@mediabox[0], @mediabox[1])
  top_right = @state.ctm_transform(@mediabox[2], @mediabox[3])
  @device_mediabox = [bottom_left.first, bottom_left.last, top_right.first, top_right.last]
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



80
81
82
83
84
# File 'lib/pdf/reader/page_text_receiver.rb', line 80

# Handler for the operator named in the trailing comment ("): applies +aw+ as
# the word spacing and +ac+ as the character spacing on the page state, then
# delegates to move_to_next_line_and_show_text to advance a line and show
# +string+. Returns whatever that delegate returns.
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  # Spacing is updated before the text is shown.
  move_to_next_line_and_show_text(string)
end

#show_text(string) ⇒ Object

Text Showing Operators

record text that is drawn on the page



61
62
63
# File 'lib/pdf/reader/page_text_receiver.rb', line 61

# Handler for the Tj operator (the trailing comment shows an example
# operand). Forwards +string+ untouched to internal_show_text, which is
# defined outside this excerpt, and returns its result.
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



65
66
67
68
69
70
71
72
73
# File 'lib/pdf/reader/page_text_receiver.rb', line 65

# Handler for the TJ operator, whose operand is a list mixing strings with
# other values (see the example in the trailing comment).
#
# Elements are processed in order: String elements are shown through
# internal_show_text; every other element is handed to the page state as
# process_glyph_displacement(0, element, false).
#
# Returns +params+ (the value of #each).
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String
      internal_show_text(element)
    else
      @state.process_glyph_displacement(0, element, false)
    end
  end
end