Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary collapse

SPACE =
" "

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#optionsObject (readonly)

Returns the value of attribute options.



19
20
21
# File 'lib/pdf/reader/page_text_receiver.rb', line 19

# Reader for the receiver's @options instance variable.
#
# @return [Object] whatever is currently stored in @options
def options
  @options
end

#stateObject (readonly)

Returns the value of attribute state.



19
20
21
# File 'lib/pdf/reader/page_text_receiver.rb', line 19

# Reader for the receiver's @state instance variable.
#
# @return [Object] whatever is currently stored in @state
def state
  @state
end

Instance Method Details

#contentObject



50
51
52
53
# File 'lib/pdf/reader/page_text_receiver.rb', line 50

# Renders the characters collected for the current page as a single string.
#
# Looks up the page's :MediaBox rectangle, converts it to an array, and hands
# it together with @characters to a PageLayout, whose #to_s is the result.
#
# @return [String] the value of PageLayout#to_s for the collected characters
def content
  media_box = @page.rectangles[:MediaBox].to_a
  layout = PageLayout.new(@characters, media_box)
  layout.to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



87
88
89
90
91
92
93
94
# File 'lib/pdf/reader/page_text_receiver.rb', line 87

# Asks the page state to invoke the XObject registered under +label+ and
# walks it with this receiver when it is a form XObject.
#
# The state yields the resolved XObject to the block; anything that is not a
# PDF::Reader::FormXObject is ignored.
#
# @param label [Object] XObject name, passed unchanged to @state.invoke_xobject
# @return [Object] whatever @state.invoke_xobject returns
def invoke_xobject(label)
  @state.invoke_xobject(label) do |candidate|
    candidate.walk(self) if candidate.is_a?(PDF::Reader::FormXObject)
  end
end

#move_to_next_line_and_show_text(str) ⇒ Object



73
74
75
76
# File 'lib/pdf/reader/page_text_receiver.rb', line 73

# Moves the text state to the start of the next line, then shows +str+ via
# #show_text.
#
# The trailing "'" comment on the signature names the content-stream operator
# this callback handles (presumably the PDF ' operator — confirm against the
# PDF specification).
#
# @param str [Object] text operand, passed unchanged to #show_text
# @return [Object] whatever #show_text returns
def move_to_next_line_and_show_text(str) # '
  @state.move_to_start_of_next_line
  show_text(str)
end

#page=(page) ⇒ Object

starting a new page



43
44
45
46
47
48
# File 'lib/pdf/reader/page_text_receiver.rb', line 43

# Prepares the receiver for a new page.
#
# Builds a fresh PageState for +page+, remembers the page itself, and resets
# the @content and @characters collections to empty arrays.
#
# NOTE(review): in the visible part of this file only @characters and @page
# are read back (by #content); @content may be unused — confirm before removing.
#
# @param page [Object] the page about to be processed; handed to PageState.new
def page=(page)
  @state = PageState.new(page)
  @page = page
  @content = []
  @characters = []
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



78
79
80
81
82
# File 'lib/pdf/reader/page_text_receiver.rb', line 78

# Sets word and character spacing on the text state, then moves to the next
# line and shows +string+ via #move_to_next_line_and_show_text.
#
# The trailing '"' comment on the signature names the content-stream operator
# this callback handles (presumably the PDF " operator — confirm against the
# PDF specification).
#
# @param aw [Object] word spacing, passed to @state.set_word_spacing
# @param ac [Object] character spacing, passed to @state.set_character_spacing
# @param string [Object] text operand, passed on unchanged
# @return [Object] whatever #move_to_next_line_and_show_text returns
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#show_text(string) ⇒ Object

Text Showing Operators

record text that is drawn on the page



59
60
61
# File 'lib/pdf/reader/page_text_receiver.rb', line 59

# Records text drawn on the page by forwarding +string+ to internal_show_text.
#
# The trailing comment shows an example of the operator this callback
# handles: Tj with the operand (AWAY).
#
# @param string [Object] text operand, passed unchanged to internal_show_text
# @return [Object] whatever internal_show_text returns
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



63
64
65
66
67
68
69
70
71
# File 'lib/pdf/reader/page_text_receiver.rb', line 63

# Handles a text-showing operator whose operand mixes strings with
# positioning adjustments, e.g. TJ [(A) 120 (WA) 20 (Y)].
#
# Elements are processed in order: String elements are recorded through
# internal_show_text; every other element is passed to
# @state.process_glyph_displacement as the second argument, with 0 and false
# as the first and third.
#
# @param params [#each] the operand elements, in drawing order
# @return [Object] the result of params.each
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    next internal_show_text(element) if element.is_a?(String)

    @state.process_glyph_displacement(0, element, false)
  end
end