Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary collapse

SPACE =
" "

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#optionsObject (readonly)

Returns the value of attribute options.



19
20
21
# File 'lib/pdf/reader/page_text_receiver.rb', line 19

# Read-only accessor for the options attribute.
#
# @return [Object, nil] whatever is stored in @options; nil if it was never assigned
def options
  @options
end

#stateObject (readonly)

Returns the value of attribute state.



19
20
21
# File 'lib/pdf/reader/page_text_receiver.rb', line 19

# Read-only accessor for the state attribute.
#
# @return [Object, nil] the object held in @state (page= stores a PageState
#   there); nil if it was never assigned
def state
  @state
end

Instance Method Details

#contentObject

deprecated



75
76
77
78
# File 'lib/pdf/reader/page_text_receiver.rb', line 75

# Renders the text collected for the current page as a single string.
#
# Listed as deprecated in the class documentation.
#
# The page's :MediaBox rectangle is looked up first, then the filtered runs
# (default options) are laid out inside it by PageLayout.
#
# @return [String] the result of PageLayout#to_s for this page's runs
def content
  media_box = @page.rectangles[:MediaBox]
  layout = PageLayout.new(runs, media_box)
  layout.to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



114
115
116
117
118
119
120
121
# File 'lib/pdf/reader/page_text_receiver.rb', line 114

# Handles an XObject invocation by asking the page state to resolve +label+.
#
# The state yields the resolved object; only form XObjects are walked with
# this receiver, so the operators they contain are reported back to it.
# Any other kind of object yielded is ignored.
#
# @param label the XObject name passed through to @state.invoke_xobject
# @return whatever @state.invoke_xobject returns
def invoke_xobject(label)
  @state.invoke_xobject(label) do |candidate|
    # Same class test a `when` clause performs.
    next unless PDF::Reader::FormXObject === candidate

    candidate.walk(self)
  end
end

#move_to_next_line_and_show_text(str) ⇒ Object



100
101
102
103
# File 'lib/pdf/reader/page_text_receiver.rb', line 100

# Handles the ' (single quote) text operator: move to the next line, then
# show text.
#
# The line move is applied to @state first, so show_text runs against the
# already-updated state.
#
# @param str the text to show (presumably a String; passed to show_text as-is)
# @return whatever show_text returns
def move_to_next_line_and_show_text(str) # '
  @state.move_to_start_of_next_line
  show_text(str)
end

#page=(page) ⇒ Object

starting a new page



43
44
45
46
47
48
# File 'lib/pdf/reader/page_text_receiver.rb', line 43

# Prepares the receiver for a new page.
#
# A fresh PageState is built for +page+ and every per-page accumulator is
# replaced with a new empty array, so nothing collected for a previous page
# carries over.
#
# @param page the page about to be processed; stored in @page and handed to
#   PageState.new
def page=(page)
  # Build the state first: if this raises, nothing else has been touched.
  @state = PageState.new(page)

  @content = []
  @page = page

  @characters = []
end

#runs(opts = {}) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/pdf/reader/page_text_receiver.rb', line 50

# Returns the text runs collected for the current page after passing them
# through a fixed sequence of filters.
#
# Recognised keys in +opts+:
#   :rect            - rectangle to clip to. Defaults to the page's :CropBox;
#                      a nil/false value skips the clipping step.
#   :skip_zero_width - drop zero-width runs (default true).
#   :skip_overlapping- drop redundant overlapping runs (default true).
#   :merge           - pass the survivors through merge_runs (default true).
#
# Runs with empty strings are always removed, regardless of options.
#
# @param opts [Hash] filter switches, see above
# @return the filtered (and, by default, merged) runs
def runs(opts = {})
  selected = @characters

  # The CropBox default is looked up even when :rect is supplied.
  clip = opts.fetch(:rect, @page.rectangles[:CropBox])
  selected = BoundingRectangleRunsFilter.runs_within_rect(selected, clip) if clip

  selected = ZeroWidthRunsFilter.exclude_zero_width_runs(selected) if opts.fetch(:skip_zero_width, true)
  selected = OverlappingRunsFilter.exclude_redundant_runs(selected) if opts.fetch(:skip_overlapping, true)

  selected = NoTextFilter.exclude_empty_strings(selected)

  selected = merge_runs(selected) if opts.fetch(:merge, true)
  selected
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



105
106
107
108
109
# File 'lib/pdf/reader/page_text_receiver.rb', line 105

# Handles the " (double quote) text operator: set word and character
# spacing, move to the next line, then show text.
#
# Both spacing values are applied to @state before the line move and text
# showing are delegated to move_to_next_line_and_show_text.
#
# @param aw value passed to @state.set_word_spacing
# @param ac value passed to @state.set_character_spacing
# @param string the text to show (presumably a String; forwarded unchanged)
# @return whatever move_to_next_line_and_show_text returns
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#show_text(string) ⇒ Object

Text Showing Operators

record text that is drawn on the page



84
85
86
# File 'lib/pdf/reader/page_text_receiver.rb', line 84

# Handles the Tj text-showing operator, e.g. `(AWAY) Tj`, recording text
# that is drawn on the page.
#
# All of the work is delegated to internal_show_text (defined elsewhere in
# this class).
#
# @param string the text operand of the operator (presumably a String)
# @return whatever internal_show_text returns
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



88
89
90
91
92
93
94
95
96
97
98
# File 'lib/pdf/reader/page_text_receiver.rb', line 88

# Handles the TJ operator, whose operand is an array mixing strings and
# numbers, e.g. `[(A) 120 (WA) 20 (Y)] TJ`.
#
# Elements are processed in order:
#   * strings are shown via internal_show_text;
#   * numbers are forwarded to @state.process_glyph_displacement as the
#     second argument (with 0 and false for the others);
#   * anything else is silently skipped.
#
# @param params [Array] the operand array of the TJ operator
# @return [Array] +params+ itself (the result of #each)
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String
      internal_show_text(element)
    when Numeric
      @state.process_glyph_displacement(0, element, false)
    end
  end
end