Class: PDF::Reader::PageTextReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::PageTextReceiver
- Extended by:
- Forwardable
- Defined in:
- lib/pdf/reader/page_text_receiver.rb
Overview
Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.
Constant Summary collapse
- SPACE =
" "
Instance Attribute Summary collapse
-
#options ⇒ Object
readonly
Returns the value of attribute options.
-
#state ⇒ Object
readonly
Returns the value of attribute state.
Instance Method Summary collapse
-
#content ⇒ Object
deprecated.
-
#invoke_xobject(label) ⇒ Object
XObjects.
-
#move_to_next_line_and_show_text(str) ⇒ Object
The ' operator.
-
#page=(page) ⇒ Object
starting a new page.
- #runs(opts = {}) ⇒ Object
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
The " operator.
-
#show_text(string) ⇒ Object
Text Showing Operators.
-
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)].
Instance Attribute Details
#options ⇒ Object (readonly)
Returns the value of attribute options.
19 20 21 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 19 def options @options end |
#state ⇒ Object (readonly)
Returns the value of attribute state.
19 20 21 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 19 def state @state end |
Instance Method Details
#content ⇒ Object
deprecated
83 84 85 86 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 83 def content mediabox = @page.rectangles[:MediaBox] PageLayout.new(runs, mediabox).to_s end |
#invoke_xobject(label) ⇒ Object
XObjects
122 123 124 125 126 127 128 129 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 122 def invoke_xobject(label) @state.invoke_xobject(label) do |xobj| case xobj when PDF::Reader::FormXObject then xobj.walk(self) end end end |
#move_to_next_line_and_show_text(str) ⇒ Object
The ' operator
108 109 110 111 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 108 def move_to_next_line_and_show_text(str) # ' @state.move_to_start_of_next_line show_text(str) end |
#page=(page) ⇒ Object
starting a new page
43 44 45 46 47 48 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 43 def page=(page) @state = PageState.new(page) @page = page @content = [] @characters = [] end |
#runs(opts = {}) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 50 def runs(opts = {}) runs = @characters if rect = opts.fetch(:rect, @page.rectangles[:CropBox]) runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect) end if opts.fetch(:skip_zero_width, true) runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs) end if opts.fetch(:skip_overlapping, true) runs = OverlappingRunsFilter.exclude_redundant_runs(runs) end runs = NoTextFilter.exclude_empty_strings(runs) if opts.fetch(:merge, true) runs = merge_runs(runs) end if (only_filter = opts.fetch(:only, nil)) runs = AdvancedTextRunFilter.only(runs, only_filter) end if (exclude_filter = opts.fetch(:exclude, nil)) runs = AdvancedTextRunFilter.exclude(runs, exclude_filter) end runs end |
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
The " operator
113 114 115 116 117 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 113 def set_spacing_next_line_show_text(aw, ac, string) # " @state.set_word_spacing(aw) @state.set_character_spacing(ac) move_to_next_line_and_show_text(string) end |
#show_text(string) ⇒ Object
Text Showing Operators
record text that is drawn on the page
92 93 94 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 92 def show_text(string) # Tj (AWAY) internal_show_text(string) end |
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)]
96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 96 def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)] params.each do |arg| if arg.is_a?(String) internal_show_text(arg) elsif arg.is_a?(Numeric) @state.process_glyph_displacement(0, arg, false) else # skip it end end end |