Class: QuickAndRuby::Pdf::Document
- Inherits: Object
- Inheritance chain: Object → QuickAndRuby::Pdf::Document
- Defined in: lib/quick_and_ruby/pdf/document.rb
Constant Summary collapse
- DEFAULT_SCHEMES = %w[http https].freeze (the default value of the `schemes:` argument of #extract_links)
Instance Attribute Summary collapse
- #filepath ⇒ Object (readonly): Returns the value of attribute filepath.
Instance Method Summary collapse
- #doc_reader ⇒ Object
- #extract_links(schemes: DEFAULT_SCHEMES) ⇒ Object
- #initialize(filepath) ⇒ Document (constructor): A new instance of Document.
Constructor Details
#initialize(filepath) ⇒ Document
Returns a new instance of Document.
13 14 15 |
# File 'lib/quick_and_ruby/pdf/document.rb', line 13 def initialize(filepath) @filepath = filepath end |
Instance Attribute Details
#filepath ⇒ Object (readonly)
Returns the value of attribute filepath.
11 12 13 |
# File 'lib/quick_and_ruby/pdf/document.rb', line 11 def filepath @filepath end |
Instance Method Details
#doc_reader ⇒ Object
37 38 39 |
# File 'lib/quick_and_ruby/pdf/document.rb', line 37
# Memoized PDF::Reader for this document.
#
# The reader is created from #filepath on the first call and cached in
# @doc_reader, so subsequent calls return the same instance.
#
# @return [PDF::Reader] the cached reader
def doc_reader
  return @doc_reader if @doc_reader

  @doc_reader = PDF::Reader.new(filepath)
end
#extract_links(schemes: DEFAULT_SCHEMES) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/quick_and_ruby/pdf/document.rb', line 17
# Collects the unique URIs found in the document, in order of first appearance.
#
# For every page, URIs are gathered from two places:
# 1. the page's extracted text, and
# 2. the :A => :URI entry of each annotation listed under the page's :Annots.
#
# The :Annots value, each annotation, its action dictionary and the URI value
# are all passed through objects.deref before use, because a PDF may store any
# of them as an indirect reference instead of inline. Entries that do not
# resolve to the expected Array / Hash / String are skipped rather than raising.
#
# @param schemes [Array<String>] URI schemes to keep; handed to URI.extract
# @return [Array<String>] de-duplicated links
def extract_links(schemes: DEFAULT_SCHEMES)
  objects = doc_reader.objects

  links = doc_reader.pages.flat_map do |page|
    found = URI.extract(page.text, schemes)

    # /Annots itself is frequently an indirect reference to the array.
    annots = objects.deref(page.attributes[:Annots])
    next found unless annots.is_a?(Array)

    annots.each do |annot_ref|
      annot = objects.deref(annot_ref)
      next unless annot.is_a?(Hash)

      # The action dictionary and its URI string may be indirect as well.
      action = objects.deref(annot[:A])
      next unless action.is_a?(Hash)

      target_uri = objects.deref(action[:URI])
      found.concat(URI.extract(target_uri, schemes)) if target_uri.is_a?(String)
    end

    found
  end

  links.uniq
end