Class: Honyomi::Pdf
- Inherits:
-
Object
- Object
- Honyomi::Pdf
- Defined in:
- lib/honyomi/pdf.rb
Instance Method Summary collapse
- #generate_images(output_dir) ⇒ Object
-
#initialize(filename) ⇒ Pdf
constructor
A new instance of Pdf.
- #pages ⇒ Object
Constructor Details
#initialize(filename) ⇒ Pdf
Returns a new instance of Pdf.
10 11 12 |
# File 'lib/honyomi/pdf.rb', line 10
#
# Remembers the path of the PDF that #pages and #generate_images operate on.
#
# The argument is normalised with File.path, so both a String and a
# path-like object responding to +to_path+ (e.g. a Pathname) are accepted and
# stored as a plain String. This matters because the path is later handed to
# Kernel#system, which only accepts Strings as command arguments.
#
# @param filename [String, #to_path] path to the PDF file
# @raise [TypeError] if +filename+ is neither a String nor path-like
def initialize(filename)
  @filename = File.path(filename)
end
Instance Method Details
#generate_images(output_dir) ⇒ Object
39 40 41 42 |
# File 'lib/honyomi/pdf.rb', line 39
#
# Runs the external +pdftoppm+ command with the +-jpeg+ flag on the PDF,
# creating +output_dir+ (and any missing parents) first.
#
# The last argument given to +pdftoppm+ is <tt>output_dir/book</tt>, which
# the tool uses as the root of the image file names it writes.
#
# @param output_dir [String] directory that receives the generated images
# @return [true, false, nil] the value of Kernel#system: +true+ on a zero
#   exit status, +false+ on a non-zero one, +nil+ if the command could not
#   be executed
def generate_images(output_dir)
  FileUtils.mkdir_p(output_dir)

  image_root = File.join(output_dir, "book")
  system("pdftoppm", "-jpeg", @filename, image_root)
end
#pages ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/honyomi/pdf.rb', line 14
#
# Extracts the text of the PDF one page at a time using the external
# +pdftotext+ command (poppler or xpdf).
#
# Pages are requested in order starting at 1. The first unsuccessful exit
# status is treated as "no more pages" and ends the extraction. Each page is
# written to a scratch file inside a temporary directory, read back as UTF-8,
# and (where String#scrub is available) has invalid byte sequences replaced
# with '?'.
#
# @return [Array<String>] the text of each page, in page order; empty when
#   the very first page cannot be extracted
# @raise [Errno::ENOENT] if +pdftotext+ cannot be found; the command is
#   spawned directly rather than through a shell, so a missing tool is never
#   mistaken for "no pages"
def pages
  result = []

  Dir.mktmpdir do |dir|
    outfile = File.join(dir, "pdf.txt")

    loop do
      page_no = (result.count + 1).to_s

      # Need pdftotext (poppler, xpdf).
      # Argument-vector form: no shell is involved, so the file name needs no
      # escaping whatever characters it contains.
      _stdout, _stderr, status = Open3.capture3(
        "pdftotext", "-f", page_no, "-l", page_no, @filename.to_s, outfile
      )
      break unless status.success?

      text = File.read(outfile, encoding: Encoding::UTF_8)
      # String#scrub is missing on older Rubies; skip the clean-up there.
      text = text.scrub('?') if String.method_defined?(:scrub)
      result << text
    end
  end

  result
end