Class: Honyomi::Pdf

Inherits:
Object
  • Object
show all
Defined in:
lib/honyomi/pdf.rb

Instance Method Summary collapse

Constructor Details

#initialize(filename) ⇒ Pdf

Returns a new instance of Pdf.



10
11
12
# File 'lib/honyomi/pdf.rb', line 10

# Stores the given filename for later use by the other instance methods.
#
# Nothing is opened or validated here; the value is only assigned.
#
# @param filename the PDF file name, kept as-is in @filename
def initialize(filename)
  @filename = filename
end

Instance Method Details

#generate_images(output_dir) ⇒ Object



39
40
41
42
# File 'lib/honyomi/pdf.rb', line 39

# Renders the PDF to JPEG files by invoking the external +pdftoppm+ command.
#
# The output directory (including missing parents) is created first, then
# +pdftoppm -jpeg+ is run with the PDF and an output prefix of
# "<output_dir>/book".
#
# @param output_dir [String] directory that receives the generated images
# @return [true, false, nil] the result of Kernel#system
def generate_images(output_dir)
  FileUtils.mkdir_p(output_dir)

  # pdftoppm takes an output *prefix*, not a directory.
  image_prefix = File.join(output_dir, "book")
  system("pdftoppm", "-jpeg", @filename, image_prefix)
end

#pages ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/honyomi/pdf.rb', line 14

# Extracts the text of the PDF one page at a time.
#
# The external +pdftotext+ command (poppler, xpdf) is run once per page,
# writing into a scratch file inside a temporary directory. Extraction stops
# at the first page for which the command does not exit successfully.
#
# NOTE(review): termination relies on pdftotext exiting non-zero when asked
# for a page past the end of the document -- confirm this holds for the
# pdftotext build in use.
#
# @return [Array<String>] the text of each page, in page order, read as UTF-8
#   with invalid byte sequences replaced by "?" (where String#scrub exists)
# @raise [Errno::ENOENT] if the pdftotext executable cannot be found
def pages
  result = []

  Dir.mktmpdir do |dir|
    outfile = File.join(dir, "pdf.txt")

    loop do
      page_no = (result.count + 1).to_s

      # Pass the command as an argument list so no shell is involved and the
      # file names need no quoting. `to_s` keeps non-String filenames working.
      _stdout, _stderr, status = Open3.capture3(
        "pdftotext", "-f", page_no, "-l", page_no, @filename.to_s, outfile
      )
      break unless status.success?

      text = File.read(outfile, encoding: Encoding::UTF_8)

      # String#scrub is missing on very old Rubies; skip the cleanup there.
      text = text.scrub('?') if text.respond_to?(:scrub)

      result << text
    end
  end

  result
end