Class: PdfParadise::ConvertPdfToText

Inherits:
Base
  • Object
show all
Defined in:
lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb

Overview

PdfParadise::ConvertPdfToText

Constant Summary collapse

NAMESPACE =
#

NAMESPACE

#
inspect

Instance Method Summary collapse

Methods inherited from Base

#basename, #be_verbose?, #change_directory, #commandline_arguments?, #copy_file, #delete_file, #e, #ecomment, #esystem, #first_argument?, #gold, #infer_the_namespace, #input_without_leading_hyphens?, #internal_hash?, #is_an_image_file?, #is_on_roebe?, #lightsteelblue, #log_dir?, #mkdir, #mv, #n_pages?, #namespace?, #no_file_at, #opne, #orange, #reset_the_internal_hash, #return_commandline_arguments_starting_with_hyphens, #return_files_from_the_commandline_arguments, #return_pwd, #rev, #set_be_quiet, #set_commandline_arguments, #steelblue, #try_to_ensure_that_this_directory_exists, #write_what_into

Constructor Details

#initialize(i = ARGV, run_already = true) ⇒ ConvertPdfToText

#

initialize

#


31
32
33
34
35
36
37
38
39
40
41
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 31

# Build a new converter and, unless told otherwise, run it right away.
#
# @param i [Object] designates what should be converted; it is handed
#   to set_convert_these unchanged. Defaults to ARGV.
# @param run_already [Boolean] when truthy, #run is invoked at the end
#   of construction.
def initialize(i = ARGV, run_already = true)
  reset
  # Register the conversion targets before anything is run.
  set_convert_these(i)
  return unless run_already
  run
end

Instance Method Details

#convert_this_pdf(i = @convert_these) ⇒ Object Also known as: do_convert

#

convert_this_pdf

This will make use of the binary called “pdftotext”.

#


100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 100

# Convert one PDF file - or every entry of an Array - into plain text by
# delegating to the external binary called "pdftotext".
#
# The expected name of the resulting text file (the input path with its
# trailing extension replaced by ".txt") is stored in @output_file.
#
# Entries that do not exist on disk are reported, unless they start with
# "--", in which case they are silently skipped.
#
# @param i [String, Array<String>] path of the file to convert, or an
#   Array of such paths. Defaults to @convert_these.
def convert_this_pdf(
    i = @convert_these
  )
  if i.is_a? Array
    i.each {|entry| convert_this_pdf(entry) }
  elsif File.exist? i
    opnn; e "Now converting `#{sfile(i)}#{rev}` via #{simp('pdftotext')}#{rev}."
    # ===================================================================== #
    # Only the extension at the very end of the path is stripped. Building
    # a regex out of the extension would treat "." as a wildcard and would
    # also remove matches in the middle of the path ("apdf.pdf" would
    # become ".txt").
    # ===================================================================== #
    @output_file = i.delete_suffix(File.extname(i))+'.txt'
    # ===================================================================== #
    # Delegate towards pdftotext next; since as of January 2024 we will
    # use -enc UTF-8 by default. The arguments are passed as a list so
    # that no shell is involved: file names containing quotes, "$" or
    # backticks are then handed over verbatim.
    # ===================================================================== #
    if system('pdftotext', '-enc', 'UTF-8', i)
      opnn; e "Storing into the file `#{sfile(@output_file)}`."
    else
      # system() yields false on a non-zero exit status and nil when the
      # binary could not be executed at all.
      opnn; e 'pdftotext failed to convert '+sfile(i)+'.'
    end
  elsif !i.start_with?('--')
    opnn; e 'No file called '+sfile(i)+' could be found.'
  end
end

#do_start_the_GUI_interface ⇒ Object

#

do_start_the_GUI_interface

#


153
154
155
156
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 153

# Load the GTK3 front end on demand and hand control over to it.
#
# The require happens inside the method so that the GUI code is only
# loaded when this method is actually called.
def do_start_the_GUI_interface
  gui_file = 'pdf_paradise/gui/gtk3/convert_pdf_to_text/convert_pdf_to_text.rb'
  require gui_file
  ::PdfParadise::GUI::Gtk::ConvertPdfToText.run
end

#get_all_pdf_files_from_current_directory ⇒ Object

#

get_all_pdf_files_from_current_directory

This method will obtain all pdf files from the current directory.

#


55
56
57
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 55

# Collect the names of all entries in the current working directory
# that match the glob "*.pdf".
#
# @return [Array<String>] the matching names, relative to the current
#   directory; empty when nothing matches.
def get_all_pdf_files_from_current_directory
  Dir.glob('*.pdf')
end
#

menu (menu tag)

#


134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 134

# Inspect the given input for commandline instructions and act on them.
# An Array is processed entry by entry.
#
# Currently a single instruction is recognized: "gui", optionally
# prefixed with one or two hyphens and matched case-insensitively. It
# starts the GUI via do_start_the_GUI_interface.
#
# @param i [String, Array<String>] the entry or entries to inspect.
#   Defaults to @convert_these.
def menu(
    i = @convert_these
  )
  if i.is_a? Array
    i.each {|entry| menu(entry) }
  else
    case i
    # ===================================================================== #
    # === cpdf --gui
    #
    # The pattern is anchored on both sides so that only the instruction
    # itself matches. Without the end anchor a file name that merely
    # begins with "gui", such as "guide.pdf", would start the GUI as well.
    # ===================================================================== #
    when /\A-{0,2}gui\z/i
      do_start_the_GUI_interface
    end
  end
end

#opnn ⇒ Object

#

opnn

#


91
92
93
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 91

# Forward to the inherited opnn implementation, always passing this
# class' NAMESPACE constant as the argument.
# NOTE(review): the parent presumably prints a namespace prefix before
# the message that follows - confirm against the Base class.
def opnn
  super(NAMESPACE)
end

#output_file? ⇒ Boolean

#

output_file?

#

Returns:

  • (Boolean)


127
128
129
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 127

# Query the value currently stored in @output_file.
#
# Despite the trailing "?", the stored value itself is returned rather
# than a strict true/false. convert_this_pdf assigns the name of the
# text file there.
#
# @return [Object, nil] the content of @output_file; nil when it has
#   not been assigned.
def output_file?
  return @output_file
end

#reset ⇒ Object

#

reset

#


46
47
48
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 46

# Reset this object by delegating to the inherited reset implementation.
# The explicit empty parentheses make sure that no arguments are
# forwarded to the parent method.
def reset
  super()
end

#run ⇒ Object

#

run (run tag)

#


161
162
163
164
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 161

# Main entry point: first let menu act on any commandline instructions,
# then convert the registered entries via start_conversion.
def run
  menu
  start_conversion
end

#set_convert_these(i = return_pwd) ⇒ Object

#

set_convert_these

This method will preferentially try to work on only .pdf files.

#


64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 64

# Determine which entries are to be converted and store them, always as
# an Array, in @convert_these.
#
# Resolution rules:
#   - nil is treated like the default (return_pwd).
#   - An empty String or an empty Array selects all "*.pdf" files of the
#     current directory.
#   - A String naming a directory selects all "*.pdf" files inside that
#     directory. For the current directory the bare file names are
#     returned; for any other directory the names are prefixed with
#     the given directory path.
#   - Anything else is kept as it is, wrapped into an Array if necessary.
#
# @param i [String, Array<String>, nil] file, directory or list of
#   entries. Defaults to return_pwd.
# @return [Array] the value stored in @convert_these.
def set_convert_these(
    i = return_pwd
  )
  i = return_pwd if i.nil?
  if i.is_a? String
    if i.empty?
      i = get_all_pdf_files_from_current_directory()
    elsif File.directory? i
      if File.identical?(i, '.')
        i = get_all_pdf_files_from_current_directory()
      else
        # Look inside the directory that was actually given rather than
        # silently falling back to the current directory.
        directory = i
        i = Dir.glob('*.pdf', base: directory).map {|name|
          File.join(directory, name)
        }
      end
    end
  elsif i.is_a? Array
    i = get_all_pdf_files_from_current_directory() if i.empty?
  end
  i = [i] unless i.is_a? Array
  @convert_these = i # This must be an Array.
end

#start_conversion ⇒ Object

#

start_conversion

#


84
85
86
# File 'lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb', line 84

# Walk over every entry registered in @convert_these, in order, and pass
# each one to convert_this_pdf.
#
# @return [Array] @convert_these itself, as returned by each.
def start_conversion
  @convert_these.each do |target|
    convert_this_pdf(target)
  end
end