Class: Tahweel::CLI::Options

Inherits:
Object
  • Object
show all
Defined in:
lib/tahweel/cli/options.rb

Overview

Parses command-line arguments for the Tahweel CLI.

Constant Summary collapse

POSITIVE_INTEGER =
/\A\+?[1-9]\d*(?:_\d+)*\z/

Class Method Summary collapse

Class Method Details

.configure_parser(opts, options) ⇒ Object

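Registers the CLI switches on `opts`; parsed values are written into `options`. (Source annotation: rubocop:disable Metrics/MethodLength,Metrics/AbcSize)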



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/tahweel/cli/options.rb', line 41

# Registers every Tahweel CLI switch on +opts+ and wires each switch to store
# its parsed value in +options+.
#
# The defaults advertised in the help text are read from +options+ at the
# time this method runs, so the help output always matches the values the
# caller actually passed in.
#
# @param opts [OptionParser] the parser to configure
# @param options [Hash] option values; read for help-text defaults and
#   mutated in place as switches are parsed
# @return [Object] whatever the final +opts.on+ call returns; not meant to be used
def self.configure_parser(opts, options) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  opts.program_name = "tahweel"
  opts.version = Tahweel::VERSION

  # Register POSITIVE_INTEGER as an argument type: the regexp is the match
  # pattern and the block converts the matched text into an Integer.
  opts.accept(POSITIVE_INTEGER) do |value|
    n = Integer(value)
    # Defensive: the pattern already requires a leading non-zero digit.
    raise OptionParser::InvalidArgument, "must be a positive integer" if n < 1

    n
  end

  opts.on(
    "-e", "--extensions EXTENSIONS", Array,
    "Comma-separated list of file extensions to process " \
    "(default: #{Tahweel::CLI::FileCollector::SUPPORTED_EXTENSIONS.join(", ")})"
  ) do |value|
    options[:extensions] = value
  end

  opts.on("--dpi DPI", POSITIVE_INTEGER, "DPI for PDF to Image conversion (default: #{options[:dpi]})") do |value|
    options[:dpi] = value
  end

  # The default shown here comes from +options+, like the other switches,
  # rather than from a hard-coded processor name.
  opts.on(
    "-p", "--processor PROCESSOR", Tahweel::Ocr::AVAILABLE_PROCESSORS,
    "OCR processor to use (default: #{options[:processor]}). " \
    "Available: #{Tahweel::Ocr::AVAILABLE_PROCESSORS.join(", ")}"
  ) do |value|
    options[:processor] = value
  end

  opts.on(
    "-P", "--page-concurrency PAGE_CONCURRENCY", POSITIVE_INTEGER,
    "Max concurrent OCR operations (default: #{options[:page_concurrency]})"
  ) do |value|
    options[:page_concurrency] = value
  end

  opts.on(
    "-F", "--file-concurrency FILE_CONCURRENCY", POSITIVE_INTEGER,
    "Max concurrent files to process (default: CPUs - 2 = #{options[:file_concurrency]})"
  ) do |value|
    options[:file_concurrency] = value
  end

  # The advertised default is taken from +options+ so it cannot drift from
  # the formats that are really enabled when the switch is omitted.
  opts.on(
    "-f", "--formats FORMATS", Array,
    "Output formats (comma-separated, default: #{Array(options[:formats]).join(", ")}). " \
    "Available: #{Tahweel::Writer::AVAILABLE_FORMATS.join(", ")}"
  ) do |value|
    options[:formats] = value.map(&:to_sym)

    # Anything outside the known writer formats terminates the process.
    invalid_formats = options[:formats] - Tahweel::Writer::AVAILABLE_FORMATS
    abort "Error: invalid format(s): #{invalid_formats.join(", ")}" if invalid_formats.any?
  end

  # Newlines are displayed as a literal "\n" in the help text, and a literal
  # "\n" typed on the command line is turned back into a real newline.
  opts.on(
    "--page-separator SEPARATOR", String,
    "Separator between pages in TXT output (default: #{options[:page_separator].gsub("\n", "\\n")})"
  ) do |value|
    options[:page_separator] = value.gsub("\\n", "\n")
  end

  opts.on("-o", "--output DIR", String, "Output directory (default: current directory)") do |value|
    options[:output] = value
  end
end

.default_options ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/tahweel/cli/options.rb', line 29

# Builds a fresh hash holding the default value of every CLI option.
#
# @return [Hash{Symbol => Object}] the default option values
def self.default_options
  # Processor count minus two, floored at two.
  file_workers = [Etc.nprocessors - 2, 2].max

  {
    dpi: 150,
    processor: :google_drive,
    page_concurrency: Tahweel::Converter::DEFAULT_CONCURRENCY,
    file_concurrency: file_workers,
    output: nil,
    formats: %i[txt docx],
    page_separator: Tahweel::Writers::Txt::PAGE_SEPARATOR
  }
end

.parse(args) ⇒ Hash

Parses the command-line arguments.

Parameters:

  • args (Array<String>)

    The command-line arguments.

Returns:

  • (Hash)

    The parsed options.



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/tahweel/cli/options.rb', line 16

# Parses the command-line arguments.
#
# Recognised switches are removed from +args+ in place. A parse error
# terminates the process with an "Error: ..." message.
#
# @param args [Array<String>] the command-line arguments
# @return [Hash] the parsed options
def self.parse(args)
  parsed = default_options
  option_parser = OptionParser.new do |opts|
    configure_parser(opts, parsed)
  end

  # Only failures raised while parsing are turned into an abort.
  begin
    option_parser.parse!(args)
  rescue OptionParser::ParseError => error
    abort "Error: #{error.message}"
  end

  validate_args!(args, option_parser)
  parsed
end

.validate_args!(args, parser) ⇒ Object



107
108
109
110
111
112
# File 'lib/tahweel/cli/options.rb', line 107

# Prints the parser's usage text and exits with status 1 when no arguments
# are left; does nothing otherwise.
#
# @param args [Array<String>] the arguments left after option parsing
# @param parser [#to_s] the object whose text is printed as usage
# @return [nil] when +args+ is not empty
def self.validate_args!(args, parser)
  if args.empty?
    puts parser
    exit 1
  end
end