Class: UdiseCaptchaReader::Reader

Inherits:
Object
Defined in:
lib/udise_captcha_reader/reader.rb

Instance Method Summary

Constructor Details

#initialize(options = {}) ⇒ Reader

Returns a new instance of Reader.



7
8
9
10
11
12
13
14
15
16
# File 'lib/udise_captcha_reader/reader.rb', line 7

# Builds a reader whose settings are the defaults below overlaid with any
# caller-supplied overrides, and creates the character recognizer from the
# merged settings.
#
# @param options [Hash] overrides merged on top of the default settings;
#   keys given here win over the defaults
def initialize(options = {})
  defaults = {
    lang: "eng",
    processor: "mini_magick",
    psm: 7, # Treat image as a single text line
    oem: 1  # Use LSTM OCR Engine
  }

  @options = defaults.merge(options)
  @recognizer = CharacterRecognizer.new(@options)
end

Instance Method Details

#read_text(image_path) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/udise_captcha_reader/reader.rb', line 18

# Reads the text contained in a captcha image.
#
# The image is preprocessed with the :standard profile, split into
# per-character images, each character is passed to the recognizer, and the
# results are concatenated in order.
#
# @param image_path [String] path to the captcha image on disk
# @return [String] the recognized characters joined together
# @raise [Error] if image_path is nil or does not name a regular file, or if
#   RTesseract reports a failure during recognition
def read_text(image_path)
  # File.file? (rather than File.exist?) also rejects directories, which would
  # otherwise pass this guard and fail later with an unrelated error. The nil
  # check keeps a missing path from surfacing as a bare TypeError.
  raise Error, "Image file not found" unless image_path && File.file?(image_path)

  # The base name (extension stripped) is handed to the splitter along with the image.
  original_filename = File.basename(image_path, ".*")

  # Preprocess first, then reopen the processed output for splitting.
  processed = ImagePreprocessor.preprocess_image(image_path, :standard)
  image = MiniMagick::Image.open(processed.path)

  CharacterSplitter
    .split_into_characters(image, original_filename)
    .map { |char_image| @recognizer.recognize_character(char_image) }
    .join
rescue RTesseract::Error => e
  # Surface OCR engine failures as this library's own error type.
  raise Error, "OCR processing failed: #{e.message}"
end