Class: UdiseCaptchaReader::Reader
- Inherits:
-
Object
- Object
- UdiseCaptchaReader::Reader
- Defined in:
- lib/udise_captcha_reader/reader.rb
Instance Method Summary
-
#initialize(options = {}) ⇒ Reader
constructor
A new instance of Reader.
- #read_text(image_path) ⇒ Object
Constructor Details
#initialize(options = {}) ⇒ Reader
Returns a new instance of Reader.
7 8 9 10 11 12 13 14 15 16 |
# File 'lib/udise_captcha_reader/reader.rb', line 7
#
# Builds a Reader whose character recognizer is configured from +options+.
#
# The defaults below are merged with the caller's hash, so any key supplied
# by the caller overrides the corresponding default. The caller's hash itself
# is not modified (Hash#merge returns a new hash).
#
# @param options [Hash] recognizer settings; recognized defaults are
#   :lang ("eng"), :processor ("mini_magick"), :psm (7) and :oem (1).
#   Any additional keys are passed through to CharacterRecognizer unchanged.
# @return [Reader] a new instance of Reader
def initialize(options = {})
  options = {
    lang: "eng",
    processor: "mini_magick",
    psm: 7, # Treat image as a single text line
    oem: 1 # Use LSTM OCR Engine
  }.merge(options)

  @recognizer = CharacterRecognizer.new(options)
end
Instance Method Details
#read_text(image_path) ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/udise_captcha_reader/reader.rb', line 18
#
# Reads the text of the captcha image stored at +image_path+.
#
# The image is preprocessed, split into per-character images, and each
# character image is passed to the recognizer; the individual results are
# joined in order.
#
# @param image_path [String] path to the captcha image file
# @return [Object] the recognized characters joined together
#   (presumably a String; verify against CharacterSplitter's return type)
# @raise [Error] if no file exists at +image_path+, or if an
#   RTesseract::Error is raised during processing
def read_text(image_path)
  raise Error, "Image file not found" unless File.exist?(image_path)

  # File name with its extension stripped; handed to the splitter below.
  base_name = File.basename(image_path, ".*")

  # Preprocess with the :standard profile, then reopen the result so it can
  # be split into one image per character.
  preprocessed = ImagePreprocessor.preprocess_image(image_path, :standard)
  glyphs = CharacterSplitter.split_into_characters(
    MiniMagick::Image.open(preprocessed.path),
    base_name
  )

  # Recognize every character image and concatenate the results.
  glyphs.map { |glyph| @recognizer.recognize_character(glyph) }.join
rescue RTesseract::Error => e
  raise Error, "OCR processing failed: #{e.message}"
end