Class: Xtractor::Execute
- Inherits: Object
- Inheritance hierarchy: Object → Xtractor::Execute
- Defined in:
- lib/xtractor/xtract.rb
Instance Method Summary collapse
- #begin_process ⇒ Object
- #collect_hash(*args) ⇒ Object
- #columns_filter(img) ⇒ Object
- #crop_throw(img) ⇒ Object
- #initialize(image, api_key) ⇒ Execute (constructor): a new instance of Execute.
- #out_final(img) ⇒ Object
- #rows_filter(img) ⇒ Object
- #start(img) ⇒ Object
- #store_line_columns(img) ⇒ Object
- #store_line_rows(img) ⇒ Object
Constructor Details
#initialize(image, api_key) ⇒ Execute
Returns a new instance of Execute.
8 9 10 11 |
# File 'lib/xtractor/xtract.rb', line 8
# Creates an extractor bound to one image and one API key.
#
# @param image [Object] kept in @image; begin_process hands it to
#   Magick::Image.read
# @param api_key [Object] kept in @api_key; start forwards it to collect_hash
def initialize(image, api_key)
  @image, @api_key = image, api_key
end
Instance Method Details
#begin_process ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/xtractor/xtract.rb', line 13
# Entry point: loads @image and passes it to crop_throw.
#
# An image whose format is not 'TIFF' is first written to 'Conv_img.tif'
# (relative to the current working directory) and read back from there, so
# crop_throw receives the image loaded from that file.
#
# @return [Object] whatever crop_throw returns
def begin_process
  img = Magick::Image.read(@image).first
  unless 'TIFF' == img.format
    img.write('Conv_img.tif')
    img = Magick::Image.read('Conv_img.tif').first
  end
  crop_throw(img)
end
#collect_hash(*args) ⇒ Object
99 100 101 102 103 |
# File 'lib/xtractor/xtract.rb', line 99
# Hands the second argument to Azure_API#request_API, then builds the output
# table from the first argument via out_final.
#
# @param args [Array] args[0] is the image passed on to out_final, args[1] is
#   the value given to request_API (start passes @api_key here)
# @return [Object] whatever out_final returns
def collect_hash(*args)
  img, api_key = args
  # NOTE(review): presumably request_API produces the cell-files/*.txt files
  # that out_final reads -- confirm against Azure_API.
  Azure_API.new.request_API(api_key)
  out_final(img)
end
#columns_filter(img) ⇒ Object
50 51 52 53 54 55 56 57 58 59 |
# File 'lib/xtractor/xtract.rb', line 50
# Collapses the column indices reported by store_line_columns into runs of
# consecutive values.
#
# The image is scanned once per call, and an empty detection result yields an
# empty list instead of raising.
#
# @param img [Object] image passed through to store_line_columns
# @return [Array<Array(Integer, Integer)>] one [first, last] pair per run of
#   consecutive indices, in ascending order; [] when nothing was detected
def columns_filter(img)
  store_line_columns(img).each_with_object([]) do |line, runs|
    if runs.last && line == runs.last[1] + 1
      # Still inside the current run: extend its end.
      runs.last[1] = line
    else
      runs << [line, line]
    end
  end
end
#crop_throw(img) ⇒ Object
25 26 27 28 29 30 |
# File 'lib/xtractor/xtract.rb', line 25
# Resizes the image to fit within 2500x906, crops the resized copy in place to
# its bounding box, and passes the result to start.
#
# @param img [Object] image responding to resize_to_fit
# @return [Object] whatever start returns
def crop_throw(img)
  fitted = img.resize_to_fit(2500, 906)
  bounds = fitted.bounding_box
  fitted.crop!(bounds.x, bounds.y, bounds.width, bounds.height)
  start(fitted)
end
#out_final(img) ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/xtractor/xtract.rb', line 105
# Writes 'table.tsv' in the current directory: one line per detected row, with
# the text of each cell separated by tabs.
#
# The text for the cell at column j, row i is read from
# "cell-files/#{j}x#{i}.txt"; its lines are stripped and joined with a single
# space. The last entry of rows_filter / columns_filter only closes the final
# cell, so it produces no row / column of its own.
#
# @param img [Object] image passed through to rows_filter and columns_filter
# @return [nil]
# @raise [SystemCallError] if a cell text file is missing or unreadable
def out_final(img)
  # Both grids are computed once up front rather than once per row.
  rows = rows_filter(img)
  columns = columns_filter(img)

  # Block forms close table.tsv and every cell file, even on error.
  File.open('table.tsv', 'w') do |output_file|
    rows[0..-2].each_index do |i|
      text_row = columns[0..-2].each_index.map do |j|
        File.readlines("cell-files/#{j}x#{i}.txt").map(&:strip).join(' ')
      end
      output_file.puts(text_row.join("\t"))
    end
  end
  nil
end
#rows_filter(img) ⇒ Object
61 62 63 64 65 66 67 68 69 70 |
# File 'lib/xtractor/xtract.rb', line 61
# Collapses the row indices reported by store_line_rows into runs of
# consecutive values.
#
# The image is scanned once per call, and an empty detection result yields an
# empty list instead of raising.
#
# @param img [Object] image passed through to store_line_rows
# @return [Array<Array(Integer, Integer)>] one [first, last] pair per run of
#   consecutive indices, in ascending order; [] when nothing was detected
def rows_filter(img)
  store_line_rows(img).each_with_object([]) do |line, runs|
    if runs.last && line == runs.last[1] + 1
      # Still inside the current run: extend its end.
      runs.last[1] = line
    else
      runs << [line, line]
    end
  end
end
#start(img) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/xtractor/xtract.rb', line 74
# Cuts the image into one JPEG per table cell under 'cell-files/', then hands
# off to collect_hash.
#
# A cell spans from the end of one detected line run to the start of the next,
# both horizontally and vertically. Each cell is written as
# "cell-files/#{j}x#{i}.jpg" (j = column index, i = row index), read back,
# resized to a height of 100 at its original width, and written again.
#
# @param img [Object] image responding to get_pixels; also passed to
#   rows_filter, columns_filter and collect_hash
# @return [Object] whatever collect_hash returns
def start(img)
  Dir.mkdir('cell-files') unless File.exist?('cell-files')

  # Each filter scans the whole image, so compute both grids once instead of
  # once per cell.
  rows = rows_filter(img)
  columns = columns_filter(img)

  rows.each_cons(2).with_index do |(row, next_row), i|
    columns.each_cons(2).with_index do |(column, next_column), j|
      x = column[1]
      y = row[1]
      w = next_column[0] - x
      h = next_row[0] - y
      cell_path = "cell-files/#{j}x#{i}.jpg"

      channels = img.get_pixels(x, y, w, h).flat_map do |pixel|
        [pixel.red, pixel.green, pixel.blue]
      end
      Magick::Image.constitute(w, h, 'RGB', channels).write(cell_path) do |out|
        out.depth = 8
      end

      cell = Magick::Image.read(cell_path).first
      # Block-parameter form, the same as the first write above, so this does
      # not depend on what `self` is inside the block.
      # NOTE(review): confirm the installed RMagick yields the options object
      # to this block.
      cell.resize(cell.columns, 100).write(cell_path) do |out|
        out.quality = 100
      end
    end
  end

  collect_hash(img, @api_key)
end
#store_line_columns(img) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/xtractor/xtract.rb', line 41
# Finds the x positions that look like vertical lines.
#
# Only the top strip of the image is inspected: the first
# (img.rows * 0.10).floor pixels of each column. A column qualifies when at
# least 95% of that strip has a red value below 63000.
# NOTE(review): 63000 presumably assumes a 16-bit quantum (max 65535) --
# confirm for the ImageMagick build in use.
#
# @param img [Object] image responding to columns, rows and get_pixels
# @return [Array<Integer>] qualifying column indices in ascending order
def store_line_columns(img)
  # The strip height depends only on the image, so compute it once.
  threshold = (img.rows * 0.10).floor
  required = threshold * 0.95

  (0...img.columns).select do |line_index|
    dark = img.get_pixels(line_index, 0, 1, threshold).count { |pixel| pixel.red < 63000 }
    dark >= required
  end
end
#store_line_rows(img) ⇒ Object
32 33 34 35 36 37 38 39 |
# File 'lib/xtractor/xtract.rb', line 32
# Finds the y positions that look like horizontal lines.
#
# Only the left strip of the image is inspected: the first
# (img.columns * 0.10).floor pixels of each row. A row qualifies when at least
# 95% of that strip has a red value below 63000.
# NOTE(review): 63000 presumably assumes a 16-bit quantum (max 65535) --
# confirm for the ImageMagick build in use.
#
# @param img [Object] image responding to columns, rows and get_pixels
# @return [Array<Integer>] qualifying row indices in ascending order
def store_line_rows(img)
  # The strip width depends only on the image, so compute it once.
  threshold = (img.columns * 0.10).floor
  required = threshold * 0.95

  (0...img.rows).select do |line_index|
    dark = img.get_pixels(0, line_index, threshold, 1).count { |pixel| pixel.red < 63000 }
    dark >= required
  end
end