Class: Xtractor::Execute

Inherits:
Object
  • Object
show all
Defined in:
lib/xtractor/xtract.rb

Instance Method Summary collapse

Constructor Details

#initialize(image, api_key) ⇒ Execute

Returns a new instance of Execute.



8
9
10
11
# File 'lib/xtractor/xtract.rb', line 8

# Stores the two constructor arguments for later use by the instance.
#
# @param image [Object] kept in @image; begin_process hands it to Magick::Image.read
# @param api_key [Object] kept in @api_key; start forwards it to collect_hash
def initialize(image, api_key)
  @image, @api_key = image, api_key
end

Instance Method Details

#begin_process ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/xtractor/xtract.rb', line 13

# Loads @image, makes sure it is in TIFF format, and passes it to crop_throw.
#
# An image whose format is not 'TIFF' is first written to 'Conv_img.tif' in
# the current directory and read back from there.
#
# @return [Object] whatever crop_throw returns
def begin_process
  source = Magick::Image.read(@image).first

  unless source.format == 'TIFF'
    source.write('Conv_img.tif')
    source = Magick::Image.read('Conv_img.tif').first
  end

  crop_throw(source)
end

#collect_hash(*args) ⇒ Object



99
100
101
102
103
# File 'lib/xtractor/xtract.rb', line 99

# Calls Azure_API#request_API with the second argument, then out_final with
# the first.
#
# @param args [Array] args[0] is passed to out_final, args[1] to request_API
# @return [Object] whatever out_final returns
def collect_hash(*args)
  img, api_key = args
  Azure_API.new.request_API(api_key)
  out_final(img)
end

#columns_filter(img) ⇒ Object



50
51
52
53
54
55
56
57
58
59
# File 'lib/xtractor/xtract.rb', line 50

# Groups the column indices returned by store_line_columns into runs of
# consecutive values.
#
# @param img [Object] passed through unchanged to store_line_columns
# @return [Array<Array<Integer>>] one [first, last] pair per run, in order;
#   empty when store_line_columns returns no indices
def columns_filter(img)
  # store_line_columns walks every column of the image, so call it only once.
  line_indices = store_line_columns(img)

  # A new run starts wherever the next index is not exactly previous + 1.
  line_indices.slice_when { |previous, current| current != previous + 1 }
              .map { |run| [run.first, run.last] }
end

#crop_throw(img) ⇒ Object



25
26
27
28
29
30
# File 'lib/xtractor/xtract.rb', line 25

# Resizes the image to fit within 2500x906, crops the resized copy in place to
# its bounding box, and passes it to start.
#
# @param img [Object] must respond to resize_to_fit (presumably a
#   Magick::Image; confirm against callers)
# @return [Object] whatever start returns
def crop_throw(img)
  trimmed = img.resize_to_fit(2500, 906).tap do |scaled|
    bounds = scaled.bounding_box
    scaled.crop!(bounds.x, bounds.y, bounds.width, bounds.height)
  end
  start(trimmed)
end

#out_final(img) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
# File 'lib/xtractor/xtract.rb', line 105

# Writes 'table.tsv' in the current directory: one line per table row, with the
# text of each cell joined by tabs.
#
# Cell text is read from "cell-files/<column>x<row>.txt"; the lines of each
# file are stripped and joined with a single space. The last entry of
# rows_filter and of columns_filter is skipped, matching the cell grid that
# start produces.
#
# @param img [Object] passed through unchanged to rows_filter and columns_filter
# @return [nil]
def out_final(img)
  # Compute the grid size once; each filter call rescans the image.
  row_count = rows_filter(img)[0..-2].length
  column_count = columns_filter(img)[0..-2].length

  # Block form closes the output file even if reading a cell file raises.
  File.open('table.tsv', 'w') do |output_file|
    row_count.times do |i|
      text_row = Array.new(column_count) do |j|
        # File.readlines closes the cell file after reading it.
        File.readlines("cell-files/#{j}x#{i}.txt").map(&:strip).join(' ')
      end
      output_file.puts(text_row.join("\t"))
    end
  end
  nil
end

#rows_filter(img) ⇒ Object



61
62
63
64
65
66
67
68
69
70
# File 'lib/xtractor/xtract.rb', line 61

# Groups the row indices returned by store_line_rows into runs of consecutive
# values.
#
# @param img [Object] passed through unchanged to store_line_rows
# @return [Array<Array<Integer>>] one [first, last] pair per run, in order;
#   empty when store_line_rows returns no indices
def rows_filter(img)
  # store_line_rows walks every row of the image, so call it only once.
  line_indices = store_line_rows(img)

  # A new run starts wherever the next index is not exactly previous + 1.
  line_indices.slice_when { |previous, current| current != previous + 1 }
              .map { |run| [run.first, run.last] }
end

#start(img) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/xtractor/xtract.rb', line 74

# Cuts the image into one JPEG per table cell under 'cell-files/', then calls
# collect_hash with the image and @api_key.
#
# A cell spans from the end of one row/column run to the start of the next, so
# N runs yield N-1 cells along that axis. Each cell is written as
# "cell-files/<column>x<row>.jpg", read back, resized to a height of 100
# (width unchanged) and written again to the same path.
#
# @param img [Object] must respond to get_pixels(x, y, w, h); also passed to
#   rows_filter, columns_filter and collect_hash
# @return [Object] whatever collect_hash returns
def start(img)
  Dir.mkdir('cell-files') unless File.exist?('cell-files')

  # Compute the grid once; each filter call rescans the image.
  row_bands = rows_filter(img)
  column_bands = columns_filter(img)

  row_bands.each_cons(2).with_index do |(row, next_row), i|
    column_bands.each_cons(2).with_index do |(column, next_column), j|
      x = column[1]
      y = row[1]
      w = next_column[0] - x
      h = next_row[0] - y
      cell_path = "cell-files/#{j}x#{i}.jpg"

      rgb = img.get_pixels(x, y, w, h).flat_map { |pixel| [pixel.red, pixel.green, pixel.blue] }
      Magick::Image.constitute(w, h, 'RGB', rgb).write(cell_path) { |out| out.depth = 8 }

      r_image = Magick::Image.read(cell_path).first
      # Use the block parameter, as the write above does, rather than `self.`:
      # the option is then set whether the library yields the options object
      # or instance_evals the block.
      r_image.resize(r_image.columns, 100).write(cell_path) { |out| out.quality = 100 }
    end
  end

  collect_hash(img, @api_key)
end

#store_line_columns(img) ⇒ Object



41
42
43
44
45
46
47
48
# File 'lib/xtractor/xtract.rb', line 41

# Finds the column indices whose topmost strip is almost entirely made of
# pixels with a red channel below 63000.
#
# The strip is 1 pixel wide and floor(10% of img.rows) pixels tall, starting
# at y = 0. A column qualifies when at least 95% of the strip height's worth
# of pixels pass the red-channel test.
#
# @param img [Object] must respond to rows, columns and
#   get_pixels(x, y, w, h), returning pixels that respond to red
# @return [Array<Integer>] qualifying column indices in ascending order
def store_line_columns(img)
  # Both values are the same for every column, so compute them once.
  strip_height = (img.rows * 0.10).floor
  required = strip_height * 0.95

  (0...img.columns).select do |column_index|
    strip = img.get_pixels(column_index, 0, 1, strip_height)
    strip.count { |pixel| pixel.red < 63_000 } >= required
  end
end

#store_line_rows(img) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/xtractor/xtract.rb', line 32

# Finds the row indices whose leftmost strip is almost entirely made of pixels
# with a red channel below 63000.
#
# The strip is 1 pixel tall and floor(10% of img.columns) pixels wide,
# starting at x = 0. A row qualifies when at least 95% of the strip width's
# worth of pixels pass the red-channel test.
#
# @param img [Object] must respond to rows, columns and
#   get_pixels(x, y, w, h), returning pixels that respond to red
# @return [Array<Integer>] qualifying row indices in ascending order
def store_line_rows(img)
  # Both values are the same for every row, so compute them once.
  strip_width = (img.columns * 0.10).floor
  required = strip_width * 0.95

  (0...img.rows).select do |row_index|
    strip = img.get_pixels(0, row_index, strip_width, 1)
    strip.count { |pixel| pixel.red < 63_000 } >= required
  end
end