Class: TextProcessing

Inherits:
Object
  • Object
show all
Defined in:
lib/lumix/textprocessing.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(lang = 'ro') ⇒ TextProcessing

Returns a new instance of TextProcessing.



9
10
11
# File 'lib/lumix/textprocessing.rb', line 9

# Creates a text processor for one language.
#
# @param lang the language value stored on the instance and exposed through
#   #lang; defaults to 'ro' (presumably the code for Romanian -- confirm
#   against the web service documentation)
def initialize(lang = 'ro')
  @lang = lang
end

Instance Attribute Details

#lang ⇒ Object

Returns the value of attribute lang.



7
8
9
# File 'lib/lumix/textprocessing.rb', line 7

# Reader for the language this processor was configured with.
#
# @return the value stored in @lang by the constructor ('ro' unless another
#   value was given)
def lang
  @lang
end

Instance Method Details

#create_tagged_filename(infile) ⇒ Object

inserts “tagged” as the second to last part in the filename e.g.

test.txt -> test.tagged.txt

special case when no extension is present:

README -> README.tagged


24
25
26
27
28
29
# File 'lib/lumix/textprocessing.rb', line 24

# Inserts "tagged" as the second to last dot-separated part of the file
# name, e.g.
#
#   test.txt      -> test.tagged.txt
#   docs/test.txt -> docs/test.tagged.txt
#
# When the file name has no extension, "tagged" is appended instead:
#
#   README        -> README.tagged
#   dir.v1/README -> dir.v1/README.tagged
#
# Only the last path component is rewritten; dots that appear in directory
# names are left untouched.
#
# @param infile [String] file name, optionally preceded by a directory path
# @return [String] the same path with "tagged" inserted into the file name
def create_tagged_filename(infile)
  # Split off the directory part first so that dots in it cannot be
  # mistaken for the extension separator of the file name.
  separators = Regexp.union([File::SEPARATOR, File::ALT_SEPARATOR].compact)
  directory, separator, basename = infile.rpartition(separators)

  components = basename.split(/\./)
  # position 1 covers the extension-less case, where "tagged" goes last
  position = [1, components.size - 1].max
  components.insert position, 'tagged'

  directory + separator + components.join('.')
end

#process(text) ⇒ Object

the core processing routine using the webservice



43
44
45
46
# File 'lib/lumix/textprocessing.rb', line 43

# The core processing routine: submits +text+ together with the configured
# language to the web service driver returned by #rpc.
#
# @param text the input passed to the service as the :input argument
# @return the +processResult+ field of the service response
def process(text)
  rpc.Process(:input => text, :lang => lang).processResult
end

#process_file(infile, outfile) ⇒ Object

takes the text from infile and outputs the result into the outfile



53
54
55
56
57
# File 'lib/lumix/textprocessing.rb', line 53

# Takes the text from infile, runs it through #process and writes the
# result into outfile.
#
# The input is read and processed before outfile is opened. Opening with
# mode 'w' truncates the file, so doing it first would empty the input when
# infile and outfile are the same path, and would leave an empty outfile
# behind whenever processing raises.
#
# @param infile [String] path of the file to read
# @param outfile [String] path of the file to (over)write with the result
# @return the return value of IO#write for the written result
def process_file(infile, outfile)
  result = process(File.read(infile))
  File.open(outfile, 'w') do |out|
    out.write result
  end
end

#process_stdin ⇒ Object



48
49
50
# File 'lib/lumix/textprocessing.rb', line 48

# Reads everything available on standard input, runs it through #process
# and prints the result with puts.
#
# @return the return value of puts
def process_stdin
  input = $stdin.read
  output = process(input)
  puts output
end

#rpc ⇒ Object



13
14
15
16
17
# File 'lib/lumix/textprocessing.rb', line 13

# Returns the RPC driver for the text processing web service, creating it
# on first use and reusing the same driver afterwards.
#
# The driver is built from the service's WSDL, so it is cached in @rpc
# rather than rebuilt on every call.
#
# @return the driver produced by SOAP::WSDLDriverFactory#create_rpc_driver
def rpc
  @rpc ||= SOAP::WSDLDriverFactory.new('http://www.racai.ro/webservices/TextProcessing.asmx?WSDL').create_rpc_driver
end

#to_filelist(*files) ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/lumix/textprocessing.rb', line 31

# Expands a mixed list of file and directory names into a flat list of
# file names.
#
# Arguments may be nested arrays; nil entries are ignored. Every directory
# is replaced by whatever Dir.glob finds beneath it with the pattern
# '**/*'. Names that are not directories are passed through unchanged,
# whether or not they exist.
#
# The result contains each name once and leaves out directories as well as
# every path that contains the substring '.tagged'.
#
# @param files [Array] file names, directory names, or arrays of them
# @return [Array<String>] the file names to process
def to_filelist(*files)
  # Drop nils before touching the file system: File.directory?(nil) raises.
  expanded = files.flatten.compact.map do |filename|
    if File.directory?(filename)
      Dir.glob(File.join(filename, '**/*')) # add all files from that directory
    else
      filename
    end
  end

  # make sure every file is only processed once
  candidates = expanded.flatten.uniq

  # remove the folders returned by the glob and anything marked as tagged
  candidates.reject { |filename| File.directory?(filename) || filename['.tagged'] }
end