Class: LanguageToolProcess

Inherits:
Object
  • Object
show all
Defined in:
lib/gherkin_language/language_tool_process.rb

Overview

This service class provides access to the LanguageTool process.

Constant Summary collapse

VERSION =
'LanguageTool-3.9'.freeze
URL =
"https://www.languagetool.org/download/#{VERSION}.zip".freeze
NGRAM_VERSION =
'ngrams-en-20150817'.freeze
NGRAM_URL =
"https://languagetool.org/download/ngram-data/#{NGRAM_VERSION}.zip".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ngrams = false, unknown_words = false) ⇒ LanguageToolProcess

Returns a new instance of LanguageToolProcess.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/gherkin_language/language_tool_process.rb', line 24

# Prepares a LanguageTool installation below the system temp directory and
# resets the checker state.
#
# The LanguageTool distribution is downloaded when its command line jar is
# missing. The n-gram data is downloaded only when +ngrams+ is truthy and its
# directory does not exist yet. When a '.glossary' file exists in the current
# working directory, it is installed through #use_user_glossary.
#
# @param ngrams [Boolean] whether to fetch the n-gram data and pass it to
#   LanguageTool in #start!
# @param unknown_words [Boolean] whether #start! adds --list-unknown
def initialize(ngrams = false, unknown_words = false)
  tmp_root = Dir.tmpdir
  install_dir = "#{tmp_root}/#{VERSION}"
  download(tmp_root, URL) unless File.exist?("#{install_dir}/languagetool-commandline.jar")

  if ngrams
    @ngrams_path = "#{tmp_root}/#{NGRAM_VERSION}"
    download("#{@ngrams_path}/en", NGRAM_URL) unless File.exist?(@ngrams_path)
  end

  # Configuration taken from the arguments.
  @path = tmp_root
  @ngrams = ngrams
  @check_unknown_words = unknown_words

  # Per-run state; #start! resets the counters and result lists again.
  @p = nil
  @reference_line = 0
  @errors = []
  @unknown_words = []

  use_user_glossary(install_dir) if File.exist?('.glossary')
end

Instance Attribute Details

#errors ⇒ Object

Returns the value of attribute errors.



17
18
19
# File 'lib/gherkin_language/language_tool_process.rb', line 17

# Returns the current value of @errors. It is set to an empty array by
# #initialize and #start!, and replaced by #stop! with the result of
# #parse_errors.
def errors
  @errors
end

#unknown_words ⇒ Object

Returns the value of attribute unknown_words.



17
18
19
# File 'lib/gherkin_language/language_tool_process.rb', line 17

# Returns the current value of @unknown_words. It is set to an empty array by
# #initialize and #start!, and replaced by #stop! with the result of
# #parse_unknown_words.
def unknown_words
  @unknown_words
end

Instance Method Details

#check_paragraph(paragraph) ⇒ Object



93
94
95
96
97
98
99
# File 'lib/gherkin_language/language_tool_process.rb', line 93

# Feeds one paragraph to the running process and reports which reference
# lines it occupies.
#
# The line counter is sampled before and after the paragraph is sent; the
# "\n\n" separator is sent afterwards, so it is not part of the range.
#
# @param paragraph [String] text to check
# @return [Range] reference line before sending .. reference line after sending
def check_paragraph(paragraph)
  first_line = @reference_line
  send(paragraph)
  last_line = @reference_line
  send("\n\n")
  first_line..last_line
end

#decode_error(error) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
# File 'lib/gherkin_language/language_tool_process.rb', line 115

def decode_error(error)
  Error.new(
    error.attributes['category'],
    error.attributes['context'].strip,
    error.attributes['locqualityissuetype'],
    error.attributes['msg'],
    error.attributes['replacements'],
    error.attributes['ruleId'],
    error.attributes['fromy'].to_i,
    error.attributes['toy'].to_i)
end

#download(path, url) ⇒ Object



49
50
51
52
53
54
# File 'lib/gherkin_language/language_tool_process.rb', line 49

# Downloads a zip archive and unpacks it into +path+.
#
# The external +wget+ and +unzip+ programs are started with explicit argument
# lists rather than through a shell command string, so a +path+ or +url+ that
# contains spaces or shell metacharacters is passed through verbatim.
#
# @param path [String] directory the archive is extracted into (created if
#   it does not exist)
# @param url [String] location of the zip archive
# @return [Boolean, nil] result of the +unzip+ call, or the falsy result of
#   +wget+ when the download itself failed
def download(path, url)
  archive = '/var/tmp/languagetool.zip'
  fetched = system('wget', '-q', '-O', archive, url)
  FileUtils.mkdir_p path
  # There is nothing to unpack when the download did not succeed.
  fetched && system('unzip', '-qq', '-o', archive, '-d', path)
ensure
  # Remove the temporary archive even when one of the steps above raised.
  FileUtils.rm_f archive
end

#parse_errors(result) ⇒ Object



106
107
108
109
110
111
112
113
# File 'lib/gherkin_language/language_tool_process.rb', line 106

# Extracts every <error> element from a LanguageTool report.
#
# @param result [String] XML text of the report
# @return [Array] one #decode_error result per element matching '//error',
#   in document order
def parse_errors(result)
  report = REXML::Document.new(result)
  report.elements.to_a('//error').map { |node| decode_error(node) }
end

#parse_unknown_words(result) ⇒ Object



127
128
129
130
131
132
133
134
# File 'lib/gherkin_language/language_tool_process.rb', line 127

# Extracts the words listed below <unknown_words> in a LanguageTool report.
#
# @param result [String] XML text of the report
# @return [Array<String>] text of every element matching
#   '//unknown_words/word', in document order
def parse_unknown_words(result)
  report = REXML::Document.new(result)
  report.elements.to_a('//unknown_words/word').map(&:text)
end

#send(sentence) ⇒ Object



101
102
103
104
# File 'lib/gherkin_language/language_tool_process.rb', line 101

# Writes text to the running process and advances the reference line counter
# by the number of newlines the text contains.
#
# NOTE(review): this method has the same name as Ruby's built-in Object#send,
# so dynamic dispatch on instances of this class has to go through __send__.
#
# @param sentence [String] text to pass to the process
# @return [Object] whatever @p.write returns
def send(sentence)
  newline_count = sentence.count("\n")
  @reference_line += newline_count
  @p.write(sentence)
end

#start! ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/gherkin_language/language_tool_process.rb', line 56

# Resets the collected results and launches LanguageTool as a child process
# that reads the text to check from its standard input.
#
# The command is run from the LanguageTool directory with stderr merged into
# stdout. '--list-unknown' is added when unknown-word checking was requested
# and '--languagemodel' when n-grams were requested.
#
# NOTE(review): @ngrams_path is interpolated into the shell command without
# quoting — confirm it can never contain spaces.
#
# @return [IO] the pipe connected to the process (also stored in @p)
def start!
  @errors = []
  @unknown_words = []
  @reference_line = 0
  Dir.chdir("#{@path}/#{VERSION}/") do
    # Each fragment carries its own trailing space, so joining them yields
    # exactly the command line the options describe.
    fragments = ['java -jar languagetool-commandline.jar ']
    fragments << '--list-unknown ' if @check_unknown_words
    fragments << '--api --language en-us '
    fragments << "--languagemodel #{@ngrams_path}" if @ngrams
    @p = IO.popen("#{fragments.join} - 2>&1", 'r+')
  end
end

#stop! ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/gherkin_language/language_tool_process.rb', line 136

# Finishes the LanguageTool run and collects its findings.
#
# Closes the write side of the pipe, reads the report up to (but excluding)
# the "<!--" line, closes the pipe, and stores the parsed results in @errors
# and @unknown_words.
#
# The output is consumed with each_line, so a process that exits without
# printing anything yields an empty report and reaches the exit-status check
# instead of raising EOFError on the first read. The pipe is closed before
# parsing so that it is released even when parsing raises.
#
# @return [nil]
# @raise [RuntimeError] 'language tool failed' when the process did not exit
#   successfully
def stop!
  @p.close_write
  report = @p.each_line.take_while { |line| line != "<!--\n" }.join
  @p.close
  # Closing a pipe opened with IO.popen sets $?; keep it before anything else
  # can replace it.
  status = $?
  @errors = parse_errors report
  @unknown_words = parse_unknown_words report
  raise 'language tool failed' unless status.success?
end

#tag(sentences) ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/gherkin_language/language_tool_process.rb', line 69

# Runs the LanguageTool tagger over the given sentences and returns its
# output with one tagged token per line.
#
# The process output is read up to (but excluding) the "<!--" line. Spaces
# become newlines, a newline is inserted after every ']', and the doubled
# newlines this produces are collapsed. The last two steps use gsub because
# String#tr maps single characters and cannot insert text or match a
# two-character sequence.
#
# The pipe is opened in block form so it is closed even when reading raises,
# and reading with each_line tolerates a process that prints nothing.
#
# @param sentences [#each] strings written to the tagger's standard input
# @return [String] the reformatted tagger output
def tag(sentences)
  command = 'java -jar languagetool-commandline.jar --taggeronly --api --language en-US - 2>/dev/null'
  raw = Dir.chdir("#{@path}/#{VERSION}/") do
    IO.popen(command, 'r+') do |pipe|
      sentences.each { |sentence| pipe.write sentence }
      pipe.close_write
      pipe.each_line.take_while { |line| line != "<!--\n" }.join
    end
  end
  raw.tr(' ', "\n").gsub(']', "]\n").gsub("\n\n", "\n")
end

#use_user_glossary(path) ⇒ Object



41
42
43
44
45
46
47
# File 'lib/gherkin_language/language_tool_process.rb', line 41

# Temporarily replaces LanguageTool's English 'added.txt' word list with the
# '.glossary' file from the current working directory.
#
# The existing list is first copied to 'added.copy'; the glossary is installed
# only when that backup succeeded. An at_exit hook copies the backup over
# 'added.txt' again when the Ruby process terminates.
#
# +cp+ is started with an explicit argument list rather than through a shell
# command string, so a +path+ containing spaces or shell metacharacters is
# handled correctly.
#
# @param path [String] root directory of the unpacked LanguageTool distribution
# @return [Proc] the registered at_exit handler
def use_user_glossary(path)
  resource_path = "#{path}/org/languagetool/resource/en"
  word_list = "#{resource_path}/added.txt"
  backup = "#{resource_path}/added.copy"
  system('cp', word_list, backup) && system('cp', '.glossary', word_list)
  at_exit { system('cp', backup, word_list) }
end