Class: LanguageToolProcess
- Inherits:
-
Object
- Object
- LanguageToolProcess
- Defined in:
- lib/gherkin_language/language_tool_process.rb
Overview
This service class provides access to the LanguageTool process.
Constant Summary collapse
- VERSION =
'LanguageTool-3.9'.freeze
- URL =
"https://www.languagetool.org/download/#{VERSION}.zip".freeze
- NGRAM_VERSION =
'ngrams-en-20150817'.freeze
- NGRAM_URL =
"https://languagetool.org/download/ngram-data/#{NGRAM_VERSION}.zip".freeze
Instance Attribute Summary collapse
-
#errors ⇒ Object
Returns the value of attribute errors.
-
#unknown_words ⇒ Object
Returns the value of attribute unknown_words.
Instance Method Summary collapse
- #check_paragraph(paragraph) ⇒ Object
- #decode_error(error) ⇒ Object
- #download(path, url) ⇒ Object
-
#initialize(ngrams = false, unknown_words = false) ⇒ LanguageToolProcess
constructor
A new instance of LanguageToolProcess.
- #parse_errors(result) ⇒ Object
- #parse_unknown_words(result) ⇒ Object
- #send(sentence) ⇒ Object
- #start! ⇒ Object
- #stop! ⇒ Object
- #tag(sentences) ⇒ Object
- #use_user_glossary(path) ⇒ Object
Constructor Details
#initialize(ngrams = false, unknown_words = false) ⇒ LanguageToolProcess
Returns a new instance of LanguageToolProcess.
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/gherkin_language/language_tool_process.rb', line 24

# Sets up a checker whose working data lives under the system temp directory.
#
# Fetches the LanguageTool distribution when its command line jar is missing,
# optionally fetches the n-gram data, and installs a user glossary when a
# `.glossary` file exists in the current working directory. No Java process
# is spawned here.
#
# @param ngrams [Boolean] when truthy, remember the n-gram directory and
#   download the n-gram data unless that directory already exists
# @param unknown_words [Boolean] stored as @check_unknown_words
def initialize(ngrams = false, unknown_words = false)
  @path = Dir.tmpdir
  @p = nil
  @reference_line = 0
  @errors = []
  @unknown_words = []
  @check_unknown_words = unknown_words
  @ngrams = ngrams

  jar = "#{@path}/#{VERSION}/languagetool-commandline.jar"
  download(@path, URL) unless File.exist?(jar)

  if ngrams
    @ngrams_path = "#{@path}/#{NGRAM_VERSION}"
    download("#{@ngrams_path}/en", NGRAM_URL) unless File.exist?(@ngrams_path)
  end

  use_user_glossary("#{@path}/#{VERSION}") if File.exist?('.glossary')
end
Instance Attribute Details
#errors ⇒ Object
Returns the value of attribute errors.
17 18 19 |
# File 'lib/gherkin_language/language_tool_process.rb', line 17

# Reader for the @errors instance variable.
#
# @return [Object] whatever is currently stored in @errors
def errors
  @errors
end
#unknown_words ⇒ Object
Returns the value of attribute unknown_words.
17 18 19 |
# File 'lib/gherkin_language/language_tool_process.rb', line 17

# Reader for the @unknown_words instance variable.
#
# @return [Object] whatever is currently stored in @unknown_words
def unknown_words
  @unknown_words
end
Instance Method Details
#check_paragraph(paragraph) ⇒ Object
93 94 95 96 97 98 99 |
# File 'lib/gherkin_language/language_tool_process.rb', line 93

# Writes one paragraph to the checker and reports which lines it occupies.
#
# @param paragraph [String] text handed to #send
# @return [Range] from the value of @reference_line before the paragraph was
#   sent to its value right after
def check_paragraph(paragraph)
  first_line = @reference_line
  send paragraph
  last_line = @reference_line
  # The two trailing newlines advance @reference_line as well, but are
  # deliberately left out of the returned range.
  send "\n\n"
  first_line..last_line
end
#decode_error(error) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/gherkin_language/language_tool_process.rb', line 115

# Builds an Error from the attributes of one `error` node.
#
# @param error [#attributes] node whose attributes are read by name
# @return [Error] constructed from category, stripped context,
#   locqualityissuetype, msg, replacements, ruleId and the integer values of
#   fromy and toy, in that order
def decode_error(error)
  attrs = error.attributes
  Error.new(
    attrs['category'],
    attrs['context'].strip,
    attrs['locqualityissuetype'],
    attrs['msg'],
    attrs['replacements'],
    attrs['ruleId'],
    attrs['fromy'].to_i,
    attrs['toy'].to_i
  )
end
#download(path, url) ⇒ Object
49 50 51 52 53 54 |
# File 'lib/gherkin_language/language_tool_process.rb', line 49

# Downloads a zip archive with wget and unpacks it into +path+.
#
# The external commands are invoked with argument lists instead of an
# interpolated shell string, so a URL or path containing whitespace or shell
# metacharacters is passed through verbatim.
#
# @param path [String] directory to extract into; created when missing
# @param url [String] location of the zip archive
# @return [Boolean, nil] the result of the `unzip` invocation
# @raise [RuntimeError] when wget cannot be run or exits unsuccessfully
def download(path, url)
  archive = '/var/tmp/languagetool.zip'
  # Without a downloaded archive there is nothing to unpack; fail here with a
  # clear message instead of continuing with a missing installation.
  raise "download of #{url} failed" unless system('wget', '-q', '-O', archive, url)

  FileUtils.mkdir_p path
  system('unzip', '-qq', '-o', archive, '-d', path)
ensure
  # Remove the temporary archive on success and on failure alike.
  FileUtils.rm_f archive
end
#parse_errors(result) ⇒ Object
106 107 108 109 110 111 112 113 |
# File 'lib/gherkin_language/language_tool_process.rb', line 106

# Extracts every `error` element from an XML report.
#
# @param result [String] XML text to parse
# @return [Array] one #decode_error result per element matching `//error`,
#   in document order
def parse_errors(result)
  report = REXML::Document.new(result)
  report.elements.to_a('//error').map { |node| decode_error(node) }
end
#parse_unknown_words(result) ⇒ Object
127 128 129 130 131 132 133 134 |
# File 'lib/gherkin_language/language_tool_process.rb', line 127

# Extracts the unknown words listed in an XML report.
#
# @param result [String] XML text to parse
# @return [Array<String>] text of every element matching
#   `//unknown_words/word`, in document order
def parse_unknown_words(result)
  report = REXML::Document.new(result)
  report.elements.to_a('//unknown_words/word').map(&:text)
end
#send(sentence) ⇒ Object
101 102 103 104 |
# File 'lib/gherkin_language/language_tool_process.rb', line 101

# Writes text to the running process and tracks how many lines were sent.
#
# NOTE(review): this definition replaces the inherited Object#send for
# instances of this class; dynamic dispatch has to go through __send__.
#
# @param sentence [String] text to write; each "\n" in it advances
#   @reference_line by one
# @return [Object] whatever @p.write returns
def send(sentence)
  newlines = sentence.count("\n")
  @reference_line += newlines
  @p.write(sentence)
end
#start! ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/gherkin_language/language_tool_process.rb', line 56

# Resets the collected results and spawns the LanguageTool command line
# process from inside its installation directory.
#
# The process reads from standard input (`-`), has stderr merged into stdout,
# and is stored in @p as a read/write pipe.
#
# @return [IO] the pipe connected to the spawned process
def start!
  @reference_line = 0
  @errors = []
  @unknown_words = []

  Dir.chdir("#{@path}/#{VERSION}/") do
    command = +'java -jar languagetool-commandline.jar '
    command << '--list-unknown ' if @check_unknown_words
    command << '--api --language en-us '
    command << "--languagemodel #{@ngrams_path}" if @ngrams
    command << ' - 2>&1'
    @p = IO.popen(command, 'r+')
  end
end
#stop! ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/gherkin_language/language_tool_process.rb', line 136

# Finishes the running check: closes the write side of @p, collects the
# output up to (not including) the first "<!--\n" line, parses it into
# @errors and @unknown_words, and closes the pipe.
#
# A process that produces no output at all no longer raises EOFError; it is
# handled like any other output and the exit status decides the outcome. The
# pipe is closed even when parsing raises.
#
# @return [nil]
# @raise [RuntimeError] 'language tool failed' when the process exit status
#   is not successful
def stop!
  @p.close_write
  report = +''
  begin
    # each_line simply ends at EOF, so empty output needs no special case.
    @p.each_line do |line|
      break if line == "<!--\n"

      report << line
    end
    @errors = parse_errors report
    @unknown_words = parse_unknown_words report
  ensure
    # Closing the pipe waits for the child and sets $?.
    @p.close
  end
  raise 'language tool failed' unless $?.success?
end
#tag(sentences) ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/gherkin_language/language_tool_process.rb', line 69

# Runs LanguageTool in tagger-only mode over the given sentences.
#
# All sentences are written to the process first; its output is then read up
# to (not including) the first "<!--\n" line. The process' stderr is
# discarded. A process that prints nothing yields an empty string instead of
# raising EOFError, and the pipe is closed even if reading raises.
#
# @param sentences [#each] strings written to the tagger in order
# @return [String] the collected output with every space replaced by a
#   newline
def tag(sentences)
  output = +''
  Dir.chdir("#{@path}/#{VERSION}/") do
    command = 'java -jar languagetool-commandline.jar --taggeronly --api --language en-US - 2>/dev/null'
    # The block form closes the pipe on every exit path.
    IO.popen(command, 'r+') do |tagger|
      sentences.each { |sentence| tagger.write sentence }
      tagger.close_write
      tagger.each_line do |line|
        break if line == "<!--\n"

        output << line
      end
    end
  end
  # NOTE(review): the previous implementation also called
  # tr!(']', "]\n") and tr!("\n\n", "\n"). String#tr maps character for
  # character, so both calls left the text untouched; they are omitted here
  # to keep the output identical. If a line break after every "]" and
  # collapsed blank lines were intended, that needs gsub/squeeze and a
  # deliberate change of the output format.
  output.tr(' ', "\n")
end
#use_user_glossary(path) ⇒ Object
41 42 43 44 45 46 47 |
# File 'lib/gherkin_language/language_tool_process.rb', line 41

# Replaces LanguageTool's English `added.txt` with the `.glossary` file from
# the current working directory and restores the original at process exit.
#
# The stock file is first copied to `added.copy`; the glossary is only
# installed when that backup succeeded. Copies are run with argument lists
# rather than interpolated shell strings, so paths containing whitespace
# work.
#
# @param path [String] root of the LanguageTool installation
# @return [Proc] the registered at_exit handler
def use_user_glossary(path)
  resource_path = "#{path}/org/languagetool/resource/en"
  stock = "#{resource_path}/added.txt"
  backup = "#{resource_path}/added.copy"

  system('cp', stock, backup) && system('cp', '.glossary', stock)

  # Put the stock word list back when the Ruby process terminates.
  at_exit { system('cp', backup, stock) }
end