Class: TbxImporter::Tbx
- Inherits: Object
- Inheritance hierarchy: Object → TbxImporter::Tbx
- Defined in:
- lib/tbx_importer.rb
Instance Attribute Summary
-
#encoding ⇒ Object
readonly
Returns the value of attribute encoding.
-
#file_path ⇒ Object
readonly
Returns the value of attribute file_path.
Instance Method Summary
- #import ⇒ Object
-
#initialize(file_path:, **args) ⇒ Tbx
constructor
A new instance of Tbx.
- #stats ⇒ Object
Constructor Details
#initialize(file_path:, **args) ⇒ Tbx
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/tbx_importer.rb', line 10
#
# Stores the path of the TBX file, settles on the encoding to use for it and
# prepares the empty @doc accumulator.
#
# The file content is read into @content unless the caller passed
# encoding: 'UTF-8' (exact match). When no :encoding is given, the encoding is
# taken from CharlockHolmes detection on the first part of the content; if that
# yields nothing, the encoding="..." attribute found in the content is used
# ('UTF-8' stays 'UTF-8', 'UTF-16' is treated as 'UTF-16LE'). For any encoding
# other than 'UTF-8', @text receives the content converted to UTF-8.
#
# @param file_path [String] path of the TBX file
# @param args [Hash] extra keyword options; only :encoding is read here
# @option args [String] :encoding 'UTF-8', 'UTF-16LE' or 'UTF-16BE'
#   (upcased before use)
# @raise [RuntimeError] when no encoding could be determined, or when the
#   resulting encoding is not UTF-8, UTF-16LE or UTF-16BE
def initialize(file_path:, **args)
  @file_path = file_path
  # File.read opens and closes the file on its own. The former
  # File.read(open(path)) never closed the handle returned by Kernel#open,
  # and Kernel#open treats a path starting with "|" as a command to run.
  @content = File.read(@file_path) unless args[:encoding].eql?('UTF-8')

  if args[:encoding].nil?
    # Guard against a nil detection result before indexing into it.
    detected = CharlockHolmes::EncodingDetector.detect(@content[0..100_000])
    @encoding = detected && detected[:encoding]

    if @encoding.nil?
      # Fall back to the encoding attribute written in the file itself.
      # Non-bang scrub/delete are used on purpose: gsub! returns nil when the
      # content holds no NUL byte, which broke the rest of the chain.
      header = @content.dup.force_encoding('utf-8').scrub('*').delete("\0")
      # [^"]* stops at the closing quote of the attribute; a greedy .* ran on
      # to the last quote of the line (e.g. into standalone="yes").
      declared = header[/(?<=encoding=")[^"]*(?=")/]
      # A missing declaration leaves @encoding nil so the descriptive error
      # below is raised instead of a NoMethodError.
      case declared && declared.upcase
      when 'UTF-8'  then @encoding = 'UTF-8'
      when 'UTF-16' then @encoding = 'UTF-16LE'
      end
    end
  else
    @encoding = args[:encoding].upcase
  end

  @doc = {
    source_language: "",
    tc: { id: "", counter: 0, vals: [], lang: "", definition: "" },
    term: { lang: "", counter: 0, vals: [], part_of_speech: "", term: "" },
    language_pairs: [],
    term_entry: false
  }

  raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
  raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless %w[UTF-8 UTF-16LE UTF-16BE].include?(@encoding)

  @text = CharlockHolmes::Converter.convert(@content, @encoding, 'UTF-8') unless @encoding.eql?('UTF-8')
end
Instance Attribute Details
#encoding ⇒ Object (readonly)
Returns the value of attribute encoding.
9 10 11 |
# File 'lib/tbx_importer.rb', line 9
#
# Reader for the encoding attribute.
#
# @return [Object] the current value of @encoding
def encoding
  @encoding
end
#file_path ⇒ Object (readonly)
Returns the value of attribute file_path.
9 10 11 |
# File 'lib/tbx_importer.rb', line 9
#
# Reader for the file_path attribute.
#
# @return [Object] the current value of @file_path
def file_path
  @file_path
end
Instance Method Details
#import ⇒ Object
47 48 49 50 51 |
# File 'lib/tbx_importer.rb', line 47
#
# Hands the result of read_file to parse_file, then returns the values
# gathered in @doc.
#
# @return [Array] two elements: @doc[:tc][:vals] followed by @doc[:term][:vals]
def import
  parse_file(read_file)
  @doc.values_at(:tc, :term).map { |section| section[:vals] }
end
#stats ⇒ Object
38 39 40 41 42 43 44 45 |
# File 'lib/tbx_importer.rb', line 38
#
# Runs the analysis routine matching the encoding (analyze_stats_utf_8 for
# 'UTF-8', analyze_stats_utf_16 for anything else) and reports the counters
# held in @doc.
#
# @return [Hash] :tc_count and :term_count taken from the @doc counters, and
#   :language_pairs with duplicates removed
def stats
  encoding.eql?('UTF-8') ? analyze_stats_utf_8 : analyze_stats_utf_16

  tc_section = @doc[:tc]
  term_section = @doc[:term]
  {
    tc_count: tc_section[:counter],
    term_count: term_section[:counter],
    language_pairs: @doc[:language_pairs].uniq
  }
end