Class: Wukong::FlatPack::Parser
- Inherits:
-
Object
- Object
- Wukong::FlatPack::Parser
- Defined in:
- lib/wukong/model/flatpack_parser/parser.rb
Instance Attribute Summary collapse
-
#lang ⇒ Object
Returns the value of attribute lang.
-
#re ⇒ Object
Returns the value of attribute re.
Instance Method Summary collapse
-
#file_to_tsv(in_filename, out_filename, trim = true) ⇒ Object
-
#initialize(lang) ⇒ Parser
constructor
A new instance of Parser.
-
#line_to_tsv(line, trim = true) ⇒ Object
-
#parse(str, trim = false) ⇒ Object
-
#re_from_language(lang) ⇒ Object
Creates a regular expression from the supplied language.
-
#string_in_lang?(str) ⇒ Boolean
Returns true if the supplied string is in the parser’s language.
Constructor Details
#initialize(lang) ⇒ Parser
Returns a new instance of Parser.
7 8 9 10 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 7
#
# Builds a parser for the supplied language and precompiles its regular
# expression.
#
# @param lang [#each] the tokens making up the language; stored in @lang and
#   handed to re_from_language, whose result is cached in @re
def initialize(lang)
  @lang = lang
  @re = re_from_language(lang)
end
Instance Attribute Details
#lang ⇒ Object
Returns the value of attribute lang.
5 6 7 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 5
#
# Reader for the language this parser was constructed with.
#
# @return [Object] the current value of @lang
def lang
  @lang
end
#re ⇒ Object
Returns the value of attribute re.
4 5 6 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 4
#
# Reader for the regular expression compiled from the parser's language.
#
# @return [Object] the current value of @re
def re
  @re
end
Instance Method Details
#file_to_tsv(in_filename, out_filename, trim = true) ⇒ Object
41 42 43 44 45 46 47 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 41
#
# Converts every line of in_filename with line_to_tsv and appends the results
# to out_filename.
#
# Both files are opened in block form so that their handles are closed (and
# the output buffer flushed) when the method returns or when a line raises.
#
# @param in_filename [String] path of the file to read
# @param out_filename [String] path of the file to append to (opened in 'a' mode)
# @param trim [Boolean] forwarded unchanged to line_to_tsv
# @return [File] the input file handle, already closed
def file_to_tsv(in_filename, out_filename, trim = true)
  # The input is opened first, so a missing input file raises before the
  # output file is created or touched.
  File.open(in_filename, 'r') do |infile|
    File.open(out_filename, 'a') do |outfile|
      # each_line returns infile, which becomes the method's return value.
      infile.each_line { |line| outfile.write(line_to_tsv(line, trim)) }
    end
  end
end
#line_to_tsv(line, trim = true) ⇒ Object
49 50 51 52 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 49
#
# Parses one line and renders its fields as a single tab-separated,
# newline-terminated row.
#
# @param line [String] the line handed to parse
# @param trim [Boolean] forwarded unchanged to parse
# @return [String] the parsed fields joined with "\t" and followed by "\n"
# @raise [ArgumentError] when parse returns nil for the line
def line_to_tsv(line, trim = true)
  fields = parse(line, trim)
  # Fail with a message naming the offending line instead of the opaque
  # NoMethodError that calling join on nil would produce.
  raise ArgumentError, "line is not in the parser's language: #{line.inspect}" if fields.nil?

  "#{fields.join("\t")}\n"
end
#parse(str, trim = false) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 28
#
# Splits str into translated tokens using the parser's regular expression.
#
# Each capture group is handed to the translate method of the token at the
# same position in @lang. Results equal to :ignore are dropped from the
# returned array.
#
# @param str [String] the text to parse
# @param trim [Boolean] when true, String tokens have surrounding whitespace
#   removed; non-String tokens are left as returned by translate
# @return [Array, nil] the translated tokens, or nil when str does not match @re
def parse(str, trim = false)
  # Match once and reuse the MatchData. Regexp#match also returns nil for a
  # nil str, keeping the "not in the language => nil" contract.
  match = @re.match(str)
  return nil unless match

  tokens = match.captures.each_with_index.map do |val, index|
    token = @lang[index].translate(val)
    # strip (not strip!) so frozen tokens work and the object returned by
    # translate is never mutated in place.
    (trim && token.is_a?(String)) ? token.strip : token
  end
  tokens - [:ignore]
end
#re_from_language(lang) ⇒ Object
Creates a regular expression from the supplied language.
19 20 21 22 23 24 25 26 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 19
#
# Compiles the supplied language into one regular expression.
#
# Every token contributes its own capture group, in order, built from the
# value of its re method; the groups are concatenated between "^" and "$".
# Note that in Ruby "^" and "$" anchor at line boundaries, not string boundaries.
#
# @param lang [Array] tokens, each responding to re
# @return [Regexp] the compiled pattern
def re_from_language(lang)
  groups = lang.map { |token| "(#{token.re})" }
  Regexp.new("^#{groups.join}$")
end
#string_in_lang?(str) ⇒ Boolean
Returns true if the supplied string is in the parser’s language.
13 14 15 |
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 13
#
# Tells whether the supplied string matches the parser's regular expression.
#
# @param str [String] the text to test against @re
# @return [Boolean] true when str =~ @re finds a match, false otherwise
def string_in_lang?(str)
  match_offset = str =~ @re
  !match_offset.nil?
end