Class: Wukong::FlatPack::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/wukong/model/flatpack_parser/parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(lang) ⇒ Parser

Returns a new instance of Parser.



7
8
9
10
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 7

# Builds a parser for the given language.
#
# Stores the language and the regular expression that re_from_language
# derives from it.
#
# @param lang [Object] the language; it is iterated with +each+ and its
#   elements are asked for +re+ when the regular expression is built
def initialize(lang)
  @lang = lang
  @re = re_from_language(lang)
end

Instance Attribute Details

#lang ⇒ Object

Returns the value of attribute lang.



5
6
7
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 5

# Reader for the language this parser was constructed with.
#
# @return [Object] the value of @lang, as assigned in initialize
def lang
  @lang
end

#re ⇒ Object

Returns the value of attribute re.



4
5
6
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 4

# Reader for the regular expression used to match input strings.
#
# @return [Regexp] the value of @re, which initialize sets to the result
#   of re_from_language
def re
  @re
end

Instance Method Details

#file_to_tsv(in_filename, out_filename, trim = true) ⇒ Object



41
42
43
44
45
46
47
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 41

# Converts every line of a flat file to TSV and appends it to another file.
#
# Each input line is passed through line_to_tsv and the result is written
# to the output file. Both files are opened with the block form of
# File.open so they are closed (and the output flushed) when the method
# returns or raises, instead of leaking the handles.
#
# @param in_filename [String] path of the file to read
# @param out_filename [String] path of the file to append to (mode 'a')
# @param trim [Boolean] forwarded to line_to_tsv
# @return [File] the input file handle, already closed on return
def file_to_tsv(in_filename, out_filename, trim = true)
  # The input is opened first so that a missing input file raises before
  # the output file is created.
  File.open(in_filename, 'r') do |infile|
    File.open(out_filename, 'a') do |outfile|
      infile.each_line do |line|
        outfile.write(line_to_tsv(line, trim))
      end
    end
  end
end

#line_to_tsv(line, trim = true) ⇒ Object



49
50
51
52
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 49

# Renders one input line as a single TSV row.
#
# The line is handed to parse; the resulting fields are joined with tab
# characters and terminated with a newline.
#
# @param line [String] the line to convert
# @param trim [Boolean] forwarded to parse
# @return [String] the tab-separated fields followed by "\n"
def line_to_tsv(line, trim = true)
  row = parse(line, trim).join("\t")
  row << "\n"
end

#parse(str, trim = false) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 28

# Splits a string into translated field values using the parser's language.
#
# The string is matched once against @re. Each capture group is passed to
# the +translate+ method of the language element at the same index, and
# every :ignore result is removed from the returned list.
#
# @param str [String] the string to parse
# @param trim [Boolean] when true, String results are stripped of leading
#   and trailing whitespace
# @return [Array, nil] the translated values, or nil when the string does
#   not match @re
def parse(str, trim = false)
  match = @re.match(str)
  return nil unless match

  tokens = match.captures.each_with_index.map do |val, index|
    token = lang[index].translate(val)
    if trim && token.is_a?(String)
      # Non-mutating strip: the value returned by translate may be frozen
      # or shared, so it must not be modified in place.
      token.strip
    else
      token
    end
  end
  tokens - [:ignore]
end

#re_from_language(lang) ⇒ Object

Creates a regular expression from the supplied language.



19
20
21
22
23
24
25
26
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 19

# Builds the regular expression that recognises one record of the language.
#
# The +re+ of every element of the language is wrapped in its own capture
# group; the groups are concatenated in order between "^" and "$".
#
# @param lang [Enumerable] elements responding to +re+
# @return [Regexp] the combined expression
def re_from_language(lang)
  groups = lang.map { |token| "(#{token.re})" }
  Regexp.new("^" + groups.join + "$")
end

#string_in_lang?(str) ⇒ Boolean

Returns true if the supplied string is in the parser’s language.

Returns:

  • (Boolean)


13
14
15
# File 'lib/wukong/model/flatpack_parser/parser.rb', line 13

# Tests whether a string matches the parser's regular expression.
#
# @param str [String] the string to test against @re
# @return [Boolean] true when the string matches, false otherwise
def string_in_lang?(str)
  position = str =~ @re
  !position.nil?
end