Class: Pingilish::MappingFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/pingilish.rb

Constant Summary collapse

# Source characters for the one-to-one transliteration done with String#tr
# in #process. The character at each position is replaced by the character
# at the same position in ENGLISH_CHARS.
PERSIAN_CHARS =
"۱۲۳۴۵۶۷۸۹۰،×؛ابپتثجحدذرزسصضطظفکگلمنك؟"
# Replacement characters, aligned position-by-position with PERSIAN_CHARS.
ENGLISH_CHARS =
"1234567890,*;abptsjhdzrzssztzfkglmnk?"
# Location of the YAML mapping file, resolved relative to this file's directory.
CHAR_MAP_FILE_PATH =
File.join(File.dirname(__FILE__), '../db/char_maps.yml')
# Mapping parsed from the YAML file when this constant is evaluated.
# NOTE(review): presumably a Hash of source string => replacement string;
# #process iterates it as key/value pairs — confirm against char_maps.yml.
CHAR_MAP =
YAML.load_file(CHAR_MAP_FILE_PATH)

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ MappingFilter

Returns a new instance of MappingFilter.



31
32
33
# File 'lib/pingilish.rb', line 31

# Keeps the given value for later transliteration.
#
# @param text [Object] stored unchanged in @text; #process converts it
#   with to_s before use
def initialize(text)
  @text = text
end

Instance Method Details

#process ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/pingilish.rb', line 35

# Transliterates the stored text.
#
# Two passes are applied to a copy of the text:
# 1. a character-for-character swap of PERSIAN_CHARS with ENGLISH_CHARS;
# 2. every CHAR_MAP key is substituted by its value, one entry after the
#    other in the map's iteration order (so a later entry also sees the
#    output of earlier ones).
#
# @return [String] the transliterated copy; @text itself is not modified
#
# TODO: make a mapping hash instead of this chain of gsub calls
# TODO: replace non-Latin alphabet at the end
# TODO: return an object with alternatives (like chert/chort)
# TODO: separate the logic into classes: Tokenizer, StemmingFilter, DbFilter, GeneralMappingFilter
# TODO: separate the logic of dealing with a backend into a class, subclass it and write something for YML
# BUG: major bug, do not replace Latin chars that came from the DB by Latin chars in the mapping array!
# TODO: write the mapping filter iterations
def process
  # String#tr returns a new string, so the in-place edits below never touch @text.
  transliterated = @text.to_s.tr(PERSIAN_CHARS, ENGLISH_CHARS)

  CHAR_MAP.each_with_object(transliterated) do |(source, target), result|
    result.gsub!(source.to_s, target.to_s)
  end
end