Class: Rababa::Encoders::TextEncoder

Inherits:
Object
  • Object
show all
Includes:
Harakats
Defined in:
lib/rababa/encoders.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Harakats

#basic_cleaners, #collapse_whitespace, #extract_haraqat, #extract_stack, #remove_diacritics, #valid_arabic_cleaners

Constructor Details

#initialize(input_chars, target_charts, cleaner, reverse_input) ⇒ TextEncoder

Returns a new instance of TextEncoder.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/rababa/encoders.rb', line 20

# Builds the lookup tables that translate input characters and target
# (haraqat) symbols to integer ids and back.
#
# Ids are simply positions in the symbol lists, with the pad symbol "P"
# prepended at index 0 of both lists.
#
# @param input_chars [Array] input symbols (presumably single-character
#   strings of undiacritized Arabic; confirm against the caller)
# @param target_charts [Array] target symbols (haraqat)
# @param cleaner [String] cleaner name that #clean dispatches on
# @param reverse_input [Boolean] when truthy, #input_to_sequence reverses
#   the text before encoding it
def initialize(input_chars, target_charts,
               cleaner,
               reverse_input)

    # name of the cleaner function
    @cleaner = cleaner

    @pad = "P"
    @input_symbols = [@pad] + input_chars
    @target_symbols = [@pad] + target_charts

    # The tables are built from [key, value] pairs with to_h instead of
    # Hash[*pairs.flatten]: flatten is recursive (a nested symbol would
    # corrupt the pairs) and the splat passes every element as an argument.
    # When a symbol occurs twice, the last index wins in symbol => id.

    # encoding of arabic without diacritics
    @input_symbol_to_id = @input_symbols.each_with_index.to_h
    @input_id_to_symbol =
        @input_symbols.each_with_index.map { |sym, id| [id, sym] }.to_h

    # encoding of haraqats
    @target_symbol_to_id = @target_symbols.each_with_index.to_h
    @target_id_to_symbol =
        @target_symbols.each_with_index.map { |sym, id| [id, sym] }.to_h
    @utarget_id_to_symbol =
        Rababa::ArabicConstants::UALL_POSSIBLE_HARAQAT.keys
            .each_with_index.map { |sym, id| [id, sym] }.to_h

    @reverse_input = reverse_input
    @input_pad_id = @input_symbol_to_id[@pad]
    @start_symbol_id = nil
end

Instance Attribute Details

#input_id_to_symbol ⇒ Object

Returns the value of attribute input_id_to_symbol.



16
17
18
# File 'lib/rababa/encoders.rb', line 16

# Reader for @input_id_to_symbol, the index => symbol Hash the constructor
# builds over the pad symbol followed by input_chars.
def input_id_to_symbol
  @input_id_to_symbol
end

#input_pad_id ⇒ Object

Returns the value of attribute input_pad_id.



16
17
18
# File 'lib/rababa/encoders.rb', line 16

# Reader for @input_pad_id, which the constructor sets to the id that the
# input symbol => id table holds for the pad symbol.
def input_pad_id
  @input_pad_id
end

#start_symbol_id ⇒ Object

Returns the value of attribute start_symbol_id.



16
17
18
# File 'lib/rababa/encoders.rb', line 16

# Reader for @start_symbol_id; the constructor initializes it to nil.
def start_symbol_id
  @start_symbol_id
end

#target_id_to_symbol ⇒ Object

Returns the value of attribute target_id_to_symbol.



16
17
18
# File 'lib/rababa/encoders.rb', line 16

# Reader for @target_id_to_symbol, the index => symbol Hash the constructor
# builds over the pad symbol followed by target_charts.
def target_id_to_symbol
  @target_id_to_symbol
end

#utarget_id_to_symbol ⇒ Object

Returns the value of attribute utarget_id_to_symbol.



16
17
18
# File 'lib/rababa/encoders.rb', line 16

# Reader for @utarget_id_to_symbol, the index => key Hash the constructor
# builds from the keys of Rababa::ArabicConstants::UALL_POSSIBLE_HARAQAT.
def utarget_id_to_symbol
  @utarget_id_to_symbol
end

Instance Method Details

#clean(text) ⇒ Object

Applies the cleaner selected at initialization ("basic_cleaners" or "valid_arabic_cleaners") to the text.



51
52
53
54
55
56
57
# File 'lib/rababa/encoders.rb', line 51

# Runs +text+ through the cleaner whose name is stored in @cleaner.
#
# @param text [String] text to clean
# @return [Object, nil] whatever the selected cleaner returns, or nil when
#   @cleaner is neither "basic_cleaners" nor "valid_arabic_cleaners"
def clean(text)
    case @cleaner
    when "basic_cleaners" then basic_cleaners(text)
    when "valid_arabic_cleaners" then valid_arabic_cleaners(text)
    end
end

#input_to_sequence(text) ⇒ Object

String -> Seq of chars -> List of indices



60
61
62
63
64
65
66
67
68
# File 'lib/rababa/encoders.rb', line 60

# Encodes +text+ as a list of input-symbol ids.
#
# The text is split into characters (reversed first when @reverse_input is
# truthy), each character is looked up in @input_symbol_to_id, and
# characters with no entry in the table are dropped.
#
# @param text [String] text to encode
# @return [Array] ids of the known characters, in (possibly reversed) order
def input_to_sequence(text)
    symbols = text.chars
    # Reverse the character array directly rather than joining it back into
    # a string only to split it again.
    symbols = symbols.reverse if @reverse_input

    symbols.map { |s| @input_symbol_to_id[s] }.compact
end