Class: Edifunct::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/edifunct/tokenizer.rb

Overview

Tokenizer is responsible for splitting a message into segments, data elements, and components.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(release_character: "?", segment_terminator: "'", data_element_separator: "+", component_data_element_separator: ":") ⇒ Tokenizer

Returns a new instance of Tokenizer.



35
36
37
38
39
40
# File 'lib/edifunct/tokenizer.rb', line 35

# Stores the delimiter characters this tokenizer works with.
#
# @param release_character [String] defaults to "?"
# @param segment_terminator [String] defaults to "'"
# @param data_element_separator [String] defaults to "+"
# @param component_data_element_separator [String] defaults to ":"
def initialize(release_character: "?", segment_terminator: "'", data_element_separator: "+", component_data_element_separator: ":")
  # Ordered from the outermost delimiter (segment) to the innermost (component).
  @segment_terminator = segment_terminator
  @data_element_separator = data_element_separator
  @component_data_element_separator = component_data_element_separator
  @release_character = release_character
end

Instance Attribute Details

#component_data_element_separator ⇒ Object (readonly)

Returns the value of attribute component_data_element_separator.



33
34
35
# File 'lib/edifunct/tokenizer.rb', line 33

# Reader for the component data element separator given to the constructor.
#
# @return [Object] the current value of @component_data_element_separator
def component_data_element_separator; @component_data_element_separator; end

#data_element_separator ⇒ Object (readonly)

Returns the value of attribute data_element_separator.



33
34
35
# File 'lib/edifunct/tokenizer.rb', line 33

# Reader for the data element separator given to the constructor.
#
# @return [Object] the current value of @data_element_separator
def data_element_separator; @data_element_separator; end

#release_character ⇒ Object (readonly)

Returns the value of attribute release_character.



33
34
35
# File 'lib/edifunct/tokenizer.rb', line 33

# Reader for the release character given to the constructor.
#
# @return [Object] the current value of @release_character
def release_character; @release_character; end

#segment_terminator ⇒ Object (readonly)

Returns the value of attribute segment_terminator.



33
34
35
# File 'lib/edifunct/tokenizer.rb', line 33

# Reader for the segment terminator given to the constructor.
#
# @return [Object] the current value of @segment_terminator
def segment_terminator; @segment_terminator; end

Class Method Details

.for_message(edifact_message) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/edifunct/tokenizer.rb', line 9

# Builds a tokenizer whose delimiters match the given message.
#
# When the message starts with a "UNA" header, the six characters that follow
# it are read, in order, as: component data element separator, data element
# separator, decimal mark, release character, a reserved character and the
# segment terminator. The decimal mark and the reserved character are ignored.
# Without a "UNA" header the constructor defaults are used.
#
# @param edifact_message [String] the raw message text
# @return [Object] a new instance built with the detected delimiters
def for_message(edifact_message)
  return new unless edifact_message.start_with?("UNA")

  # Slice out just the six header characters instead of splitting the whole
  # message into single-character strings.
  component_data_element_separator,
  data_element_separator,
  _decimal_mark_delimiter,
  release_character,
  _reserved,
  segment_terminator = edifact_message[3, 6].chars

  delimiters = {
    release_character: release_character,
    segment_terminator: segment_terminator,
    data_element_separator: data_element_separator,
    component_data_element_separator: component_data_element_separator
  }

  # A truncated header leaves the trailing delimiters nil; dropping those
  # entries lets the constructor defaults apply instead of storing nil.
  new(**delimiters.compact)
end

Instance Method Details

#as_segments(message_as_string) ⇒ Object



42
43
44
45
46
47
48
# File 'lib/edifunct/tokenizer.rb', line 42

# Turns a message string into a list of Segment objects.
#
# The message is first passed through strip_service_string_advice
# (presumably removing a leading UNA header; confirm against that helper),
# then cut on segment_regexp. Each piece is parsed by split_segment and
# wrapped in a Segment together with its raw text.
#
# @param message_as_string [String] the raw message text
# @return [Array<Segment>] one Segment per raw segment, in message order
def as_segments(message_as_string)
  raw_segments = strip_service_string_advice(message_as_string).split(segment_regexp)

  raw_segments.map do |raw|
    tag, elements = split_segment(raw)
    Segment.new(tag: tag, raw_segment: raw, data_elements: elements)
  end
end

#decode_value(encoded_value) ⇒ Object



63
64
65
# File 'lib/edifunct/tokenizer.rb', line 63

# Replaces every match of escape_value_regexp in the value with the text of
# that match's first capture group.
#
# NOTE(review): presumably this drops the release character in front of an
# escaped delimiter; confirm against escape_value_regexp.
#
# @param encoded_value [String] a component value as it appears in the message
# @return [String] a new string with the substitutions applied
def decode_value(encoded_value)
  # In a gsub replacement string, '\1' stands for the first capture group.
  first_capture_group = '\1'
  encoded_value.gsub(escape_value_regexp, first_capture_group)
end

#formatted_segments_per_line(message_as_string) ⇒ Object



67
68
69
# File 'lib/edifunct/tokenizer.rb', line 67

# Returns a copy of the message in which every match of segment_regexp has
# been replaced by a newline character.
#
# @param message_as_string [String] the raw message text
# @return [String] the message with a line break at each match
def formatted_segments_per_line(message_as_string)
  message_as_string.gsub(segment_regexp) { "\n" }
end

#split_segment(raw_segment) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/edifunct/tokenizer.rb', line 50

# Breaks one raw segment into its tag and its decoded data elements.
#
# A trailing segment terminator is removed, the remainder is split on
# data_element_regexp, and the first field becomes the tag. Every other field
# is split on component_data_element_regexp and each component is passed
# through decode_value.
#
# @param raw_segment [String] a single segment, with or without its terminator
# @return [Array] a pair of the tag (nil when the segment is empty) and an
#   array holding one array of component strings per data element
def split_segment(raw_segment)
  fields = raw_segment.chomp(@segment_terminator).split(data_element_regexp)

  segment_tag = fields.first
  data_elements = fields.drop(1).map do |element|
    element.split(component_data_element_regexp).map { |component| decode_value(component) }
  end

  [segment_tag, data_elements]
end