Module: Annex29::WordSegmentation

Defined in:
lib/annex_29/word_segmentation.rb

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

._segmenter_eof_trans ⇒ Object

Returns the value of attribute _segmenter_eof_trans.



2779
2780
2781
# File 'lib/annex_29/word_segmentation.rb', line 2779

# Class-level reader for @_segmenter_eof_trans.
# `call` indexes this table by the current state once the input is exhausted;
# an entry greater than 0 selects transition number `entry - 1`.
def _segmenter_eof_trans
  @_segmenter_eof_trans
end

._segmenter_from_state_actions ⇒ Object

Returns the value of attribute _segmenter_from_state_actions.



2770
2771
2772
# File 'lib/annex_29/word_segmentation.rb', line 2770

# Class-level reader for @_segmenter_from_state_actions.
# `call` indexes this table by the current state before looking up a
# transition; the only value it reacts to is 4, which sets `ts = p`.
def _segmenter_from_state_actions
  @_segmenter_from_state_actions
end

._segmenter_index_offsets ⇒ Object

Returns the value of attribute _segmenter_index_offsets.



1770
1771
1772
# File 'lib/annex_29/word_segmentation.rb', line 1770

# Class-level reader for @_segmenter_index_offsets.
# `call` indexes this table by the current state to obtain the base offset
# that is later used to index `_segmenter_indicies`.
def _segmenter_index_offsets
  @_segmenter_index_offsets
end

._segmenter_indicies ⇒ Object

Returns the value of attribute _segmenter_indicies.



1779
1780
1781
# File 'lib/annex_29/word_segmentation.rb', line 1779

# Class-level reader for @_segmenter_indicies (name spelled as in the source).
# `call` uses it to turn the offset found by the key search into the
# transition number used with `_segmenter_trans_targs` and
# `_segmenter_trans_actions`.
def _segmenter_indicies
  @_segmenter_indicies
end

._segmenter_key_offsets ⇒ Object

Returns the value of attribute _segmenter_key_offsets.



14
15
16
# File 'lib/annex_29/word_segmentation.rb', line 14

# Class-level reader for @_segmenter_key_offsets.
# `call` indexes this table by the current state to find where that state's
# keys begin inside `_segmenter_trans_keys`.
def _segmenter_key_offsets
  @_segmenter_key_offsets
end

._segmenter_range_lengths ⇒ Object

Returns the value of attribute _segmenter_range_lengths.



1761
1762
1763
# File 'lib/annex_29/word_segmentation.rb', line 1761

# Class-level reader for @_segmenter_range_lengths.
# `call` indexes this table by the current state; the entry is the number of
# (low, high) key pairs it binary-searches after the single keys.
def _segmenter_range_lengths
  @_segmenter_range_lengths
end

._segmenter_single_lengths ⇒ Object

Returns the value of attribute _segmenter_single_lengths.



1752
1753
1754
# File 'lib/annex_29/word_segmentation.rb', line 1752

# Class-level reader for @_segmenter_single_lengths.
# `call` indexes this table by the current state; the entry is the number of
# single (exact-match) keys it binary-searches first.
def _segmenter_single_lengths
  @_segmenter_single_lengths
end

._segmenter_to_state_actions ⇒ Object

Returns the value of attribute _segmenter_to_state_actions.



2761
2762
2763
# File 'lib/annex_29/word_segmentation.rb', line 2761

# Class-level reader for @_segmenter_to_state_actions.
# `call` indexes this table by the state just entered; the only value it
# reacts to is 3, which resets `ts` to nil.
def _segmenter_to_state_actions
  @_segmenter_to_state_actions
end

._segmenter_trans_actions ⇒ Object

Returns the value of attribute _segmenter_trans_actions.



2751
2752
2753
# File 'lib/annex_29/word_segmentation.rb', line 2751

# Class-level reader for @_segmenter_trans_actions.
# `call` indexes this table by transition number; 0 means no action, and the
# values 1, 2, 5 and 6 select the action branches handled in `call`.
def _segmenter_trans_actions
  @_segmenter_trans_actions
end

._segmenter_trans_keys ⇒ Object

Returns the value of attribute _segmenter_trans_keys.



23
24
25
# File 'lib/annex_29/word_segmentation.rb', line 23

# Class-level reader for @_segmenter_trans_keys.
# `call` compares the current code point against entries of this table:
# first as single keys, then as consecutive (low, high) range pairs.
def _segmenter_trans_keys
  @_segmenter_trans_keys
end

._segmenter_trans_targs ⇒ Object

Returns the value of attribute _segmenter_trans_targs.



2741
2742
2743
# File 'lib/annex_29/word_segmentation.rb', line 2741

# Class-level reader for @_segmenter_trans_targs.
# `call` indexes this table by transition number to obtain the next state.
def _segmenter_trans_targs
  @_segmenter_trans_targs
end

.segmenter_en_main ⇒ Object

Returns the value of attribute segmenter_en_main.



2801
2802
2803
# File 'lib/annex_29/word_segmentation.rb', line 2801

# Class-level reader for @segmenter_en_main.
# Not read by the `call` method shown on this page.
# NOTE(review): presumably the entry state of the `main` machine — confirm
# against where the value is assigned.
def segmenter_en_main
  @segmenter_en_main
end

.segmenter_error ⇒ Object

Returns the value of attribute segmenter_error.



2796
2797
2798
# File 'lib/annex_29/word_segmentation.rb', line 2796

# Class-level reader for @segmenter_error.
# Not read by the `call` method shown on this page.
# NOTE(review): presumably the state number that denotes the error state —
# confirm against where the value is assigned.
def segmenter_error
  @segmenter_error
end

.segmenter_first_final ⇒ Object

Returns the value of attribute segmenter_first_final.



2792
2793
2794
# File 'lib/annex_29/word_segmentation.rb', line 2792

# Class-level reader for @segmenter_first_final.
# Not read by the `call` method shown on this page.
# NOTE(review): presumably the lowest-numbered final state — confirm against
# where the value is assigned.
def segmenter_first_final
  @segmenter_first_final
end

.segmenter_start ⇒ Object

Returns the value of attribute segmenter_start.



2788
2789
2790
# File 'lib/annex_29/word_segmentation.rb', line 2788

# Class-level reader for @segmenter_start.
# `call` uses this value as the initial state (`cs`) of the machine.
def segmenter_start
  @segmenter_start
end

Class Method Details

.call(input) ⇒ Object



2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
# File 'lib/annex_29/word_segmentation.rb', line 2809

# Splits +input+ into segments by running the table-driven segmenter state
# machine over its code points, and returns the segments as strings.
#
# The body between the "# line" markers is generated code (the markers point
# back at word_segmentation.rl). It emulates goto labels with +_goto_level+:
# each section of the main loop runs only when +_goto_level+ is at or below
# that section's label value, and +next+ restarts the loop to "jump".
#
# @param input [#each_char] text to segment; every yielded character must
#   respond to +ord+
# @return [Array<String>] the segments, in the order the machine emitted them
def call(input)
  # Work on integer code points so they can be compared with the key tables.
  data = input.each_char.map(&:ord)
  eof = data.length
  words = []

  
# line 2816 "lib/annex_29/word_segmentation.rb"
begin
	# NOTE: `p` is a local variable here (current index into data), not Kernel#p.
	p ||= 0
	# pe is the index one past the last code point to process.
	pe ||= data.length
	# cs is the current state; ts/te are the start/end indexes of the segment
	# currently being matched.
	cs = segmenter_start
	ts = nil
	te = nil
	# act is initialised but not read anywhere else in this method.
	act = 0
end

# line 181 "lib/annex_29/word_segmentation.rl"
  
# line 2828 "lib/annex_29/word_segmentation.rb"
begin
	# testEof is assigned but never read in this method.
	testEof = false
	_klen, _trans, _keys = nil
	# Label values for the goto emulation; 0 means "start from the top".
	_goto_level = 0
	_resume = 10
	_eof_trans = 15
	_again = 20
	_test_eof = 30
	_out = 40
	while true
	# Entry check: with nothing left to read, jump straight to the EOF handling.
	if _goto_level <= 0
	if p == pe
		_goto_level = _test_eof
		next
	end
	end
	# --- resume: run the from-state action, then find a transition for data[p].
	if _goto_level <= _resume
	case _segmenter_from_state_actions[cs] 
	when 4 then
# line 1 "NONE"
		begin
# Remember where the segment being matched starts.
ts = p
		end
# line 2852 "lib/annex_29/word_segmentation.rb"
	end # from state action switch 

	_keys = _segmenter_key_offsets[cs]
	_trans = _segmenter_index_offsets[cs]
	_klen = _segmenter_single_lengths[cs]
	_break_match = false
	
	# `begin ... end while false` runs exactly once; it exists so that `break`
	# can leave the key search as soon as a match is found.
	begin
	  if _klen > 0
# Binary search over this state's single (exact-match) keys.
_lower = _keys
_upper = _keys + _klen - 1

loop do
   break if _upper < _lower
   _mid = _lower + ( (_upper - _lower) >> 1 )

   # data already holds Integers, so `.ord` returns the value unchanged.
   if data[p].ord < _segmenter_trans_keys[_mid]
      _upper = _mid - 1
   elsif data[p].ord > _segmenter_trans_keys[_mid]
      _lower = _mid + 1
   else
      _trans += (_mid - _keys)
      _break_match = true
      break
   end
end # loop
break if _break_match
# No single key matched: skip past the single keys and their transition slots.
_keys += _klen
_trans += _klen
	  end
	  _klen = _segmenter_range_lengths[cs]
	  if _klen > 0
# Binary search over (low, high) key pairs; `& ~1` keeps _mid at an even
# distance from _lower so it always lands on the low end of a pair.
_lower = _keys
_upper = _keys + (_klen << 1) - 2
loop do
   break if _upper < _lower
   _mid = _lower + (((_upper-_lower) >> 1) & ~1)
   if data[p].ord < _segmenter_trans_keys[_mid]
     _upper = _mid - 2
   elsif data[p].ord > _segmenter_trans_keys[_mid+1]
     _lower = _mid + 2
   else
     _trans += ((_mid - _keys) >> 1)
     _break_match = true
     break
   end
end # loop
break if _break_match
# No range matched either: move to the slot after all range entries.
_trans += _klen
	  end
	end while false
	# Translate the slot offset into a transition number.
	_trans = _segmenter_indicies[_trans];
	end
	# --- eof_trans: take transition _trans (also the jump target for EOF
	# transitions selected further down).
	if _goto_level <= _eof_trans
	cs = _segmenter_trans_targs[_trans];

	if _segmenter_trans_actions[_trans] != 0

		case _segmenter_trans_actions[_trans] 
	when 2 then
# line 1 "NONE"
		begin
# Record the segment end just past the current code point; nothing is emitted.
te = p+1
		end
	when 5 then
# line 10 "lib/annex_29/word_segmentation.rl"
		begin
# Segment ends after the current code point: emit data[ts...te] as a string.
te = p+1
 begin 
  words << data[ts...te].pack("U*")
 end
		end
	when 6 then
# line 10 "lib/annex_29/word_segmentation.rl"
		begin
# Segment ends before the current code point: step p back one so that the
# `p += 1` below lands on this code point again, then emit.
te = p
p = p - 1; begin 
  words << data[ts...te].pack("U*")
 end
		end
	when 1 then
# line 10 "lib/annex_29/word_segmentation.rl"
		begin
# Rewind p to the last code point of the previously recorded end (te) and
# emit the segment up to that point.
 begin p = ((te))-1; end
 begin 
  words << data[ts...te].pack("U*")
 end
		end
# line 2941 "lib/annex_29/word_segmentation.rb"
		end # action switch 
	end

	end
	# --- again: run the to-state action, advance p, and loop while input remains.
	if _goto_level <= _again
	case _segmenter_to_state_actions[cs] 
	when 3 then
# line 1 "NONE"
		begin
# Clear the segment start marker.
ts = nil;		end
# line 2952 "lib/annex_29/word_segmentation.rb"
	end

	p += 1
	if p != pe
		_goto_level = _resume
		next
	end
	end
	# --- test_eof: at end of input, follow the current state's EOF transition
	# if it has one (table entry > 0 encodes transition number + 1).
	if _goto_level <= _test_eof
	if p == eof
	if _segmenter_eof_trans[cs] > 0
		_trans = _segmenter_eof_trans[cs] - 1;
		_goto_level = _eof_trans
		next;
	end
	end

	end
	# --- out: leave the main loop.
	if _goto_level <= _out
		break
	end
end
	end

# line 182 "lib/annex_29/word_segmentation.rl"

  words
end