11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/ruby-pinyin/backend/simple.rb', line 11
# Romanizes +str+ one Unicode code point at a time and returns the tokens.
#
# * A code point whose uppercase hex code has an entry in +codes+ becomes its
#   own token, built from <tt>format(readings, tone)</tt>.
# * ASCII letters, digits, underscores and whitespace are accumulated into a
#   single token flagged as English; those tokens are split on whitespace
#   before being returned.
# * Any other character is dropped unless +include_punctuations+ is true, in
#   which case it is appended to the preceding token (or pushed as a token of
#   its own when there is none), after being replaced through +Punctuation+
#   when that table has an entry for it.
#
# @param str [String, nil] text to romanize; nil or empty returns []
# @param tone passed through unchanged to +format+
# @param include_punctuations [Boolean] keep characters that are neither in
#   +codes+ nor ASCII word/whitespace characters
# @return [Array] the tokens, in input order
def romanize(str, tone = nil, include_punctuations = false)
  tokens = []
  return tokens unless str && !str.empty?

  str.unpack('U*').each do |codepoint|
    # sprintf rather than format: `format` is called below with
    # (readings, tone), i.e. it is a helper of the enclosing scope here.
    code = sprintf('%X', codepoint)
    readings = codes[code]

    if readings
      tokens << Value.new(format(readings, tone), false)
      next
    end

    char = [codepoint].pack('U*')
    previous = tokens.last

    if char =~ /\A[_0-9a-zA-Z\s]*\z/
      if previous && previous.english?
        # Extend the current English run (whitespace included; it is split
        # out at the end).
        previous << Value.new(char, true)
      elsif char !~ /\A\s\z/
        # Never start a run with whitespace of any kind. Starting one with a
        # tab or newline would leave a leading separator in the token, and
        # split(/\s+/) would then emit an empty string.
        tokens << Value.new(char, true)
      end
    elsif include_punctuations
      # Punctuation values are hex strings; decode them to the replacement.
      char = [Punctuation[code]].pack('H*') if Punctuation.include?(code)
      (previous || tokens) << Value.new(char, false)
    end
  end

  tokens.flat_map { |phrase| phrase.split(/\s+/) }
end
|