6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/bookclean.rb', line 6
# Normalizes a company/publisher-style name: lower-cases it, strips common
# corporate suffixes ("ltda", "s.a." and variants, "lv"), drops a leading or
# trailing "editora", removes " ed." / trailing " ed" / trailing " -", and
# finally title-cases the result while keeping " da ", " de " and " do "
# in lower case.
#
# NOTE(review): the patterns look tailored to Brazilian Portuguese publisher
# names -- confirm against callers.
#
# @param str [String, nil] the raw name
# @param lang [Symbol] not read by this method; kept so existing callers
#   that pass it keep working
# @return [String, nil] the cleaned name, or nil when +str+ is nil
def self.clean(str, lang = :pt)
  return str if str.nil?

  # Work on a lower-cased, single-spaced copy so each pattern below only has
  # to deal with one casing and one kind of separator.
  str = UnicodeUtils.downcase(str.strip)
  str.gsub!(/\s+/, ' ')

  # Corporate suffixes, removed one after another in this exact order
  # (stripping one may expose the next).
  company_suffixes = [
    /\s*ltda.?$/,
    /\slv$/,
    /\ss\.a\.$/,
    /\ss\.a$/,
    /\ssa\.$/,
    /\ss\.\sa\.$/,
    /\ss\.\sa$/
  ]
  company_suffixes.each { |suffix| str.gsub!(suffix, '') }

  # Keep a leading "editora" when it is followed by a two-character word
  # (e.g. "editora da ..."); otherwise drop it.
  str.gsub!(/^editora/, '') unless str =~ /^editora\s+..\s+/
  str.gsub!(/editora$/, '')

  str.gsub!('&', ' & ')
  str.gsub!(/\s+/, ' ')

  # Removing "editora" or padding "&" can leave a trailing space, which would
  # stop the end-anchored " ed" / " -" removals below from ever matching.
  # Only the right side is trimmed: a leading space may still be needed by
  # the ' ed.' removal.
  str.rstrip!

  # Literal 'Ã?' / 'ã?' sequences are replaced with 'á'.
  # NOTE(review): these look like a mis-decoded accented character -- confirm.
  str.gsub!('Ã?', 'á')
  str.gsub!('ã?', 'á')

  str.gsub!(' ed.', '')
  str.gsub!(/\sed$/, '')
  str.gsub!(/\s-$/, '')

  str = UnicodeUtils.titlecase(str)

  # Connecting words stay lower-case after title-casing.
  str.gsub!(" Da ", " da ")
  str.gsub!(" De ", " de ")
  str.gsub!(" Do ", " do ")

  # Restore the tilde in the common "-ção" / "-ções" endings.
  str.gsub!("çao", "ção")
  str.gsub!("çoes", "ções")

  str.strip
end
|