Module: MathMetadata
- Defined in:
- lib/math_metadata_lookup/lookup.rb,
lib/math_metadata_lookup/site.rb,
lib/math_metadata_lookup/tools.rb,
lib/math_metadata_lookup/author.rb,
lib/math_metadata_lookup/entity.rb,
lib/math_metadata_lookup/result.rb,
lib/math_metadata_lookup/article.rb,
lib/math_metadata_lookup/sites/mr.rb,
lib/math_metadata_lookup/reference.rb,
lib/math_metadata_lookup/sites/zbl.rb
Overview
vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
Defined Under Namespace
Classes: Article, Author, Entity, Lookup, MR, Reference, Result, Site, ZBL
Constant Summary
collapse
- SITES =
[]
- ACCENT_REPL =
{
"`" => "\u0300", "'" => "\u0301", "^" => "\u0302", '"' => "\u0308", "~" => "\u0303", "H" => "\u030b", "c" => "\u0327", "=" => "\u0304", "." => "\u0307", "r" => "\u030a", "u" => "\u0306", "v" => "\u030c" }
Class Method Summary
collapse
Class Method Details
.latex_to_utf8(s) ⇒ Object
100
101
102
103
104
105
106
|
# File 'lib/math_metadata_lookup/tools.rb', line 100
# Replaces LaTeX accent commands in +s+ with the corresponding accented
# UTF-8 characters.
#
# Recognised forms are <tt>\Xc</tt>, <tt>\X{c}</tt> and <tt>\X{\c}</tt>,
# where +X+ is a key of ACCENT_REPL and +c+ is an ASCII letter. The letter is
# joined with the combining mark from ACCENT_REPL and passed through
# Unicode.normalize_KC.
#
# A letter-named accent (e.g. <tt>\c</tt>, <tt>\r</tt>, <tt>\v</tt>) that is
# directly followed by a bare letter is left untouched: in TeX that sequence
# is the start of a longer control word (<tt>\cal</tt>, <tt>\rho</tt>,
# <tt>\varphi</tt>), not an accent applied to the next letter.
#
# @param s [String] text possibly containing LaTeX accent commands
# @return [String] a new string with the recognised accents converted;
#   everything else is returned unchanged
def latex_to_utf8( s )
  s.gsub( /\\(.)(?:([a-zA-Z])|\{([a-zA-Z])\}|\{\\([a-zA-Z])\})/ ) do |match|
    # Grab the captures first: the regexp test below overwrites $~.
    name, bare, braced, escaped = $~.captures
    accent = ACCENT_REPL[name]
    char = bare || braced || escaped

    # "\ca", "\rh", "\va" ... are prefixes of ordinary control words.
    control_word = bare && name =~ /\A[a-zA-Z]\z/

    accent && !control_word ? Unicode.normalize_KC( char + accent ) : match
  end
end
|
.levenshtein_distance(s1, s2) ⇒ Object
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
# File 'lib/math_metadata_lookup/tools.rb', line 10
# Similarity score of two strings based on their Levenshtein edit distance.
#
# Despite the name, the value returned is not the raw distance but
# <tt>1 - distance / length_of_longer_string</tt>: 1.0 for equal strings,
# 0.0 when every character has to be edited.
#
# @param s1 [String]
# @param s2 [String]
# @return [Float] similarity between 0.0 and 1.0
def levenshtein_distance( s1, s2 )
  return 1.0 if s1 == s2

  left = s1.split(//u)
  right = s2.split(//u)

  # Only the previous row is needed to compute the next one.
  # previous[i] holds the distance between the first i characters of +left+
  # and the part of +right+ consumed so far.
  previous = (0..left.size).to_a

  right.each_with_index do |right_char, j|
    current = [j + 1]
    left.each_with_index do |left_char, i|
      cost =
        if right_char == left_char
          previous[i]
        else
          [current[i], previous[i + 1], previous[i]].min + 1
        end
      current << cost
    end
    previous = current
  end

  1 - (previous.last.to_f / [left.size, right.size].max)
end
|
.normalize_mscs(mscs) ⇒ Object
45
46
47
|
# File 'lib/math_metadata_lookup/tools.rb', line 45
# Splits classification strings into individual codes.
#
# Every element of +mscs+ is split on commas and semicolons. From each
# resulting piece the first run of characters that are neither whitespace
# nor parentheses is kept. Pieces that contain no such run (empty or blank
# segments) are dropped instead of being returned as +nil+.
#
# @param mscs [Array<String>] raw classification strings
# @return [Array<String>] the extracted codes, in input order
def normalize_mscs( mscs )
  pieces = mscs.flat_map { |entry| entry.split(/[,;]/) }
  # String#[] yields nil when nothing matches; compact removes those.
  pieces.map { |piece| piece[/[^\s()]+/] }.compact
end
|
.normalize_name(name) ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# File 'lib/math_metadata_lookup/tools.rb', line 50
# Normalizes a personal name for comparison.
#
# Steps, in order:
# 1. +name+ is converted to a String, run through latex_to_utf8 and
#    I18n.transliterate.
# 2. The first "jr." (case-insensitive) that is followed by a word character
#    or the end of a line is replaced by a space.
# 3. The first single-character initial ("X.") at the start of a line or
#    after a non-word character is replaced by a space.
# 4. A space is inserted after every period that is directly followed by a
#    character other than whitespace or a comma ("K.R." becomes "K. R.").
#
# Step 4 previously called the non-destructive +gsub+ and discarded its
# result, so it never had any effect; the substituted string is now returned.
#
# @param name [#to_s] the name to normalize
# @return [String] the normalized name (not stripped)
def normalize_name( name )
  trans = I18n.transliterate(latex_to_utf8(name.to_s))
  trans = trans.sub(%r{\bjr\.(\b|$)}i, ' ')
  trans = trans.sub(%r{(\W|^)\w\.}i, ' ')
  trans.gsub(/([^\s,])?\.([^\s,])/, '\1. \2')
end
|
.normalize_range(range) ⇒ Object
40
41
42
|
# File 'lib/math_metadata_lookup/tools.rb', line 40
# Rewrites a range so that its bounds are joined by a single hyphen.
#
# +range+ is converted with +to_s+; every en dash and every pair of
# consecutive hyphens is replaced by one "-".
#
# @param range [#to_s] e.g. "10--20" or "10–20"
# @return [String] the range with normalized separators
def normalize_range( range )
  text = range.to_s
  text.gsub(/–|--/, '-')
end
|
.normalize_text(s) ⇒ Object
76
77
78
79
80
81
82
83
|
# File 'lib/math_metadata_lookup/tools.rb', line 76
# Normalizes free text (such as a title) for comparison.
#
# Steps, in order:
# 1. +s+ is converted to a String, run through latex_to_utf8 and
#    I18n.transliterate, and downcased.
# 2. remove_punctuation is applied.
# 3. Every run of non-word characters becomes a single space.
# 4. The words "the", "a" and "of" are removed when followed by whitespace.
# 5. Repeated spaces are collapsed and the result is stripped.
#
# The stop-word pattern is anchored with a word boundary. Without it the
# pattern also matched the tail of longer words, turning "proof " into
# "pro " and "algebra " into "algebr ".
#
# @param s [#to_s] the text to normalize
# @return [String] the normalized text
def normalize_text( s )
  str = latex_to_utf8(s.to_s)
  str = I18n.transliterate(str).downcase
  str = remove_punctuation(str)
  str = str.gsub(%r{\W+}, ' ')
  # \b keeps the stop words from matching inside other words.
  str = str.gsub(%r{\b(?:the|a|of)\s+}, ' ')
  str.squeeze(' ').strip
end
|
.remove_punctuation(s) ⇒ Object
69
70
71
72
73
|
# File 'lib/math_metadata_lookup/tools.rb', line 69
# Removes periods and commas that end a token.
#
# Two passes are made over +s+:
# 1. a run of "." / "," that follows a word character and precedes a space
#    or the end of a line is replaced by a single space;
# 2. the same is done for a run that follows a whitespace character (the
#    whitespace character itself is kept).
# The result is stripped of leading and trailing whitespace.
#
# @param s [String]
# @return [String] a new string without the trailing punctuation
def remove_punctuation( s )
  after_words = s.gsub(%r{(\w)[.,]+( |$)}i, '\1 ')
  after_gaps = after_words.gsub(%r{(\s)[.,]+( |$)}i, '\1 ')
  after_gaps.strip
end
|