Module: WorldDb::Matcher

Included in:
ReaderBase
Defined in:
lib/worlddb/matcher.rb

Constant Summary collapse

WORLD_COUNTRY_CODE_PATTERN =

note: returns code as capture

'([a-z]{2,3})'
WORLD_COUNTRY_CLASSIC_PATTERN =

note: inside a double-quoted string ("...") every backslash must be double-escaped (\\)

"#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+"
WORLD_COUNTRY_MODERN_PATTERN =

note: inside a double-quoted string ("...") every backslash must be double-escaped (\\)

"[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+"
WORLD_REGION_CODE_PATTERN =

note: returns code as capture

'([a-z]{1,3})'
WORLD_REGION_CLASSIC_PATTERN =
"#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
WORLD_REGION_MODERN_PATTERN =
"[0-9]+--#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
WORLD_ADMIN_MODERN_PATTERN =

note: returns name as capture (no code required)

"[0-9]+--([^\\/]+)"
WORLD_OPT_FOLDERS_PATTERN =

allow optional folders – TODO: add restriction ?? e.g. must be 4+ alphas ???

"(?:\\/[^\\/]+)*"

Instance Method Summary collapse

Instance Method Details

#match_adm2_for_country(name, &blk) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/worlddb/matcher.rb', line 156

def match_adm2_for_country( name, &blk )
  # Tries to match an adm2 level path (below country + adm1) in +name+.
  # The caller's block is forwarded to match_xxx_for_country_n_adm1;
  # the result of that helper is returned.
  #
  # Synonyms are tried in order: 'districts' first, then 'counties'
  #   (note: counties might also be an adm3 match).
  match_xxx_for_country_n_adm1( name, 'districts', &blk ) ||
    match_xxx_for_country_n_adm1( name, 'counties', &blk )
end

#match_adm3_for_country(name, &blk) ⇒ Object



164
165
166
# File 'lib/worlddb/matcher.rb', line 164

def match_adm3_for_country( name, &blk )
  # Tries to match an adm3 level path (country + adm1 + adm2) in +name+;
  # only 'counties' is tried. The caller's block is forwarded as-is and the
  # helper's result is returned.
  match_xxx_for_country_n_adm1_n_adm2(name, 'counties', &blk)
end

#match_cities_for_country(name, &blk) ⇒ Object



170
171
172
173
174
175
176
# File 'lib/worlddb/matcher.rb', line 170

def match_cities_for_country( name, &blk )
  # Thin wrapper: asks match_xxx_for_country to look for 'cities' in +name+
  # and hands the caller's block straight through; returns the helper's result.
  #
  ## todo: check if there's a better (more ruby way) to pass along the code block ??
  ##   e.g. try
  ##     match_xxx_for_country( name, 'cities') { |country_key| yield(country_key) }
  match_xxx_for_country(name, 'cities', &blk)
end

#match_countries_for_continent(name) ⇒ Object



207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/worlddb/matcher.rb', line 207

def match_countries_for_continent( name )
  # Checks whether +name+ starts with "<continent>/countries"
  #   e.g. africa/countries or america/countries
  # and, if so, yields the leading folder name and returns true;
  # returns false (without yielding) otherwise.
  #
  ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
  ##   the folder name gets auto-added as a tag by the caller;
  ##   dash/hyphen/minus (and underscore) are allowed inside the name
  return false unless name =~ /^([a-z][a-z\-_]+[a-z])\/countries/   # no match found

  folder = Regexp.last_match( 1 ).dup
  yield( folder )
  true
end

#match_regions_abbr_for_country(name, &blk) ⇒ Object

NB: the dot (.) gets escaped for the regex, that is, \.



185
186
187
188
189
190
# File 'lib/worlddb/matcher.rb', line 185

def match_regions_abbr_for_country( name, &blk )  # NB: . gets escaped for regex, that is, \.
  # Looks for a states/regions abbreviation path in +name+, forwarding the
  # caller's block to match_xxx_for_country and returning its result.
  #
  ## 'states.abbr' is tried first; the older synonym 'regions.abbr' is the fallback
  match_xxx_for_country( name, 'states\.abbr', &blk ) ||
    match_xxx_for_country( name, 'regions\.abbr', &blk )
end

#match_regions_for_country(name, &blk) ⇒ Object



178
179
180
181
182
183
# File 'lib/worlddb/matcher.rb', line 178

def match_regions_for_country( name, &blk )
  # Looks for a states/regions path in +name+, forwarding the caller's block
  # to match_xxx_for_country and returning its result.
  #
  ## 'states' is tried first; the older synonym 'regions' is the fallback
  match_xxx_for_country( name, 'states', &blk ) ||
    match_xxx_for_country( name, 'regions', &blk )
end

#match_regions_iso_for_country(name, &blk) ⇒ Object

NB: the dot (.) gets escaped for the regex, that is, \.



192
193
194
195
196
197
# File 'lib/worlddb/matcher.rb', line 192

def match_regions_iso_for_country( name, &blk )  # NB: . gets escaped for regex, that is, \.
  # Looks for a states/regions iso path in +name+, forwarding the caller's
  # block to match_xxx_for_country and returning its result.
  #
  ## 'states.iso' is tried first; the older synonym 'regions.iso' is the fallback
  match_xxx_for_country( name, 'states\.iso', &blk ) ||
    match_xxx_for_country( name, 'regions\.iso', &blk )
end

#match_regions_nuts_for_country(name, &blk) ⇒ Object

NB: the dot (.) gets escaped for the regex, that is, \.



199
200
201
202
203
204
# File 'lib/worlddb/matcher.rb', line 199

def match_regions_nuts_for_country( name, &blk )  # NB: . gets escaped for regex, that is, \.
  # Looks for a states/regions nuts path in +name+, forwarding the caller's
  # block to match_xxx_for_country and returning its result.
  #
  ## 'states.nuts' is tried first; the older synonym 'regions.nuts' is the fallback
  match_xxx_for_country( name, 'states\.nuts', &blk ) ||
    match_xxx_for_country( name, 'regions\.nuts', &blk )
end

#match_xxx_for_country(name, xxx) ⇒ Object

xxx e.g. cities|regions|beers|breweries



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/worlddb/matcher.rb', line 24

def match_xxx_for_country( name, xxx )  # xxx e.g. cities|regions|beers|breweries
  # Extracts the country code from the folder/file structure in +name+.
  # On a match the code (first capture) is yielded to the block and true is
  # returned; otherwise nothing is yielded and false is returned.
  #
  #  note: always let match_xxx_for_country_n_region go first

  # trailing part - allows  /cities and /1--hokkaido--cities
  #   (built as a string, so the backslash needs the double escape \\)
  tail = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})"

  ##
  ## todo: add $-anchor at the end of pattern - why? why not?? (will include .txt or .yaml??)

  ######
  # the country must start the name (^) or follow a slash (/)
  #
  # (1) new style: e.g. /at-austria/beers or ^at-austria!/cities
  # (2) new-new style: e.g. /1--at-austria--central/cities
  # (3) classic style: e.g. /at/beers (europe/at/cities)
  # (4) new style w/ region w/o abbrev/code: e.g. /ja-japon/1--hokkaido/cities
  # (5) compact style (country part of filename):
  #       e.g. /at-austria--cities or /europe/at-austria--cities
  matched =
    name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{tail}/                ||   # (1)
    name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{tail}/                 ||   # (2)
    name =~ /(?:^|\/)#{WORLD_COUNTRY_CODE_PATTERN}\/#{tail}/                   ||   # (3)
    name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/[0-9]+--[^\/]+\/#{tail}/ ||  # (4)
    name =~ /(?:^|\/)#{WORLD_COUNTRY_CODE_PATTERN}-[^\/]+--#{xxx}/                  # (5)

  return false unless matched   # no match found

  code = Regexp.last_match( 1 ).dup
  yield( code )
  true # bingo - match found
end

#match_xxx_for_country_n_adm1(name, xxx) ⇒ Object

xxx e.g. districts|counties|etc.



109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/worlddb/matcher.rb', line 109

def match_xxx_for_country_n_adm1( name, xxx ) # xxx e.g. districts|counties|etc.
  # Extracts the country code and the adm1 (region) code from the folder
  # structure in +name+. On a match both codes are yielded (country first)
  # and true is returned; otherwise nothing is yielded and false is returned.
  #
  #  e.g.  de-deutschland!/3--by-bayern/districts  (regierungsbezirke)
  #        europe/de-deutschland!/3--by-bayern/districts
  #
  #    at-austria!/1--n-niederoesterreich/counties  (bezirke)
  #
  # The numbered region folder (3--by-bayern) is tried first, then the plain
  # one (by-bayern); the country folder is always the "classic" form.
  hit = name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{xxx}/ ||
        name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx}/

  return false unless hit   # no match found

  yield( Regexp.last_match( 1 ).dup, Regexp.last_match( 2 ).dup )
  true # bingo - match found
end

#match_xxx_for_country_n_adm1_n_adm2(name, xxx) ⇒ Object

xxx e.g. districts|counties|etc.



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/worlddb/matcher.rb', line 133

def match_xxx_for_country_n_adm1_n_adm2( name, xxx ) # xxx e.g. districts|counties|etc.
  # Extracts the country code, the adm1 (region) code and the adm2 folder name
  # from the folder structure in +name+. On a match all three are yielded
  # (in that order) and true is returned; otherwise nothing is yielded and
  # false is returned.
  #
  #  e.g.  de-deutschland!/3--by-bayern/4--oberfranken/counties  (landkreise)
  #        europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
  #
  # The numbered region folder (3--by-bayern) is tried first, then the plain
  # one (by-bayern); the adm2 folder always needs its number prefix (4--).
  hit = name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx}/ ||
        name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{xxx}/

  return false unless hit   # no match found

  country = Regexp.last_match( 1 ).dup
  region  = Regexp.last_match( 2 ).dup
  admin   = Regexp.last_match( 3 ).dup   # name as found in folder e.g. oberfranken, oberbayern, etc.
  yield( country, region, admin )
  true # bingo - match found
end

#match_xxx_for_country_n_region(name, xxx) ⇒ Object

xxx e.g. wine|wineries



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/worlddb/matcher.rb', line 61

def match_xxx_for_country_n_region( name, xxx ) # xxx e.g. wine|wineries
  # Extracts the country code and the region code from the folder structure
  # in +name+. On a match both codes are yielded (country first) and true is
  # returned; otherwise nothing is yielded and false is returned.

  ## -- allow opt_folders after regions (e.g. additional subregion/zone)
  ## -- allow anything (prefixes) before xxx
  #       e.g.  at-austria!/1--n-niederoesterreich--eastern/wagram--wines
  #             at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines

  # note: allow  /cities and /1--hokkaido--cities and /hokkaido--cities too
  # note: allow anything before xxx for now  -- use/require dash (--) why, why not??
  #   (an earlier variant required the dashes, that is, "(?:#{xxx}|[^\\/]+--#{xxx})")
  xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})"   # note: double escape \\ required for backslash

  ## note: styles (1), (2) and (3) allow opt folders between region and xxx;
  ##       style (4) requires xxx directly below the region folder

  if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/  || # (1)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/   || # (2)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/  || # (3)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/            # (4)

    #######
    # nb: country must start name (^) or coming after / e.g. europe/at-austria/...
    # (1)
    # new style: e.g.  /at-austria/w-wien/cities or
    #                  ^at-austria!/w-wien/cities
    # (2)
    # new new style e.g.  /1--at-austria--central/1--w-wien--eastern/cities
    #
    # (3)
    #  new new mixed style e.g.  /at-austria/1--w-wien--eastern/cities
    #      "classic" country plus new new region
    #
    # (4)
    #  new new mixed style e.g.  /1--at-austria--central/w-wien/cities
    #      new new country plus "classic" region

    # in every style capture 1 is the country code and capture 2 the region code
    country_key = $1.dup
    region_key  = $2.dup
    yield( country_key, region_key )
    true # bingo - match found
  else
    false # no match found
  end
end