Module: WorldDb::Matcher
- Included in:
- ReaderBase
- Defined in:
- lib/worlddb/matcher.rb
Constant Summary collapse
- WORLD_COUNTRY_CODE_PATTERN =
note: returns code as capture
'([a-z]{2,3})'
- WORLD_COUNTRY_CLASSIC_PATTERN =
note: if you use a double-quoted string ("") you need to double-escape the backslash!
"#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+"
- WORLD_COUNTRY_MODERN_PATTERN =
note: if you use a double-quoted string ("") you need to double-escape the backslash!
"[0-9]+--#{WORLD_COUNTRY_CODE_PATTERN}-[^\\/]+"
- WORLD_REGION_CODE_PATTERN =
note: returns code as capture
'([a-z]{1,3})'
- WORLD_REGION_CLASSIC_PATTERN =
"#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
- WORLD_REGION_MODERN_PATTERN =
"[0-9]+--#{WORLD_REGION_CODE_PATTERN}-[^\\/]+"
- WORLD_ADMIN_MODERN_PATTERN =
note: returns name as capture (no code required)
"[0-9]+--([^\\/]+)"
- WORLD_OPT_FOLDERS_PATTERN =
allow optional folders – TODO: add a restriction? e.g. must be 4+ alphas?
"(?:\\/[^\\/]+)*"
Instance Method Summary collapse
- #match_adm2_for_country(name, &blk) ⇒ Object
- #match_adm3_for_country(name, &blk) ⇒ Object
- #match_cities_for_country(name, &blk) ⇒ Object
- #match_countries_for_continent(name) ⇒ Object
-
#match_regions_abbr_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
- #match_regions_for_country(name, &blk) ⇒ Object
-
#match_regions_iso_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
-
#match_regions_nuts_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
-
#match_xxx_for_country(name, xxx) ⇒ Object
xxx e.g. cities|regions|beers|breweries
-
#match_xxx_for_country_n_adm1(name, xxx) ⇒ Object
xxx e.g. districts|counties|etc.
-
#match_xxx_for_country_n_adm1_n_adm2(name, xxx) ⇒ Object
xxx e.g. districts|counties|etc.
-
#match_xxx_for_country_n_region(name, xxx) ⇒ Object
xxx e.g. wine|wineries
Instance Method Details
#match_adm2_for_country(name, &blk) ⇒ Object
156 157 158 159 160 161 162 |
# File 'lib/worlddb/matcher.rb', line 156
#
# Looks for an adm2-level folder below a country and its adm1 region by
# delegating to match_xxx_for_country_n_adm1.
#
# The folder name 'districts' is tried first; the synonym 'counties' is only
# tried when the first attempt returns a falsy value.
# note: counties might also be an adm3 match
#
# The given block is forwarded unchanged to match_xxx_for_country_n_adm1.
# Returns the result of the first truthy attempt, otherwise the result of the
# 'counties' attempt.
def match_adm2_for_country( name, &blk )
  match_xxx_for_country_n_adm1( name, 'districts', &blk ) ||
    match_xxx_for_country_n_adm1( name, 'counties', &blk )
end
#match_adm3_for_country(name, &blk) ⇒ Object
164 165 166 |
# File 'lib/worlddb/matcher.rb', line 164
#
# Looks for an adm3-level 'counties' folder by delegating to
# match_xxx_for_country_n_adm1_n_adm2.
#
# The given block is forwarded unchanged; the delegate's result is returned.
def match_adm3_for_country( name, &blk )
  folder = 'counties'
  match_xxx_for_country_n_adm1_n_adm2( name, folder, &blk )
end
#match_cities_for_country(name, &blk) ⇒ Object
170 171 172 173 174 175 176 |
# File 'lib/worlddb/matcher.rb', line 170
#
# Looks for a 'cities' folder/file of a country by delegating to
# match_xxx_for_country.
#
# The given block is forwarded unchanged; the delegate's result is returned.
#
# todo: check if there's a better (more ruby way) to pass along the code block?
#   e.g. try
#     match_xxx_for_country( name, 'cities' ) { |country_key| yield( country_key ) }
def match_cities_for_country( name, &blk )
  folder = 'cities'
  match_xxx_for_country( name, folder, &blk )
end
#match_countries_for_continent(name) ⇒ Object
207 208 209 210 211 212 213 214 215 216 217 218 |
# File 'lib/worlddb/matcher.rb', line 207
#
# Checks whether +name+ starts with "<continent>/countries" and, if so, yields
# the continent part (taken from the folder structure) so the caller can
# auto-add it as a tag.
#
#   e.g. africa/countries or america/countries
#
# NB: continent changed to regions (e.g. middle-east, caribbean,
#     north-america, etc.); dash/hyphen/minus and underscore are allowed
#     inside the captured part.
#
# Yields a copy of the captured continent string and returns true on a match;
# returns false (without yielding) otherwise.
def match_countries_for_continent( name )
  # \A anchors at the start of the whole string; ^ would also match at the
  # start of any later line of a multi-line name.
  return false unless name =~ /\A([a-z][a-z\-_]+[a-z])\/countries/

  continent = Regexp.last_match( 1 ).dup
  yield( continent )
  true
end
#match_regions_abbr_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
185 186 187 188 189 190 |
# File 'lib/worlddb/matcher.rb', line 185
#
# Looks for a region abbreviation file/folder of a country by delegating to
# match_xxx_for_country.
#
# NB: . gets escaped for regex, that is, \.
#
# 'states\.abbr' is tried first; the synonym (old name) 'regions\.abbr' is only
# tried when the first attempt returns a falsy value. The given block is
# forwarded unchanged. Returns the result of the first truthy attempt,
# otherwise the result of the 'regions\.abbr' attempt.
def match_regions_abbr_for_country( name, &blk )
  match_xxx_for_country( name, 'states\.abbr', &blk ) ||
    match_xxx_for_country( name, 'regions\.abbr', &blk )
end
#match_regions_for_country(name, &blk) ⇒ Object
178 179 180 181 182 183 |
# File 'lib/worlddb/matcher.rb', line 178
#
# Looks for a regions file/folder of a country by delegating to
# match_xxx_for_country.
#
# 'states' is tried first; the synonym (old name) 'regions' is only tried when
# the first attempt returns a falsy value. The given block is forwarded
# unchanged. Returns the result of the first truthy attempt, otherwise the
# result of the 'regions' attempt.
def match_regions_for_country( name, &blk )
  match_xxx_for_country( name, 'states', &blk ) ||
    match_xxx_for_country( name, 'regions', &blk )
end
#match_regions_iso_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
192 193 194 195 196 197 |
# File 'lib/worlddb/matcher.rb', line 192
#
# Looks for a region ISO file/folder of a country by delegating to
# match_xxx_for_country.
#
# NB: . gets escaped for regex, that is, \.
#
# 'states\.iso' is tried first; the synonym (old name) 'regions\.iso' is only
# tried when the first attempt returns a falsy value. The given block is
# forwarded unchanged. Returns the result of the first truthy attempt,
# otherwise the result of the 'regions\.iso' attempt.
def match_regions_iso_for_country( name, &blk )
  match_xxx_for_country( name, 'states\.iso', &blk ) ||
    match_xxx_for_country( name, 'regions\.iso', &blk )
end
#match_regions_nuts_for_country(name, &blk) ⇒ Object
NB: . gets escaped for regex, that is, \.
199 200 201 202 203 204 |
# File 'lib/worlddb/matcher.rb', line 199
#
# Looks for a region NUTS file/folder of a country by delegating to
# match_xxx_for_country.
#
# NB: . gets escaped for regex, that is, \.
#
# 'states\.nuts' is tried first; the synonym (old name) 'regions\.nuts' is only
# tried when the first attempt returns a falsy value. The given block is
# forwarded unchanged. Returns the result of the first truthy attempt,
# otherwise the result of the 'regions\.nuts' attempt.
def match_regions_nuts_for_country( name, &blk )
  match_xxx_for_country( name, 'states\.nuts', &blk ) ||
    match_xxx_for_country( name, 'regions\.nuts', &blk )
end
#match_xxx_for_country(name, xxx) ⇒ Object
xxx e.g. cities|regions|beers|breweries
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/worlddb/matcher.rb', line 24
#
# Tries to find the country code (taken from the folder structure) for a
# folder/file called +xxx+ inside +name+.
#
#   xxx e.g. cities|regions|beers|breweries
#
# The styles listed below are tried in order. On the first one that matches,
# a copy of its first capture (the country code) is yielded and true is
# returned. If none matches, nothing is yielded and false is returned.
#
# note: always let match_xxx_for_country_n_region go first
# note: allow /cities and /1--hokkaido--cities
#
# todo: add $-anchor at the end of pattern - why? why not?
#       (will include .txt or .yaml??)
def match_xxx_for_country( name, xxx )
  # note: double escape \\ required for backslash
  folder = "(?:#{xxx}|[0-9]+--[^\\/]+?--#{xxx})"

  styles = [
    # (1) new style: e.g. /at-austria/beers or ^at-austria!/cities
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{folder}/,
    # (2) new-new style e.g. /1--at-austria--central/cities
    /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{folder}/,
    # (3) classic style: e.g. /at/beers (europe/at/cities)
    /(?:^|\/)#{WORLD_COUNTRY_CODE_PATTERN}\/#{folder}/,
    # (4) new style w/ region w/o abbrev/code e.g. /ja-japon/1--hokkaido/cities
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/[0-9]+--[^\/]+\/#{folder}/,
    # (5) compact style (country part of filename):
    #     e.g. /at-austria--cities or /europe/at-austria--cities
    /(?:^|\/)#{WORLD_COUNTRY_CODE_PATTERN}-[^\/]+--#{xxx}/
  ]

  country_key = nil
  hit = styles.any? do |style|
    next false unless name =~ style

    country_key = $1.dup
    true
  end

  return false unless hit   # no match found

  yield( country_key )
  true   # bingo - match found
end
#match_xxx_for_country_n_adm1(name, xxx) ⇒ Object
xxx e.g. districts|counties|etc.
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/worlddb/matcher.rb', line 109
#
# Tries to find the country code and the adm1 region code (both taken from the
# folder structure) for a folder/file called +xxx+ inside +name+.
#
#   xxx e.g. districts|counties|etc.
#
#   e.g. de-deutschland!/3--by-bayern/districts   (regierungsbezirke)
#        europe/de-deutschland!/3--by-bayern/districts
#
#        at-austria!/1--n-niederoesterreich/counties   (bezirke)
#
# The country always uses the classic pattern; the region is tried with the
# modern pattern first, then the classic one. On the first match, copies of
# the first and second capture (country code, region code) are yielded and
# true is returned. If none matches, nothing is yielded and false is returned.
def match_xxx_for_country_n_adm1( name, xxx )
  folder = "#{xxx}"

  styles = [
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{folder}/,
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{folder}/
  ]

  keys = nil
  hit = styles.any? do |style|
    next false unless name =~ style

    keys = [$1.dup, $2.dup]
    true
  end

  return false unless hit   # no match found

  country_key, region_key = keys
  yield( country_key, region_key )
  true   # bingo - match found
end
#match_xxx_for_country_n_adm1_n_adm2(name, xxx) ⇒ Object
xxx e.g. districts|counties|etc.
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/worlddb/matcher.rb', line 133
#
# Tries to find the country code, the adm1 region code and the adm2 name (all
# taken from the folder structure) for a folder/file called +xxx+ inside
# +name+.
#
#   xxx e.g. districts|counties|etc.
#
#   e.g. de-deutschland!/3--by-bayern/4--oberfranken/counties   (landkreise)
#        europe/de-deutschland!/3--by-bayern/4--oberfranken/counties
#
# The country always uses the classic pattern and the adm2 folder the modern
# admin pattern; the region is tried with the modern pattern first, then the
# classic one. On the first match, copies of captures one to three (country
# code, region code, adm2 name) are yielded and true is returned. If none
# matches, nothing is yielded and false is returned.
def match_xxx_for_country_n_adm1_n_adm2( name, xxx )
  folder = "#{xxx}"

  styles = [
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{folder}/,
    /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{WORLD_ADMIN_MODERN_PATTERN}\/#{folder}/
  ]

  keys = nil
  hit = styles.any? do |style|
    next false unless name =~ style

    # third capture: lowercase name e.g. oberfranken, oberbayern, etc.
    keys = [$1.dup, $2.dup, $3.dup]
    true
  end

  return false unless hit   # no match found

  country_key, region_key, adm2 = keys
  yield( country_key, region_key, adm2 )
  true   # bingo - match found
end
#match_xxx_for_country_n_region(name, xxx) ⇒ Object
xxx e.g. wine|wineries
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/worlddb/matcher.rb', line 61
#
# Tries to find the country code and the region code (both taken from the
# folder structure) for a folder/file called +xxx+ inside +name+.
#
#   xxx e.g. wine|wineries
#
# -- allow opt_folders after long regions (e.g. additional subregion/zone)
# -- allow anything (prefixes) before xxx
#    e.g. at-austria!/1--n-niederoesterreich--eastern/wagram--wines
#         at-austria!/1--n-niederoesterreich--eastern/wagram--wagram--wines
#
# note: allow /cities and /1--hokkaido--cities and /hokkaido--cities too
# note: allow anything before xxx for now -- use/require dash (--) why, why not?
#
# nb: country must start name (^) or come after / e.g. europe/at-austria/...
#
# The styles listed below are tried in order. On the first match, copies of
# the first and second capture (country code, region code) are yielded and
# true is returned. If none matches, nothing is yielded and false is returned.
def match_xxx_for_country_n_region( name, xxx )
  # note: double escape \\ required for backslash
  xxx_pattern = "(?:#{xxx}|[^\\/]+#{xxx})"

  ## note: style (4) is the only one that does not allow optional folders
  ##       between the region and xxx
  if name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||  # (1)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||   # (2)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_CLASSIC_PATTERN}\/#{WORLD_REGION_MODERN_PATTERN}#{WORLD_OPT_FOLDERS_PATTERN}\/#{xxx_pattern}/ ||  # (3)
     name =~ /(?:^|\/)#{WORLD_COUNTRY_MODERN_PATTERN}\/#{WORLD_REGION_CLASSIC_PATTERN}\/#{xxx_pattern}/                                # (4)

    #######
    # (1) new style: e.g. /at-austria/w-wien/cities or
    #                     ^at-austria!/w-wien/cities
    #
    # (2) new new style e.g. /1--at-austria--central/1--w-wien--eastern/cities
    #
    # (3) new new mixed style e.g. /at-austria/1--w-wien--eastern/cities
    #     "classic" country plus new new region
    #
    # (4) new new mixed style e.g. /1--at-austria--central/w-wien/cities
    #     new new country plus "classic" region

    country_key = $1.dup
    region_key  = $2.dup
    yield( country_key, region_key )
    true   # bingo - match found
  else
    false  # no match found
  end
end