Module: TextUtils::ValueHelper
- Included in:
- ValuesReader
- Defined in:
- lib/textutils/helper/value_helper_i.rb,
lib/textutils/helper/value_helper_ii.rb,
lib/textutils/helper/value_helper_iii_numbers.rb
Constant Summary collapse
- TITLE_KEY_REGEX =
If the value looks like a key (only lower-case a-z allowed), assume it’s a key:
- also allow . in keys, e.g. world.quali.america, at.cup, etc.
- also allow 0-9 in keys, e.g. at.2, at.3.1, etc.
- also allow leading digits, e.g. 1850muenchen, 3kronen, etc.
/^(
     [a-z][a-z0-9.]*[a-z0-9]
   | [a-z]                  # allow single letter keys e.g. n,s,etc.
   | [1-9][0-9]*[a-z]+      # NOTE: also allow starts with leading digits e.g. 1850muenchen, 3kronen etc.;
                            #   *MUST* be followed by letter;
                            #   note: leading zero for now *NOT* allowed
  )$
/x
Instance Method Summary collapse
-
#find_grade(value) ⇒ Object
NB: returns ary [grade,value] / two values.
-
#find_key_n_title(values) ⇒ Object
note: returns ary [attribs,more_values] / two values.
- #is_address?(value) ⇒ Boolean
- #is_taglist?(value) ⇒ Boolean
- #is_website?(value) ⇒ Boolean
- #is_year?(value) ⇒ Boolean
-
#match_abv(value) ⇒ Object
alcohol by volume (abv), e.g. 5.2%.
-
#match_brewery(value) ⇒ Object
fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models.
-
#match_hl(value) ⇒ Object
hectoliters (hl); 1 hl = 100 l.
- #match_kcal(value) ⇒ Object
-
#match_km_squared(value) ⇒ Object
numbers w/ units.
- #match_number(value) ⇒ Object
-
#match_og(value) ⇒ Object
plato (stammwuerze/gravity?), e.g. 11.2°.
- #match_website(value) ⇒ Object
- #match_year(value) ⇒ Object
Instance Method Details
#find_grade(value) ⇒ Object
NB: returns ary [grade,value] / two values
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/textutils/helper/value_helper_i.rb', line 101

# Looks for a star rating at the very start or the very end of +value+
# and strips it off.
#
# Grade mapping: *** => 1, ** => 2, * => 3, no stars => 4 (default).
#
# NB: stars must start or end the field/value, e.g.
#   *** Anton Bauer   or
#   Anton Bauer ***
# If stars are present at both ends, the trailing ones decide the grade.
#
# @param value [String] text, optionally prefixed/suffixed with one to three stars
# @return [Array] two values: [grade, value with the stars removed]
def find_grade( value )
  grade = 4   # default when no stars are found

  # The capture group holds one to three stars, so 4 - star count is 1..3.
  # \A / \z anchor to the whole string; ^ / $ would also match at line
  # breaks inside a multi-line value.
  value = value.sub( /\A\s*(\*{1,3})\s+/ ) do
    grade = 4 - Regexp.last_match( 1 ).length
    ''   # remove the stars (and surrounding whitespace) from the value
  end

  value = value.sub( /\s+(\*{1,3})\s*\z/ ) do
    grade = 4 - Regexp.last_match( 1 ).length
    ''   # remove the stars (and surrounding whitespace) from the value
  end

  [grade, value]
end
#find_key_n_title(values) ⇒ Object
note: returns ary [attribs,more_values] / two values
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/textutils/helper/value_helper_i.rb', line 24

# Splits the leading values of a record into key/title attributes.
#
# If the first value matches TITLE_KEY_REGEX it is used as the key and the
# second value as the title; otherwise the first value is the title and the
# key is generated from it.
#
# todo/fix:
#   change title to name
#   change synonyms to alt_names (!!!)
#   => use new method e.g. find_key_n_name(s) - why?? why not??
#
# @param values [Array] the record's values
# @return [Array] two values: [attribs, more_values]
def find_key_n_title( values )
  ## fix: add/configure logger for ActiveRecord!!!
  logger = LogKernel::Logger.root

  ### support autogenerate key from first title value
  explicit_key = values[0] =~ TITLE_KEY_REGEX
  key_col      = explicit_key ? values[0]      : '<auto>'
  title_col    = explicit_key ? values[1]      : values[0]
  more_values  = explicit_key ? values[2..-1]  : values[1..-1]

  attribs = {}

  ## check title_col for grade (e.g. ***/**/*) and continue with the stripped title
  grade, title_col = find_grade( title_col )

  # NB: the default grade is NOT stored; without stars there is no :grade attrib
  if [1, 2, 3].include?( grade )
    logger.debug " found grade #{grade} in title"
    attribs[:grade] = grade
  end

  ## fix/todo: add find parts ??
  #   e.g. ‹Estrella› ‹Damm› Inedit
  #   becomes => title: 'Estrella Damm Inedit' and parts: ['Estrella','Damm']

  ## title (split off optional tree hierarchy)
  ##   e.g. Leverkusen › Köln/Bonn › Nordrhein-Westfalen
  ##        Gelsenkirchen › Ruhrgebiet › Nordrhein-Westfalen
  ##        München [Munich] › Bayern   etc.
  ## fix!!!! - trailing hierarchy gets *ignored* for now!!! - fix!!
  ##   pass along in :tree (or :hierarchy) ??
  ## note: separator must include leading and trailing space for now (fix!! later)
  ##   hack for avoiding conflict w/ parts; fix: read/parse parts first
  ## todo: also allow > (as an alternative to ›)
  title_tree = title_col.split( /[ ]+[›][ ]+/ )

  ## title (split off optional synonyms)
  #   e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
  #        München [Munich]
  titles     = NameTokenizer.new.tokenize( title_tree[0] )
  main_title = titles[0]

  attribs[:title] = main_title

  ## add optional synonyms if present
  if titles.size > 1
    attribs[:synonyms] = titles[1..-1].join('|')
  end

  ## autogenerate key from first title
  if key_col == '<auto>'
    key_col = TextUtils.title_to_key( main_title )
    logger.debug " autogen key »#{key_col}« from title »#{main_title}«"
  end

  attribs[:key] = key_col

  [attribs, more_values]
end
#is_address?(value) ⇒ Boolean
41 42 43 44 45 46 47 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 41

# Tells whether +value+ looks like an address: any value containing //
# is assumed to be one, e.g. 3970 Weitra // Sparkasseplatz 160
#
# @param value [String] value to check
# @return [Boolean] true or false (never the match position or nil)
def is_address?( value )
  # =~ gives the match position (0,1,2,3 etc.) or nil; turn that into a bool
  !( value =~ /\/{2}/ ).nil?
end
#is_taglist?(value) ⇒ Boolean
49 50 51 52 53 54 55 56 57 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 49

# Tells whether +value+ looks like a tag list: lower-case letters, digits,
# |, _ and spaces only; starting with a letter and ending with a letter
# or digit (a single letter is allowed too).
#
# note: cannot start w/ number; must be letter for now
#   -- in the future allow free standing years (e.g. 1980 etc.)?? why? why not?
#   e.g. not allowed: 14 ha or 5_000 hl etc.
#
# @param value [String] value to check
# @return [Boolean] true or false (never the match position or nil)
def is_taglist?( value )
  # \A / \z anchor to the whole string; with ^ / $ a multi-line value would
  # pass as soon as any single line looked like a tag list.
  !( value =~ /\A(?:[a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])\z/ ).nil?
end
#is_website?(value) ⇒ Boolean
60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 60

# Tells whether +value+ looks like a url/internet address,
# e.g. www.ottakringer.at
#  - must start w/ www. or
#  - must end w/ .com
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# @param value [String] value to check
# @return [Boolean] true or false (never the match position or nil)
def is_website?( value )
  # \A / \z anchor to the start/end of the whole string; ^ / $ would also
  # accept www. or .com on any line of a multi-line value.
  !( value =~ /\Awww\.|\.com\z/ ).nil?
end
#is_year?(value) ⇒ Boolean
22 23 24 25 26 27 28 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 22

# Tells whether +value+ is a founded/established year, i.e. consists of
# exactly four digits, e.g. 1776
#
# @param value [String] value to check
# @return [Boolean] true or false (never the match position or nil)
def is_year?( value )
  # \A / \z anchor to the whole string; with ^ / $ a multi-line value such
  # as "1776\nfoo" would pass.
  !( value =~ /\A[0-9]{4}\z/ ).nil?
end
#match_abv(value) ⇒ Object
alcohol by volume (abv) e.g. 5.2%
38 39 40 41 42 43 44 45 46 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 38

# Matches alcohol by volume (abv), e.g. 5.2%
# nb: a leading < is allowed, e.g. <0.5%
#
# @param value [String] value to check
# @yield [Float] the number in front of the % sign (converted with to_f)
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_abv( value )
  # \A / \z anchor to the whole string; ^ / $ would match any single line
  # of a multi-line value.
  if value =~ /\A<?\s*(\d+(?:\.\d+)?)\s*%\z/
    yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_brewery(value) ⇒ Object
fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models
10 11 12 13 14 15 16 17 18 19 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 10

# Matches a "brewed by" reference, i.e. a value starting with by: and
# followed by a brewery key.
#
# fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models
#
# NOTE(review): find_by_key! presumably raises when no brewery has the
# given key - confirm against BeerDb::Model::Brewery.
#
# @param value [String] value to check
# @yield the brewery returned by BeerDb::Model::Brewery.find_by_key!
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_brewery( value )
  # Anchor with \A: the prefix is cut off by position below, so by: must
  # sit at the very start of the string - ^by: would also match at the
  # start of a later line and the cut would then produce a wrong key.
  if value =~ /\Aby:/   ## by: - brewed by/brewery
    brewery_key = value[3..-1]   ## cut off by: prefix
    brewery = BeerDb::Model::Brewery.find_by_key!( brewery_key )
    yield( brewery )
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_hl(value) ⇒ Object
hectoliters (hl); 1 hl = 100 l
68 69 70 71 72 73 74 75 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 68

# Matches hectoliters (hl); 1 hl = 100 l
# e.g. 20_000 hl or 50hl etc. (spaces and _ allowed inside the digits)
#
# @param value [String] value to check
# @yield [Integer] the number with spaces and underscores removed
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_hl( value )
  # \A / \z anchor to the whole string; ^ / $ would match any single line
  # of a multi-line value.
  if value =~ /\A(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)\z/
    yield( Regexp.last_match( 1 ).gsub( /[ _]/, '' ).to_i )
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_kcal(value) ⇒ Object
58 59 60 61 62 63 64 65 66 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 58

# Matches an energy value in kcal
# nb: allows 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
#
# @param value [String] value to check
# @yield [Float] the number in front of kcal (converted with to_f)
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_kcal( value )
  # \A / \z anchor to the whole string; ^ / $ would match any single line
  # of a multi-line value.
  if value =~ /\A(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?\z/
    yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_km_squared(value) ⇒ Object
numbers w/ units
27 28 29 30 31 32 33 34 35 36 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 27

# Matches an area in square kilometres (numbers w/ units),
# e.g. 453 km² or 45_000 km2 (spaces and _ allowed inside the digits)
#
# @param value [String] value to check
# @yield [Integer] the number with spaces and underscores removed
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_km_squared( value )
  # \A / \z anchor to the whole string; ^ / $ would match any single line
  # of a multi-line value.
  if value =~ /\A([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)\z/
    # Build the number from the captured digits only, not from the whole value.
    num = Regexp.last_match( 1 ).gsub( /[ _]/, '' ).to_i
    yield( num )
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_number(value) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 11

# Matches a plain number.
# note: can use any _ or spaces inside digits, e.g. 1_000_000 or 1 000 000
#
# @param value [String] value to check
# @yield [Integer] the number with spaces and underscores removed
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_number( value )
  # Both alternatives sit inside one group so the anchors apply to each of
  # them. Without the group the pattern reads "starts with digits" OR
  # "ends with one or two digits", which accepts e.g. "123abc" and "abc12".
  # \A / \z (not ^ / $) anchor to the whole string rather than to a line.
  if value =~ /\A(?:[0-9][0-9 _]+[0-9]|[0-9]{1,2})\z/
    num = value.gsub( /[ _]/, '' ).to_i
    yield( num )
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_og(value) ⇒ Object
plato (stammwuerze/gravity?) e.g. 11.2°
48 49 50 51 52 53 54 55 56 |
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 48

# Matches plato (stammwuerze/gravity?), e.g. 11.2°
# nb: no whitespace allowed between the number and °
#
# @param value [String] value to check
# @yield [Float] the number in front of the ° sign (converted with to_f)
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_og( value )
  # \A / \z anchor to the whole string; ^ / $ would match any single line
  # of a multi-line value.
  if value =~ /\A(\d+(?:\.\d+)?)°\z/
    yield( Regexp.last_match( 1 ).to_f )   # convert to decimal? how? use float?
    true    # bingo - match found
  else
    false   # no match found
  end
end
#match_website(value) ⇒ Object
72 73 74 75 76 77 78 79 80 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 72

# Passes +value+ to the block when is_website? accepts it as a
# url/internet address, e.g. www.ottakringer.at
#
# fix: support more url formats (e.g. w/o www. - look for .com, country codes etc.)
#
# @param value [String] value to check
# @yield [String] the unchanged value
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_website( value )
  return false unless is_website?( value )   # no match found

  yield( value )
  true   # bingo - match found
end
#match_year(value) ⇒ Object
31 32 33 34 35 36 37 38 |
# File 'lib/textutils/helper/value_helper_ii.rb', line 31

# Passes the year as an integer to the block when is_year? accepts +value+
# as a founded/established year, e.g. 1776
#
# @param value [String] value to check
# @yield [Integer] the value converted with to_i
# @return [Boolean] true if the value matched (and the block was called), false otherwise
def match_year( value )
  return false unless is_year?( value )   # no match found

  yield( value.to_i )
  true   # bingo - match found
end