Module: TextUtils::ValueHelper

Included in:
ValuesReader
Defined in:
lib/textutils/helper/value_helper_i.rb,
lib/textutils/helper/value_helper_ii.rb,
lib/textutils/helper/value_helper_iii_numbers.rb

Constant Summary collapse

TITLE_KEY_REGEX =

If the value looks like a key (lowercase a-z, plus the digits and dots described below), assume it’s a key.

- also allow . in keys e.g. world.quali.america, at.cup, etc.
- also allow 0-9 in keys e.g. at.2, at.3.1, etc.
- also allow leading digits e.g. 1850muenchen, 3kronen, etc.
/^(
 [a-z][a-z0-9.]*[a-z0-9]
   |
 [a-z]         # allow single letter keys e.g. n,s,etc.
   |
 [1-9][0-9]*[a-z]+  # NOTE: also allow starts with leading digits e.g. 1850muenchen, 3kronen etc.;
               #   *MUST* be followed by letter;
               #   note: leading zero for now *NOT* allowed
 )$
/x

Instance Method Summary collapse

Instance Method Details

#find_grade(value) ⇒ Object

NB: returns ary [grade,value] / two values



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/textutils/helper/value_helper_i.rb', line 101

# Extracts an optional star grade from a title value.
#
# One to three stars map to a grade (*** => 1, ** => 2, * => 3); without
# stars the grade defaults to 4. Stars only count at the very start of the
# value (followed by whitespace) or at the very end (preceded by whitespace),
# e.g.
#   *** Anton Bauer   or
#   Anton Bauer ***
# If both positions carry stars, the trailing marker decides the grade.
#
# NB: returns ary [grade,value] / two values - value has the markers removed.
def find_grade( value )
  stars_to_grade = { '***' => 1, '**' => 2, '*' => 3 }
  grade = 4  # default if no stars present

  # leading marker first, then trailing marker (same order as before)
  [ /^\s*(\*{1,3})\s+/, /\s+(\*{1,3})\s*$/ ].each do |marker|
    value = value.sub( marker ) do
      grade = stars_to_grade[ $1 ]  # $1 is always one, two or three stars
      ''                            # drop the marker from the title
    end
  end

  [grade, value]
end

#find_key_n_title(values) ⇒ Object

note: returns ary [attribs,more_values] / two values



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/textutils/helper/value_helper_i.rb', line 24

# Splits a record's values into key/title attributes and the remaining values.
#
# If the first value matches TITLE_KEY_REGEX it is used as the key and the
# second value as the title; otherwise the first value is the title and the
# key gets autogenerated from it via TextUtils.title_to_key.
#
# note: returns ary [attribs,more_values] / two values
#   attribs always gets :title and :key; :grade is only set if a star marker
#   (***/**/*) was found and :synonyms only if the tokenizer returned more
#   than one name.
#
# todo/fix:
#   change title to name
#   change synonyms to alt_names (!!!)
#    => use new method e.g. find_key_n_name(s) - why?? why not??
def find_key_n_title( values )
  ## fix: add/configure logger for ActiveRecord!!!
  logger = LogKernel::Logger.root

  ### support autogenerate key from first title value
  explicit_key = values[0] =~ TITLE_KEY_REGEX
  if explicit_key
    key, raw_title, more_values = values[0], values[1], values[2..-1]
  else
    key, raw_title, more_values = nil, values[0], values[1..-1]
  end

  ## strip optional grade marker (e.g. ***/**/*) off the title
  grade, title = find_grade( raw_title )

  attribs = {}

  # NB: the default grade is NOT stored; without stars there is no :grade attrib
  if [1, 2, 3].include?( grade )
    logger.debug "   found grade #{grade} in title"
    attribs[:grade] = grade
  end

  ## fix/todo: add find parts ??
  #  e.g. ‹Estrella› ‹Damm› Inedit
  #    becomes =>   title: 'Estrella Damm Inedit'  and  parts: ['Estrella','Damm']

  ## cut off the optional tree hierarchy
  ##  e.g. Leverkusen › Köln/Bonn › Nordrhein-Westfalen
  ##       München [Munich] › Bayern  etc.
  ##
  ##  fix!!!! - trailing hierarchy gets *ignored* for now
  ##    pass along in  :tree (or :hierarchy) ??
  ##
  ## note: the separator must have leading and trailing spaces for now
  ##   (hack for avoiding conflict w/ parts; fix: read/parse parts first)
  ##  todo: also allow > (as an alternative to ›)
  title_tree = title.split( /[ ]+[›][ ]+/ )

  ## split off optional synonyms
  # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
  #      München [Munich]
  names = NameTokenizer.new.tokenize( title_tree[0] )

  attribs[:title]    = names[0]
  attribs[:synonyms] = names[1..-1].join('|')  if names.size > 1

  unless explicit_key
    ## autogenerate key from first title
    key = TextUtils.title_to_key( names[0] )
    logger.debug "   autogen key »#{key}« from title »#{names[0]}«"
  end

  attribs[:key] = key

  [attribs, more_values]
end

#is_address?(value) ⇒ Boolean

Returns:

  • (Boolean)


41
42
43
44
45
46
47
# File 'lib/textutils/helper/value_helper_ii.rb', line 41

# Checks if the value looks like an address, that is, contains a double
# slash (//) e.g. 3970 Weitra // Sparkasseplatz 160
#
# note: returns a real bool (true|false) and not the match position or nil
def is_address?( value )
  !(value =~ /\/{2}/).nil?
end

#is_taglist?(value) ⇒ Boolean

Returns:

  • (Boolean)


49
50
51
52
53
54
55
56
57
# File 'lib/textutils/helper/value_helper_ii.rb', line 49

# Checks if the value looks like a tag list: lower case letters, digits,
# |, _ and spaces only; must start with a letter and end with a letter or digit.
#
# note: cannot start w/ number - must be letter for now
#   -- in the future allow free standing years (e.g. 1980 etc.?? why? why not?)
#   e.g. not allowed  14 ha or 5_000 hl etc.
#
# note: returns a real bool (true|false) and not the match position or nil
def is_taglist?( value )
  !(value =~ /^([a-z][a-z0-9\|_ ]*[a-z0-9]|[a-z])$/).nil?
end

#is_website?(value) ⇒ Boolean

Returns:

  • (Boolean)


60
61
62
63
64
65
66
67
68
69
70
# File 'lib/textutils/helper/value_helper_ii.rb', line 60

# Checks if the value looks like a url/internet address e.g. www.ottakringer.at
#  - must start w/  www. or
#  - must end w/   .com
#
# fix: support more url formats (e.g. w/o www. - look for .com .country code etc.)
#
# note: returns a real bool (true|false) and not the match position or nil
def is_website?( value )
  !(value =~ /^www\.|\.com$/).nil?
end

#is_year?(value) ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
25
26
27
28
# File 'lib/textutils/helper/value_helper_ii.rb', line 22

# Checks if the value is a four-digit (founded/established) year e.g. 1776
#
# note: returns a real bool (true|false) and not the match position or nil
def is_year?( value )
  !(value =~ /^[0-9]{4}$/).nil?
end

#match_abv(value) ⇒ Object

alcohol by volume (abv) e.g. 5.2%



38
39
40
41
42
43
44
45
46
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 38

# Matches alcohol by volume (abv) e.g. 5.2%
#
# nb: allows a leading < e.g. <0.5% (the < itself gets dropped)
#
# Yields the number as a float (convert to decimal? how?) and returns true
# on a match; returns false without yielding otherwise.
def match_abv( value )
  return false unless value =~ /^<?\s*(\d+(?:\.\d+)?)\s*%$/

  yield( $1.to_f )
  true # bingo - match found
end

#match_brewery(value) ⇒ Object

fix!!!!: move to beerdb ??? why? why not?? - yes, move to beerdb-models



10
11
12
13
14
15
16
17
18
19
# File 'lib/textutils/helper/value_helper_ii.rb', line 10

# Matches a "brewed by" reference e.g. by:<brewery key>
#
# On a match the by: prefix is cut off, the brewery record is looked up with
# BeerDb::Model::Brewery.find_by_key! and yielded; returns true. Returns false
# without yielding if the value does not start with by:
#
# NOTE(review): find_by_key! presumably raises if the key is unknown - confirm.
# fix!!!!: move to beerdb-models
def match_brewery( value )
  return false unless value =~ /^by:/   ## by:  -brewed by/brewery

  key = value[3..-1]  ## cut off by: prefix
  yield( BeerDb::Model::Brewery.find_by_key!( key ) )
  true # bingo - match found
end

#match_hl(value) ⇒ Object

hectoliters (hl); 1 hl = 100 l



68
69
70
71
72
73
74
75
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 68

# Matches hectoliters (hl); 1 hl = 100 l  e.g. 20_000 hl or 50hl etc.
#
# Digits may be grouped with _ or spaces. Yields the number as an integer
# and returns true on a match; returns false without yielding otherwise.
def match_hl( value )
  return false unless value =~ /^(?:([0-9][0-9_ ]+[0-9]|[0-9]{1,2})\s*hl)$/

  yield( $1.delete( ' _' ).to_i )  # strip the digit group separators
  true # bingo - match found
end

#match_kcal(value) ⇒ Object



58
59
60
61
62
63
64
65
66
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 58

# Matches kilocalories (kcal)
#
# nb: allows 44.4 kcal/100ml or 44.4 kcal or 44.4kcal
#
# Yields the number as a float (convert to decimal? how?) and returns true
# on a match; returns false without yielding otherwise.
def match_kcal( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)\s*kcal(?:\/100ml)?$/

  yield( $1.to_f )
  true # bingo - match found
end

#match_km_squared(value) ⇒ Object

numbers w/ units



27
28
29
30
31
32
33
34
35
36
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 27

# Matches an area in square kilometers e.g. 453 km² or 45_000 km2
#
# Digits may be grouped with _ or spaces. Yields the number as an integer
# and returns true on a match; returns false without yielding otherwise.
def match_km_squared( value )
  return false unless value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})(?:\s*(?:km2|km²)\s*)$/

  # cut off the unit first, then the digit group separators
  digits = value.gsub( 'km2', '' ).gsub( 'km²', '' ).delete( ' _' )
  yield( digits.to_i )
  true # bingo - match found
end

#match_number(value) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 11

# Matches a plain (unit-less) integer number.
#
# note: can use any _ or spaces inside digits e.g. 1_000_000 or 1 000 000
#
# Yields the number as an integer and returns true on a match; returns false
# without yielding otherwise.
#
# The whole value must be a number: both alternatives sit in a single group
# anchored on both sides. Before, the pattern was /^(...)|(...)$/, which only
# anchored the first alternative at the start and the second at the end, so
# values such as "123abc" or "abc 12" were accepted as numbers.
def match_number( value )
  if value =~ /^([0-9][0-9 _]+[0-9]|[0-9]{1,2})$/
    # convert the captured digits only (same approach as match_hl)
    yield( $1.gsub( /[ _]/, '' ).to_i )
    true # bingo - match found
  else
    false # no match found
  end
end

#match_og(value) ⇒ Object

plato (stammwuerze/gravity?) e.g. 11.2°



48
49
50
51
52
53
54
55
56
# File 'lib/textutils/helper/value_helper_iii_numbers.rb', line 48

# Matches degrees plato (stammwuerze/gravity?) e.g. 11.2°
#
# nb: no whitespace allowed between number and ° e.g. 11.2°
#
# Yields the number as a float (convert to decimal? how?) and returns true
# on a match; returns false without yielding otherwise.
def match_og( value )
  return false unless value =~ /^(\d+(?:\.\d+)?)°$/

  yield( $1.to_f )
  true # bingo - match found
end

#match_website(value) ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/textutils/helper/value_helper_ii.rb', line 72

# Matches a url/internet address e.g. www.ottakringer.at
# (see is_website? for what counts as a website)
#
# fix: support more url formats (e.g. w/o www. - look for .com .country code etc.)
#
# Yields the value unchanged and returns true on a match; returns false
# without yielding otherwise.
def match_website( value )
  return false unless is_website?( value )

  yield( value )
  true # bingo - match found
end

#match_year(value) ⇒ Object



31
32
33
34
35
36
37
38
# File 'lib/textutils/helper/value_helper_ii.rb', line 31

# Matches a founded/established year e.g. 1776
# (see is_year? for what counts as a year)
#
# Yields the year as an integer and returns true on a match; returns false
# without yielding otherwise.
def match_year( value )
  return false unless is_year?( value )

  yield( value.to_i )
  true # bingo - match found
end