Class: ValuesReader

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging, TextUtils::ValueHelper
Defined in:
lib/textutils/reader/values_reader.rb

Instance Method Summary collapse

Methods included from TextUtils::ValueHelper

#find_grade, #find_key_n_title, #is_address?, #is_region?, #is_taglist?, #is_website?, #is_year?, #match_abv, #match_brewery, #match_city, #match_country, #match_hl, #match_kcal, #match_km_squared, #match_metro, #match_metro_flag, #match_metro_pop, #match_number, #match_og, #match_region_for_country, #match_supra, #match_supra_flag, #match_website, #match_year

Constructor Details

#initialize(path, more_attribs = {}) ⇒ ValuesReader

Returns a new instance of ValuesReader.



60
61
62
63
64
65
66
# File 'lib/textutils/reader/values_reader.rb', line 60

# Builds a reader for the values file at +path+.
#
# path         - location of the file; its whole content is loaded right away
#                through File.read_utf8 and kept in @data.
# more_attribs - hash kept in @more_attribs; each_line merges it into the
#                attribs of every record (defaults to an empty hash).
def initialize( path, more_attribs={} )
  @path, @more_attribs = path, more_attribs
  @data = File.read_utf8( path )
end

Instance Method Details

#each_line ⇒ Object

Supports multi-line records.



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/textutils/reader/values_reader.rb', line 77

# Walks @data line by line and yields one record at a time to the given
# block as (attribs, more_values).
#
# Line handling, in order:
# - lines starting with #, -- or % (after optional whitespace) and blank
#   lines are skipped
# - a trailing end-of-line comment (whitespace, #, text) is removed and the
#   line is stripped
# - a line starting with a lowercase key and a colon (e.g. brands:) belongs
#   to the current record; it is prepended, unsplit, to more_values
# - any other line starts a new record: it is split on commas (write \, for
#   a literal comma inside a value), every value is stripped, values
#   starting with # are dropped, and the rest is handed to find_key_n_title;
#   @more_attribs is then merged over the resulting attribs
#
# A record is only yielded once the next record starts or the input ends,
# so that its key-value continuation lines can be collected first.
def each_line   # support multi line records

  inside_line = false   # todo: find a better name? e.g. line_found?
  attribs = {}
  more_values = []

  @data.each_line do |line|

    ## allow alternative comment lines
    ## e.g. -- comment or
    ##      % comment
    ##  why?  # might get used by markdown for marking headers, for example

    ## NB: for now alternative comment lines not allowed as end of line style e.g
    ##  some data, more data   -- comment here

    if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
      # skip comments and do NOT copy to result (keep comments secret!)
      logger.debug 'skipping comment line'
      next
    end

    if line =~ /^\s*$/
      # skip blank line
      logger.debug 'skipping blank line'
      next
    end

    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ## becomes -> nyc, New York

    line = line.sub( /\s+#.+$/, '' )

    # pass 2) remove leading and trailing whitespace

    line = line.strip


    ### check for multiline record
    ##    must start with key and colon   e.g.   brands:
    if line =~ /^[a-z][a-z0-9.]*[a-z0-9]:/
      # NB: every additional line is one value e.g. city:wien, etc.
      #  allows you to use any chars
      logger.debug "   multi-line record - add key-value »#{line}«"

      more_values.unshift( line.dup )   # add value upfront to array (first value); lets us keep (optional) tags as last entry; fix!! see valuereaderEx v2
      next
    else
      # NB: new record clears/ends multi-line record

      if inside_line  # check if we already processed a line? if yes; yield last line
        yield( attribs, more_values )
        attribs     = {}
        more_values = []
      end
      inside_line = true
    end


    logger.debug "line: »#{line}«"

    ### split on unescaped commas only
    ##   a comma preceded by a backslash (\,) is an escaped literal comma and
    ##   must stay inside its value; the look-behind keeps it from acting as
    ##   a separator, so no placeholder characters are needed
    values = line.split( /(?<!\\),/ )

    # pass 1) restore escaped commas and remove leading and trailing whitespace for values

    values = values.map { |value| value.gsub( '\,', ',' ).strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # pass 2) remove comment columns

    values = values.select do |value|
      if value =~ /^#/  ## start with # treat it as a comment column; e.g. remove it
        logger.debug "   removing column with value »#{value}«"
        false
      else
        true
      end
    end

    logger.debug "  values: »#{values.join('« »')}«"

    attribs, more_values = find_key_n_title( values )

    attribs = attribs.merge( @more_attribs )  # e.g. merge country_id and other defaults if present

  end # each lines

  # do NOT forget to yield last line (if present/processed)
  if inside_line
    yield( attribs, more_values )
  end


end