Class: EventDb::EventReader

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/eventdb/reader.rb

Defined Under Namespace

Classes: Event

Constant Summary collapse

# Maps English month names / abbreviations (as written in date entries)
# to the month number as an unpadded string.
# note: key order matters - MONTH_EN joins the keys in this order
#   to build the month alternation used in DATE_ENTRY_REGEX.
# Frozen so the lookup table cannot be modified by accident at runtime.
MONTH_EN_TO_MM = {
  'Jan' => '1',
  'Feb' => '2',
  'Mar' => '3', 'March' => '3',
  'Apr' => '4', 'April' => '4',
  'May' => '5',
  'Jun' => '6', 'June' => '6',
  'Jul' => '7', 'July' => '7',
  'Aug' => '8',
  'Sep' => '9', 'Sept' => '9',
  'Oct' => '10',
  'Nov' => '11',
  'Dec' => '12'
}.freeze
MONTH_EN =

e.g. ‘Jan|Feb|Mar|March|…’

MONTH_EN_TO_MM.keys.join('|')
DATE_ENTRY_REGEX =

examples:

  • 2015 @ Salzburg, Austria; Oct/17+18

  • 2015 @ Brussels / Brussel / Bruxelles; Jan/31+Feb/1

  • 2014 @ Porto de Galinhas, Pernambuco; Apr/24-27 (formerly: Abril Pro Ruby)

/\s+
(?<year>20\d{2})   ## year - any 20xx year, not only 201x
\s+
 @            ## at location
\s+
[^;]+        ##  use ; as separator between place and date
;
\s+
(?<start_month_en>#{MONTH_EN})
\/
(?<start_day>[0-9]{1,2})          ## start date
(?:
  [+\-]     ## use + for two days, - for more than two days
  (?:
    (?<end_month_en>#{MONTH_EN})
    \/
  )?   ## optional end_month
  (?<end_day>[0-9]{1,2})
)? ## optional end_date
/x
LINK_ENTRY_REGEX =

example: a list item with a markdown link, e.g. `- [Conference Name](http://example.com)`

/\s+
  \[ [^\]]+ \]    ## link title in square brackets
  \( [^\)]+ \)    ## link target in round brackets
/x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ EventReader

Returns a new instance of EventReader.



28
29
30
# File 'lib/eventdb/reader.rb', line 28

# Keeps the raw event list text; it gets parsed later by #read.
def initialize(text)
  @text = text
end

Class Method Details

.from_file(path) ⇒ Object



19
20
21
# File 'lib/eventdb/reader.rb', line 19

# Reads the file at path (via File.read_utf8) and hands the text to from_string.
def self.from_file( path )
  text = File.read_utf8( path )
  from_string( text )
end

.from_string(text) ⇒ Object



23
24
25
# File 'lib/eventdb/reader.rb', line 23

# Builds a new reader for the passed-in text.
def self.from_string( text )
  new( text )
end

.from_url(src) ⇒ Object

note: src assumed a string



14
15
16
17
# File 'lib/eventdb/reader.rb', line 14

# Fetches the text found at src using a Fetcher::Worker
# and hands it to from_string.
# note: src assumed a string
def self.from_url( src )
  text = Fetcher::Worker.new.read_utf8!( src )
  from_string( text )
end

Instance Method Details

#read ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/eventdb/reader.rb', line 88

# Parses the event list text passed to the constructor and returns the events found.
#
# The text is scanned line by line (after html comments are cut out):
# - heading lines (#, ##, ...) maintain a stack of [level,title] pairs
# - list items matching LINK_ENTRY_REGEX set the title used for the date entries that follow
# - list items matching DATE_ENTRY_REGEX add one Event each
# - all other lines are skipped; scanning stops at a line starting with "## More"
#
# @return [Array<Event>] events in the order they appear in the text
# @raise [RuntimeError] if a heading is more than one level deeper than the current level
def read

  events = []
  stack  = []   ## header/heading stack of [level,title] pairs;  note: stack.size is the current level; starts w/ 0

  last_link_entry = nil   ## text of the most recent link entry; becomes the event title

  # note: cut out; remove all html comments e.g. <!-- -->
  #   supports multi-line comments too (e.g. uses /m - make dot match newlines)
  text = @text.gsub( /<!--.+?-->/m, '' )  ## todo/fix: track/log cut out comments!!!

  text.each_line do |line|

    logger.debug "line: >#{line}<"
    line = line.rstrip  ## remove (possible) trailing newline

    break if line =~ /^## More/   #  stop when hitting >## More< section
    next  if line =~ /^\s*$/      #  skip blank lines

    if line =~ /^[ ]*(#+)[ ]+/    ## heading/headers
      s = StringScanner.new( line )
      s.skip( /[ ]*/ )  ## skip whitespaces
      markers = s.scan( /#+/ )
      level   = markers.size
      s.skip( /[ ]*/ )  ## skip whitespaces
      title   = s.rest.rstrip

      logger.debug " heading level: #{level}, title: >#{title}<"

      level_diff = level - stack.size

      if level_diff > 0
        logger.debug "[EventReader]    up  +#{level_diff}"
        if level_diff > 1
          logger.error "fatal: level step must be one (+1) is +#{level_diff}"
          fail "[EventReader] level step must be one (+1) is +#{level_diff}"
        end
      elsif level_diff < 0
        logger.debug "[EventReader]    down #{level_diff}"
        ## drop the deeper headings plus the old heading on the new level
        level_diff.abs.times { stack.pop }
        stack.pop
      else
        ## same level - replace the current heading
        stack.pop
      end
      stack.push( [level,title] )
      logger.debug "  stack: #{stack.inspect}"

    elsif line =~ /^[ ]*-[ ]+/     ## list item
      if line =~ LINK_ENTRY_REGEX
        logger.debug " link entry: #{line}"

        s = StringScanner.new( line )
        s.skip( /[ ]*-[ ]*/ )  ## skip leading list
        last_link_entry = s.rest.rstrip  ## remove trailing spaces too
      elsif( m = DATE_ENTRY_REGEX.match( line ) )
        year           = m[:year]

        start_month_en = m[:start_month_en]
        start_day      = m[:start_day]

        start_month    = MONTH_EN_TO_MM[ start_month_en ]
        start_date     = Date.new( year.to_i, start_month.to_i, start_day.to_i )

        end_month_en   = m[:end_month_en] || start_month_en   # no end month; use same as start
        end_day        = m[:end_day]      || start_day        # no end day; single day event (use start day)

        end_month      = MONTH_EN_TO_MM[ end_month_en ]

        ## an end month before the start month (e.g. Dec/31+Jan/1) means
        ##  the event runs into the next year; otherwise end would be before start
        end_year  = year.to_i
        end_year += 1   if end_month.to_i < start_month.to_i
        end_date  = Date.new( end_year, end_month.to_i, end_day.to_i )

        logger.debug " date entry: #{line}"
        logger.debug "   start_date: #{start_date}, year: #{year}, start_month_en: #{start_month_en}, start_month: #{start_month} start_day: #{start_day} => #{last_link_entry}"
        logger.debug "   end_date: #{end_date}, end_month_en: #{end_month_en}, end_day_en: #{end_day}"

        s = StringScanner.new( line )
        s.skip( /[ ]*-[ ]*/ )  ## skip leading list
        s.skip_until( /@/ )

        place = s.scan( /[^;]+/ ) ## get place (everything until ; (separator))
        place = place.strip
        logger.debug "  place: #{place}, rest: >#{s.rest}<"

        ## note: cut of heading 1 (e.g. awesome-events title)
        ##  drop(1) returns [] for an empty stack (stack[1..-1] would be nil and crash
        ##  on date entries that show up before any heading)
        more_places = stack.drop( 1 ).reverse.map { |entry| entry[1] }.join(' › ')
        place = "#{place} › #{more_places}"
        logger.debug "  place: #{place}"

        s.skip( /;/ )
        s.skip( /[ ]*/ ) ## skip whitespaces
        date  = s.scan( /[^ ]+/ ) # e.g. everything until first space (or end-of-line)

        title = last_link_entry

        events << Event.new( title, place, date, start_date, end_date )
      else
        logger.debug "  *** skip list item line: #{line}"
      end
    else
      logger.debug "  *** skip line: #{line}"
    end
  end

  events
end