Class: Splog::LogParser
- Inherits:
-
Object
- Object
- Splog::LogParser
- Defined in:
- lib/splog.rb
Instance Attribute Summary collapse
-
#client ⇒ Object
readonly
Define the accessors to mongo, all db writes happen to the configured @coll.
-
#coll ⇒ Object
readonly
Define the accessors to mongo, all db writes happen to the configured @coll.
-
#config ⇒ Object
Returns the value of attribute config.
-
#options ⇒ Object
Returns the value of attribute options.
-
#pattern_name ⇒ Object
Returns the value of attribute pattern_name.
Instance Method Summary collapse
- #cli(args = nil) ⇒ Object
- #createsig(body) ⇒ Object
-
#initialize ⇒ LogParser
constructor
A new instance of LogParser.
- #load_dot_file ⇒ Object
-
#parse(enum_ref) ⇒ Object
Takes an enum and iterates over it with logic to parse the log lines based on the configuration.
-
#parse_datetime(the_input, the_format = nil) ⇒ Object
Attempt to parse a datetime and convert it to UTC; if parsing fails the input is returned unchanged.
-
#parse_float(the_input) ⇒ Object
Attempt to parse a float or return 0.
-
#parse_int(the_input) ⇒ Object
Attempt to parse an int or return 0.
- #parse_line(line, opts = {}) ⇒ Object
- #persist_log_entry(parsed_line) ⇒ Object
- #read_input(the_input) ⇒ Object
- #read_log_file(file_name) ⇒ Object
- #set_mapping(options) ⇒ Object
- #set_pattern(options) ⇒ Object
- #set_pattern_match_forward ⇒ Object
- #sigflat(body) ⇒ Object
- #update_progress_bar ⇒ Object
Constructor Details
#initialize ⇒ LogParser
Returns a new instance of LogParser.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/splog.rb', line 23
# Sets every piece of parser state to its empty default.
def initialize
  # Mongo client, nil by default until the first persist of a log entry
  @client = nil

  # How each line is split apart (name + array of regex), and how each
  # regex group is mapped to a data type (name + mapping table)
  @pattern_name, @pattern = nil, nil
  @mapping_name, @mapping = nil, nil

  # Command line options; appending is enabled by default
  @options = { :append => true }

  # Progress bar to create if in verbose mode, and the line total it tracks
  @progress_bar = nil
  @line_count = nil

  # Yaml config options
  @config = {}
end
Instance Attribute Details
#client ⇒ Object (readonly)
Define the accessors to mongo, all db writes happen to the configured @coll
21 22 23 |
# File 'lib/splog.rb', line 21
# Read-only accessor for the Mongo client stored in @client.
def client
  @client
end
#coll ⇒ Object (readonly)
Define the accessors to mongo, all db writes happen to the configured @coll
21 22 23 |
# File 'lib/splog.rb', line 21
# Read-only accessor for the Mongo collection stored in @coll; all db writes
# happen to this collection.
def coll
  @coll
end
#config ⇒ Object
Returns the value of attribute config.
18 19 20 |
# File 'lib/splog.rb', line 18
# Returns the configuration hash stored in @config.
def config
  @config
end
#options ⇒ Object
Returns the value of attribute options.
18 19 20 |
# File 'lib/splog.rb', line 18
# Returns the command line options hash stored in @options.
def options
  @options
end
#pattern_name ⇒ Object
Returns the value of attribute pattern_name.
18 19 20 |
# File 'lib/splog.rb', line 18
# Returns the name of the selected pattern stored in @pattern_name.
def pattern_name
  @pattern_name
end
Instance Method Details
#cli(args = nil) ⇒ Object
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 |
# File 'lib/splog.rb', line 426
# Command line entry point.
#
# Parses the switches in +args+ (or ARGV when +args+ is nil or empty), stores
# them in @options, loads the dot file, compiles the pattern and mapping, and
# then parses either the -f file or piped stdin. Each parsed line is optionally
# persisted (when -d is given) and emitted according to the :output option:
# 'stdout' prints one parsed line per line, 'json' prints a JSON array, nil
# prints nothing.
#
# @param args [Array<String>, nil] switches to parse instead of ARGV
# @return [Object] for any other :output value, the enumerator returned by
#   read_log_file; otherwise the value is not meaningful
def cli(args=nil)
  options = {
    :append => true,
    :output => 'stdout',
    :md5 => true # By default md5 the hash as the unique identifier
  }

  opts = OptionParser.new do |parser|
    parser.banner = 'Usage: splog [options]'
    parser.separator ''
    parser.separator 'Parse logs in arbitrary formats defined in ~/.splog.yml:'

    parser.on('-p', '--pattern STR', 'Mapping name defined in ~/.splog.yml') do |setting|
      options[:pattern_name] = setting
    end

    parser.on('-f', '--file PATH', 'File to parse') do |setting|
      options[:file_name] = setting ? File.expand_path(setting) : setting
    end

    parser.on('-c', '--config PATH', 'Optional dot file path. Defaults to ~/.splog.yml') do |setting|
      options[:dot_file_name] = setting ? File.expand_path(setting) : setting
    end

    parser.on('-o', '--output [stdout|filename]', 'Defaults to stdout, if specifying just -o then defaults to no standard output.') do |setting|
      options[:output] = setting ? setting : nil
    end

    parser.on('--no-append', "When a line doesn't match the regex, don't append it to the previously matched line. The default is to append.") do |setting|
      options[:append] = setting.nil?
    end

    parser.on('-k', '--key STR', 'The unique business key to use as the database id. If none specified an automatic id will be generated.') do |setting|
      options[:key] = setting
    end

    parser.on('-d', '--database STR', 'Specify a database reference defined in ~/.splog.yml to write to') do |ext|
      options[:db_ref_name] = ext || nil
    end

    parser.on('--db STR', 'Override the Mongo database defined in ~/.splog.yml') do |ext|
      options[:mongo_db] = ext || nil
    end

    parser.on('--coll STR', 'Override the Mongo collection defined in ~/.splog.yml') do |ext|
      options[:mongo_coll] = ext || nil
    end

    parser.on('--line-count N', Integer, 'If reading from STDIN (using pipes) Setting the line count will allow splog to better log th progress in verbose mode. Ex. --line-count `wc -l some.log`') do |ext|
      options[:line_count] = ext || nil
    end

    parser.on('--[no-]md5', 'When saving to mongo md5 the hash and set that to the _id. This means repeated parses of the same log file should be idempotent. Otherwise there will be duplicated lines in the database.') do |ext|
      options[:md5] = ext # if -m then == true
    end

    parser.on('-v', 'Verbose logging, recommended in conjunction with -o without any arguments.') do |ext|
      options[:verbose] = ext # if -m then == true
    end

    parser.on_tail('-h', '--help', '--usage', 'Show this usage message and quit.') do |setting|
      puts parser.help
      exit
    end

    #parser.on_tail("-v", "--version", "Show version information about this program and quit.") do
    #  puts "Splog v1.0.0"
    #  exit
    #end
  end

  begin
    if args and not args.length == 0
      opts.parse!(args)
    else
      # With no arguments at all, fall back to printing the usage message
      ARGV << '-h' if ARGV.size == 0
      opts.parse!(ARGV)
    end
  rescue OptionParser::ParseError
    $stderr.print "Error: #{$!}\n"
    exit
  end

  if (options[:file_name] and options[:pattern_name]) or not $stdin.tty?
    @options = options

    # At this point the options are loaded so load the dot file before continuing so the config can be properly
    # loaded from the dot file and further options determined
    load_dot_file
    set_pattern(options)
    set_pattern_match_forward
    set_mapping(options)

    # Total line count, if file input we can easily do wc -l on the file. If $stdin we can allow a user defined
    # input from --line-count `wc -l <filename>`

    # Get the enum from the file
    e = nil
    if options[:file_name] and options[:pattern_name]
      e = read_log_file(options[:file_name])
      # argv form: no shell is involved, so spaces or metacharacters in the path are passed through literally
      @line_count = IO.popen(['wc', '-l', options[:file_name]], &:read).split.first.to_i
      # Set the progress bar total
      #update_progress_bar_total(line_count)
    # Or stdin otherwise
    elsif not $stdin.tty?
      e = $stdin.to_enum
      @line_count = options[:line_count]
    else
      $stderr.print 'Please either specify a -f FILENAME or pipe content to splog.'
      exit
    end

    # outputting to stdout simply prints 1 parsed line per line
    if options[:output] == 'stdout'
      # Parse each line of the file through the log parser
      parse(e).each do |parsed_line|
        if options[:db_ref_name]
          persist_log_entry(parsed_line)
        end

        # Then write to stdout
        $stdout.write parsed_line.to_s
        $stdout.write "\n"
      end

    # outputting to json will construct a valid json array so you can do something like splog ... | prettyjson
    elsif options[:output] == 'json'
      # Parse each line of the file through the log parser
      $stdout.write '['
      pe = parse(e)
      begin
        while true
          parsed_line = pe.next
          if options[:db_ref_name]
            persist_log_entry(parsed_line)
          end

          # Then write to stdout
          $stdout.write parsed_line.to_json
          # peek raises StopIteration after the last element, which ends the loop via the rescue below
          $stdout.write ',' unless pe.peek.nil?
        end
      rescue => detail
        nil
      end
      # If a \n is not written a % shows on the console output thus breaking the json array
      $stdout.write "]\n"

    # outputting nothing if -o given with no value.  Useful for perf testing mainly
    elsif options[:output] == nil
      pe = parse(e)
      begin
        while true
          parsed_line = pe.next
          if options[:db_ref_name]
            persist_log_entry(parsed_line)
          end
        end
      rescue => detail
        nil
      end

    # Otherwise return the enumerator back up to be iterated over either in testing or in a program requiring this code
    else
      return read_log_file(options[:file_name])
    end
  else
    $stderr.print "Please either specify a -f FILENAME or pipe in content\n"
  end
end
#createsig(body) ⇒ Object
50 51 52 |
# File 'lib/splog.rb', line 50
# Returns the hex MD5 digest of +body+ after it has been flattened to a
# String by #sigflat.
def createsig(body)
  flattened = sigflat(body)
  Digest::MD5.hexdigest(flattened)
end
#load_dot_file ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/splog.rb', line 112
# Loads the YAML dot file and copies each top-level key into @config.
#
# The path is @options[:dot_file_name], falling back to '~/.splog.yml'; it is
# expanded with File.expand_path so a leading '~' resolves to the home
# directory. If the file cannot be read or iterated, a message is written to
# stderr and the process exits.
def load_dot_file
  # yml config
  dot_file = @options[:dot_file_name] || '~/.splog.yml'
  #puts "Loading dot_file from #{dot_file}"
  begin
    prop_list = YAML.load_file(File.expand_path(dot_file))
    prop_list.each { |key, value| @config[key] = value }
  rescue => detail
    $stderr.puts "Unable to find or read #{dot_file}\n"
    $stderr.puts detail
    exit
  end
end
#parse(enum_ref) ⇒ Object
Takes an enum and iterates over it with logic to parse the log lines based on the configuration
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 |
# File 'lib/splog.rb', line 269
# Takes an enum and iterates over it with logic to parse the log lines based on the configuration.
#
# Returns an Enumerator that yields one parsed hash per logical log entry. For each line read from
# +enum_ref+ the following line is peeked and tested through an external egrep process (Open3):
#   * when it matches and a 'match_forward_regex' is configured, the following lines are folded into
#     the current entry under the 'match_forward_keyname_source' key;
#   * otherwise, when @options[:append] is set, following lines that do NOT match the pattern are
#     appended to the current entry under the 'unmatched_append_key_name' key;
#   * otherwise the parsed line is yielded as is.
# StopIteration from +enum_ref+ ends the loop and flushes whatever is still pending.
#
# NOTE(review): update_progress_bar is never invoked in the visible code — confirm whether a call
# belongs in this loop.
def parse(enum_ref)
  e = Enumerator.new do |y|
    # Defines the current parsed line.  Next lines can be added to this one potentially based on a key
    current_working_line = nil
    parsed_line = nil
    begin
      while enum_ref
        line = enum_ref.next
        parsed_line = parse_line(line)
        # Raises StopIteration on the last line, which is handled by the rescue at the bottom
        next_line = enum_ref.peek

        # Pass in the 'match_forward_regex' if it exists so the next line can be evaluated in this context
        #parsed_next_line = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})
        #parsed_next_line_test = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})

        # Performance optimization here, don't do a full #match only =~ since not all next lines need to be parsed period
        #parsed_next_line_test = @pattern_match_forward.nil? ? next_line =~ @pattern : next_line =~ @pattern_match_forward
        #egrep = "echo \"#{next_line}\" | egrep \"#{@pattern_egrep}\""
        #egrep_fwd = 'echo ' + next_line + ' | egrep ' + @pattern_match_forward_egrep
        #p egrep
        #p egrep_fwd
        #parsed_next_line_test = @pattern_match_forward.nil? ? `#{egrep}` : `#{egrep_fwd}`

        # s is the egrep exit status; it stays nil if the pipe breaks
        o, e, s = nil
        begin
          o, e, s = Open3.capture3(@pattern_match_forward.nil? ? @pattern_egrep : @pattern_match_forward_egrep, :stdin_data=>next_line)
        rescue Errno::EPIPE
          #puts "Connection broke!"
          nil
        end

        ############################################################################################################
        # If the next line matches the match_forward_regex
        ############################################################################################################
        #if parsed_next_line and @config[@pattern_name]['match_forward_regex']
        #if not parsed_next_line_test.nil? and @config[@pattern_name]['match_forward_regex']
        if s && s.success? and @config[@pattern_name]['match_forward_regex']
          # Do the actual match now that we know it matches
          parsed_next_line = @pattern_match_forward.nil? ? parse_line(next_line) : parse_line(next_line, {:regex => @pattern_match_forward})

          # If the current_working_line does not yet exist, set it to the latest parsed line
          if current_working_line.nil? and parsed_line
            current_working_line = parsed_line
          end

          # Add to the match_forward_keyname_source from the match_forward_keyname_dest
          current_working_line[@config[@pattern_name]['match_forward_keyname_source']] << parsed_next_line[@config[@pattern_name]['match_forward_keyname_source']]

          # fast forward the enum one click to account for the peek
          enum_ref.next

          # Read until StopIteration or the match_forward_regex no longer matches
          while true
            # Only peek here to not advance the enum unnecessarily
            sub_line = enum_ref.peek
            #parsed_sub_line = @config[@pattern_name]['match_forward_regex'].nil? ? nil : parse_line(sub_line, {:regex => @pattern_match_forward})
            parsed_sub_line = @pattern_match_forward.nil? ? nil : parse_line(sub_line, {:regex => @pattern_match_forward})
            if parsed_sub_line
              # if matched advance the enum and add the data to the current working line
              enum_ref.next
              current_working_line[@config[@pattern_name]['match_forward_keyname_source']] << parsed_sub_line[@config[@pattern_name]['match_forward_keyname_source']]
            else
              # Otherwise we've reached the end of the matched pattern yield this match out
              y << current_working_line
              # Since that is yielded, set the current_working_line to nil so it has a fresh start for the next iter
              current_working_line = nil
              break
            end
          end

        ############################################################################################################
        # Otherwise if the next line is nil but the parsed line matched and we are appending
        ############################################################################################################
        # NOTE(review): parsed_next_line is only assigned in the branch above, so here it is nil or
        # whatever an earlier iteration left behind — confirm the stale value is intended.
        elsif parsed_line and parsed_next_line.nil? and @options[:append]
          # If the current_working_line does not yet exist, set it to the latest parsed line
          if current_working_line.nil? and parsed_line
            current_working_line = parsed_line
          end

          # Read until StopIteration or a new parsed line is found
          while true
            # Only peek here to not advance the enum unnecessarily
            sub_line = enum_ref.peek
            # TODO this can be optimized too since I'm attempting to not match it!  I don't even read the parsed_sub_line
            #parsed_sub_line = parse_line(sub_line)
            o, e, s = nil
            begin
              o, e, s = Open3.capture3(@pattern_egrep, :stdin_data=>sub_line)
            rescue Errno::EPIPE
              #puts "Connection broke!"
              nil
            end

            #if parsed_sub_line.nil? and @config[@pattern_name]['unmatched_append_key_name']
            if (s.nil? or not s.success?) && @config[@pattern_name]['unmatched_append_key_name']
              # if unmatched advance the enum and add the data to the current working line
              enum_ref.next
              current_working_line[@config[@pattern_name]['unmatched_append_key_name']] << sub_line
            else
              # Otherwise we've reached the end of the matched pattern yield this match out
              y << current_working_line
              # Since that is yielded, set the current_working_line to nil so it has a fresh start for the next iter
              current_working_line = nil
              break
            end
          end

        ############################################################################################################
        # Otherwise just your average joe matched line
        ############################################################################################################
        elsif parsed_line
          y << parsed_line
        end
      end
    rescue StopIteration => e
      # if both current_working_line and parsed line yield them both as this situation can happen when peeking forward
      # After an unmatched line
      if current_working_line and parsed_line and current_working_line != parsed_line
        y << current_working_line
        y << parsed_line
      # Yield point for a successfully parsed line
      elsif current_working_line
        y << current_working_line
      else
        # NOTE(review): parsed_line can still be nil here (e.g. empty input), in which case nil is
        # yielded — confirm consumers expect that.
        y << parsed_line
      end
    end
  end
end
#parse_datetime(the_input, the_format = nil) ⇒ Object
Attempt to parse a datetime and convert it to UTC; if parsing fails the input is returned unchanged
203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/splog.rb', line 203
# Parses +the_input+ into a UTC Time.
#
# With +the_format+ the input is read via DateTime.strptime, otherwise via
# DateTime.parse. Any error during parsing is swallowed and the input is
# handed back unchanged.
def parse_datetime(the_input, the_format=nil)
  result = the_input
  result = if the_format
             DateTime.strptime(the_input, the_format)
           else
             DateTime.parse(the_input)
           end
  # Convert the time to utc for mongo
  result = result.to_time.utc unless result.nil?
  result
rescue StandardError
  result
end
#parse_float(the_input) ⇒ Object
Attempt to parse a float or return 0
192 193 194 195 196 197 198 199 200 |
# File 'lib/splog.rb', line 192
# Converts +the_input+ with #to_f; if the conversion raises, 0 is returned.
def parse_float(the_input)
  the_input.to_f
rescue StandardError
  0
end
#parse_int(the_input) ⇒ Object
Attempt to parse an int or return 0
181 182 183 184 185 186 187 188 189 |
# File 'lib/splog.rb', line 181
# Converts +the_input+ with #to_i; if the conversion raises, 0 is returned.
def parse_int(the_input)
  the_input.to_i
rescue StandardError
  0
end
#parse_line(line, opts = {}) ⇒ Object
215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
# File 'lib/splog.rb', line 215
# Matches one log line and returns its named captures as a hash.
#
# The line is matched against opts[:regex] when given, otherwise against
# @pattern. Each named group becomes a key; when @mapping has an entry for the
# group its 'data_type' selects the conversion (Int/Integer, Float, DateTime).
# A mapped group with any other data_type keeps its raw captured value instead
# of being dropped. Errors raised while matching or converting are printed to
# stderr and whatever was collected so far is used.
#
# When the result is non-empty, @options[:key] is stored under 'key' and, if
# @options[:md5] is set, the signature from createsig is stored under '_id'.
#
# @param line [String] the raw log line
# @param opts [Hash] optional :regex overriding @pattern
# @return [Hash, nil] the parsed values, or nil when nothing was captured
def parse_line(line, opts={})
  res = {}
  begin
    matcher = opts[:regex] || @pattern
    m = matcher.match(line)
    if m
      m.names.each do |k|
        v = m[k]
        if @mapping && @mapping.has_key?(k)
          rule = @mapping[k]
          res[k] = case rule['data_type']
                   when 'Int', 'Integer' then parse_int(v)
                   when 'Float' then parse_float(v)
                   when 'DateTime' then parse_datetime(v, rule['format'])
                   else v # unrecognised or missing data_type: keep the raw capture
                   end
        else
          res[k] = v
        end
      end
    end
  rescue => detail
    $stderr.puts detail
    detail.backtrace.each { |e| $stderr.puts e }
  end

  # If a key exists add the key to the parsed_line.  This can help differentiate the log if not putting each
  # log into a unique collection, or even then helps differentiate the logs within a collection.  Ex. if you had
  # access_log and error_log in the same collection you may want a specific key for each of those
  unless res.empty?
    res['key'] = @options[:key] if @options[:key]
    res['_id'] = createsig(res) if @options[:md5]
  end

  # Return nil if the hash hasn't been populated
  res.empty? ? nil : res
end
#persist_log_entry(parsed_line) ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/splog.rb', line 76
# Writes one parsed line to the configured Mongo collection.
#
# On the first call with @options[:db_ref_name] set, the connection settings
# are read from @config['db_refs'][db_ref_name] (host defaults to 127.0.0.1,
# port to MongoDB's default 27017; --db / --coll overrides win), the client is
# created, optional authentication is performed and @coll is set.
# A line carrying an '_id' is upserted, any other line is inserted. Every
# error is reported on stderr and otherwise ignored.
#
# @param parsed_line [Hash] a hash as produced by parse_line
def persist_log_entry(parsed_line)
  begin
    if @client.nil? and @options[:db_ref_name]
      db_ref = @config['db_refs'][@options[:db_ref_name]]
      host = db_ref['host'] || '127.0.0.1'
      port = db_ref['port'] || 27017
      user = db_ref['user'] || nil
      pass = db_ref['pass'] || nil
      db = @options[:mongo_db] || db_ref['db']
      coll = @options[:mongo_coll] || db_ref['collection']

      @client = MongoClient.new(host, port, :pool_size => 1)
      db = @client.db(db)
      if user and user != '' && pass
        db.authenticate(user, pass)
        #p "Authentication to mongo returned: #{auth}"
      end
      @coll = db[coll]
    end

    # Assuming the above is successful write to the collection, otherwise silently do nothing
    if @client and @coll
      # If an _id exists upsert the doc
      if parsed_line.has_key?('_id')
        @coll.update({:_id => parsed_line['_id']}, parsed_line, {:upsert => true})
      # Otherwise insert the parsed_line which will cause a Mongo specific _id to be generated
      else
        @coll.insert(parsed_line)
      end
    end
  rescue => detail
    $stderr.puts detail
  end
end
#read_input(the_input) ⇒ Object
402 403 404 405 406 |
# File 'lib/splog.rb', line 402
# Splits +the_input+ into lines (line endings are kept) and returns an
# enumerator over them.
def read_input(the_input)
  #the_input.lines.map(&:chomp).to_enum
  line_list = the_input.lines
  line_list.each
end
#read_log_file(file_name) ⇒ Object
408 409 410 |
# File 'lib/splog.rb', line 408
# Returns an enumerator over the lines of +file_name+ (line endings kept).
#
# File.foreach opens the file when iteration starts and closes it once the
# last line has been read, so no file handle is left open after a full read.
#
# @param file_name [String] path of the log file
# @raise [Errno::ENOENT] immediately, when the file does not exist
def read_log_file(file_name)
  # Fail fast on a missing file rather than on the first #next of the enumerator
  raise Errno::ENOENT, file_name unless File.exist?(file_name)

  File.foreach(file_name)
end
#set_mapping(options) ⇒ Object
168 169 170 171 172 173 174 175 176 177 178 |
# File 'lib/splog.rb', line 168
# Builds @mapping: the 'mapping' list configured for the selected pattern,
# indexed by each entry's 'name'. A pattern without a 'mapping' yields an
# empty hash. If the configuration cannot be read, a message is printed and
# the process exits.
#
# @param options [Hash] must provide :pattern_name
def set_mapping(options)
  begin
    by_name = {}
    entries = @config[options[:pattern_name]]['mapping']
    entries.each { |entry| by_name[entry['name']] = entry } unless entries.nil?
    @mapping = by_name
  rescue => detail
    puts 'Unable to read the mapping in your .splog.yaml configuration.  Please reference https://github.com/engineersamuel/splog for proper formatting.'
    $stderr.puts detail
    exit
  end
end
#set_pattern(options) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/splog.rb', line 128
# Selects the pattern named by options[:pattern_name] and compiles it.
#
# The configured 'regex' parts are joined with the configured 'delim' (or
# with nothing when no delim is set) and compiled with Regexp::MULTILINE into
# @pattern. @pattern_egrep receives the matching egrep command line, built
# from the same source with the named-group markers removed. If the pattern is
# missing or cannot be compiled, a message is printed and the process exits.
#
# @param options [Hash] must provide :pattern_name
def set_pattern(options)
  @pattern_name = options[:pattern_name]
  begin
    pattern_config = @config[@pattern_name]
    # MULTILINE to match the \n chars
    #Regexp::MULTILINE | Regexp::IGNORECASE
    source = pattern_config['regex'].join(pattern_config['delim'])

    # Remove the grouped named
    @pattern_egrep = 'egrep "' + source.gsub(/\?<.*?>/, '') + '"'
    @pattern = Regexp.new(source, Regexp::MULTILINE)
  rescue => detail
    #detail.backtrace.each { |e| $stderr.puts e}
    #$stderr.puts $!
    puts "No pattern matching '#{options[:pattern_name]}' found.  Please choose another name or define this pattern in the your .splog.yaml"
    exit
  end
end
#set_pattern_match_forward ⇒ Object
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/splog.rb', line 148
# Compiles the optional 'match_forward_regex' of the selected pattern.
#
# The pattern name is taken from @options[:pattern_name]. When the pattern
# configures 'match_forward_regex', its parts are joined with the configured
# 'delim' and compiled with Regexp::MULTILINE into @pattern_match_forward, and
# @pattern_match_forward_egrep receives the matching egrep command line (named
# group markers removed). When it is not configured, @pattern_match_forward is
# left nil. Any error is printed to stderr and the process exits.
def set_pattern_match_forward
  begin
    @pattern_match_forward = @config[@options[:pattern_name]]['match_forward_regex']

    # since this is optional only compile if set
    if @pattern_match_forward
      delim = @config[@pattern_name].nil? ? "\\s+" : @config[@pattern_name]['delim']
      source = @pattern_match_forward.join(delim)

      # Remove the grouped named
      @pattern_match_forward_egrep = 'egrep "' + source.gsub(/\?<.*?>/, '') + '"'
      @pattern_match_forward = Regexp.new(source, Regexp::MULTILINE)
    end
  rescue => detail
    #detail.backtrace.each { |e| $stderr.puts e}
    $stderr.puts detail
    #puts "No pattern matching '#{options[:pattern_name]}' found.  Please choose another name or define this pattern in the your .splog.yaml"
    exit
  end
end
#sigflat(body) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/splog.rb', line 54
# Flattens +body+ into a String suitable for hashing.
#
# * A String is returned as is.
# * A Hash becomes the list of its "key=>value" pairs (both sides flattened
#   recursively) and is then treated as an Array.
# * An Array has every element flattened and the results sorted, so element
#   and key order do not influence the signature.
# * Anything that is not a String at that point is rendered as its #to_s
#   followed by its class name.
#
# The argument is never modified, and values whose #to_s is frozen (nil, true,
# false) are handled.
#
# @param body [Object] the value to flatten
# @return [String]
def sigflat(body)
  if body.class == Hash
    body = body.map { |key, value| "#{sigflat key}=>#{sigflat value}" }
  end

  if body.class == Array
    # Non-destructive map/sort: the caller's array must stay untouched
    body = body.map { |value| sigflat value }.sort
  end

  if body.class != String
    # Interpolation builds a fresh string, so frozen #to_s results are safe
    body = "#{body}#{body.class}"
  end

  body
end
#update_progress_bar ⇒ Object
412 413 414 415 416 417 418 419 420 421 422 423 424 |
# File 'lib/splog.rb', line 412
# Creates the progress bar on first use and advances it by one step.
#
# Nothing happens unless @options[:verbose] is set. The bar is created with
# @line_count as its total; when a line count is known the bar additionally
# gets the 'Lines Read' title and a custom format.
def update_progress_bar
  if @options[:verbose] and not @progress_bar
    if @line_count.nil?
      @progress_bar = ProgressBar.create(:starting_at => 0, :total => @line_count)
    else
      @progress_bar = ProgressBar.create(:title => 'Lines Read', :format => '[%a] %c Completed |%b>>%i| %p%% %t [%e]', :total => @line_count)
    end
  end

  if @options[:verbose] and @progress_bar
    @progress_bar.increment
  end
end