Class: NameParser
- Inherits:
-
Object
- Object
- NameParser
- Includes:
- LogUtils::Logging
- Defined in:
- lib/textutils/parser/name_parser.rb
Overview
FIXME: move this class into the TextUtils namespace/module.
Instance Method Summary collapse
Instance Method Details
#parse(chunks) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/textutils/parser/name_parser.rb', line 9

# Splits raw name chunks into a flat list of name strings.
#
# Each chunk is split on '|' into parts; every part is then scanned left to
# right. Text outside brackets becomes one value (surrounding whitespace
# stripped); text inside [...] becomes another value with the enclosing
# brackets removed, e.g. "Bayern [Bavaria]" => ["Bayern", "Bavaria"].
#
# NOTE: bracket contents are NOT interpreted as language tags yet - they are
# appended to the result like any other value.
#
# Malformed brackets are tolerated instead of raising NoMethodError on nil:
# an empty "[]" is skipped, and an unterminated "[abc" is treated as if it
# ran to the end of the part.
#
# @param chunks [Array<String, nil>] raw name chunks; nil and blank entries
#   are skipped
# @return [Array<String>] parsed names in order of appearance
def parse( chunks )
  ## todo/fix: (re)use nameparser - for now "simple" inline version
  ## fix!!! - note: for now lang gets ignored
  ## fix: add handling for
  ##   Leuven[nl]|Louvain[fr] Löwen[de]
  ##   Antwerpen[nl]|Anvers[fr] [Antwerp]
  ##   Brussel[nl]•Bruxelles[fr]  -> official bi-lingual name
  ##   etc.

  ## note: assumes (default) lang from more_attribs unless otherwise marked e.g. [] assume en etc.

  values = []
  chunks.each do |chunk|
    next if chunk.nil? || chunk.blank?   ## skip nil or empty/blank chunks

    chunk.split( '|' ).each do |part|
      s = StringScanner.new( part )
      s.skip( /[ \t]+/ )   # skip leading whitespace

      until s.eos?
        if s.check( /\[/ )
          bracketed = s.scan( /\[[^\]]+\]/ )
          if bracketed
            value = bracketed[1...-1]   # strip enclosing [] e.g. [Bavaria] => Bavaria
          elsif s.skip( /\[\]/ )
            value = nil                 # empty brackets carry no name
          else
            ## unterminated bracket: take everything after "[" as the value and
            ## consume the rest so the loop is guaranteed to advance
            remainder = s.rest[1..-1].strip
            s.terminate
            value = remainder.empty? ? nil : remainder
          end
        else
          ## scan everything up to the next opening bracket
          value = s.scan( /[^\[]+/ ).strip
        end

        values << value unless value.nil?

        s.skip( /[ \t]+/ )   # skip whitespace between tokens
        logger.debug( "[NameParser] eos?: #{s.eos?}, rest: >#{s.rest}<" )
      end
    end
  end

  logger.debug( "[NameParser] values=#{values.inspect}")

  ## todo: split by bullet ? (official multilang name) e.g. Brussel • Bruxelles
  ## todo: process variants w/ () e.g. Krems (a. d. Donau) etc. ??
  names = values.dup   # for now every value is used as a name as-is

  logger.debug( "[NameParser] names=#{names.inspect}")

  names
end