Class: WorldDb::ReaderBaseWithOpts

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging, Models, TextUtils::ValueHelper
Defined in:
lib/worlddb/readers/base.rb

Direct Known Subclasses

LangReader, UsageReader

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TextUtils::ValueHelper

#is_state?, #match_city, #match_country, #match_metro, #match_metro_flag, #match_metro_pop, #match_state_for_country, #match_supra, #match_supra_flag

Constructor Details

#initialize(text, opts = {}) ⇒ ReaderBaseWithOpts

Returns a new instance of ReaderBaseWithOpts.



137
138
139
140
141
142
143
144
# File 'lib/worlddb/readers/base.rb', line 137

## Stores the text to read plus two boolean flags derived from opts.
#
#  text - text content handed to the reader (kept as-is in @text)
#  opts - options hash; the keys :skip_tags and :strict are looked at
#         (each flag ends up as a plain true or false)
def initialize( text, opts={} )
  @text = text

  ## flag: when set, do NOT generate/add any tags for countries/regions/cities
  @skip_tags = !!opts[:skip_tags].present?

  ## flag: for now, issue a warning on update, that is,
  ##   if the key/record (country,region,city) already exists
  @strict = !!opts[:strict].present?
end

Class Method Details

.from_file(path, opts = {}) ⇒ Object



121
122
123
124
125
126
# File 'lib/worlddb/readers/base.rb', line 121

## Reads the file at path and hands its text (plus opts) on to from_string.
#
#  path - path of the file to read
#  opts - options hash; passed through unchanged
#
# Returns whatever from_string returns.
def self.from_file( path, opts={} )
  ## note: File.read_utf8 is expected to assume/enforce utf-8 encoding
  ##   (with or without BOM - byte order mark) - see textutils/utils.rb
  from_string( File.read_utf8( path ), opts )
end

.from_string(text, opts = {}) ⇒ Object



128
129
130
131
# File 'lib/worlddb/readers/base.rb', line 128

## Builds a new reader from text that is already in memory.
#
#  text - text content for the reader
#  opts - options hash; passed on to new unchanged
#
# Prints a debug line to stdout, then returns the result of new( text, opts ).
def self.from_string( text, opts={} )
  ## note: the receiver is assumed to be the derived (concrete) reader class
  puts "[debug] ReaderBase.from_string calling #{name}.new"
  new( text, opts )
end

.from_zip(zip_file, entry_path) ⇒ Object

todo: add opts={} etc.



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/worlddb/readers/base.rb', line 92

## Builds a reader from a text entry stored inside a zip archive.
#
#  zip_file   - archive object; must respond to find_entry( path )
#  entry_path - path of the entry inside the archive
#  opts       - options hash; passed on unchanged to from_string
#               (optional; defaults to {} so existing two-argument calls keep working)
#
# Returns whatever from_string returns.
# Raises ArgumentError if find_entry returns nil for entry_path.
def self.from_zip( zip_file, entry_path, opts={} )
  ## get text content from zip
  entry = zip_file.find_entry( entry_path )

  ## fail early with a clear message (instead of a NoMethodError on nil further down)
  raise ArgumentError, "zip entry >#{entry_path}< not found"  if entry.nil?

  ## todo/fix: clean/preprocess lines
  ##  e.g. convert CR/LF (\r\n) to LF (\n)
  text = entry.get_input_stream().read()

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"

  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  # for now "hardcoded" to utf8 - what else can we do?
  # - note: force_encoding will NOT change the bytes;
  #     it only changes the assumed encoding (w/o translation)
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

  ## pass the options along - same as from_file does
  self.from_string( text, opts )
end

Instance Method Details

#skip_tags? ⇒ Boolean

Returns:

  • (Boolean)


134
# File 'lib/worlddb/readers/base.rb', line 134

## Tells whether the skip-tags flag is switched on.
# Returns true only if @skip_tags is exactly true; false otherwise.
def skip_tags?
  @skip_tags == true
end

#strict? ⇒ Boolean

Returns:

  • (Boolean)


135
# File 'lib/worlddb/readers/base.rb', line 135

## Tells whether strict mode is switched on.
# Returns true only if @strict is exactly true; false otherwise.
def strict?
  @strict == true
end