Class: WorldDb::UsageReader
- Inherits:
-
Object
- Object
- WorldDb::UsageReader
- Includes:
- LogUtils::Logging, Models, TextUtils::ValueHelper
- Defined in:
- lib/worlddb/readers/usage.rb
Class Method Summary collapse
- .from_file(path, opts = {}) ⇒ Object
- .from_string(text, opts = {}) ⇒ Object
- .from_zip(zip_file, entry_path) ⇒ Object
  todo: add opts.
Instance Method Summary collapse
- #initialize(text, opts = {}) ⇒ UsageReader (constructor)
  A new instance of UsageReader.
- #read ⇒ Object
- #skip_tags? ⇒ Boolean
- #strict? ⇒ Boolean
Constructor Details
#initialize(text, opts = {}) ⇒ UsageReader
Returns a new instance of UsageReader.
61 62 63 64 65 66 67 68 |
# File 'lib/worlddb/readers/usage.rb', line 61
#
# Builds a reader around +text+ and records the two boolean options.
#
# text - source text, kept in @text
# opts - options hash (default: {}); recognised keys:
#        :skip_tags - do NOT generate/add any tags for countries/regions/cities
#        :strict    - for now issue a warning on update, that is, if the
#                     key/record (country, region, city) already exists
#
# NOTE(review): +present?+ is not defined in this file; presumably it is
# ActiveSupport's Object#present?, which already returns true/false - confirm.
def initialize( text, opts={} )
  @text = text

  ## option: do NOT generate/add any tags for countries/regions/cities
  @skip_tags = opts[:skip_tags].present?

  ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
  @strict = opts[:strict].present?
end
Class Method Details
.from_file(path, opts = {}) ⇒ Object
46 47 48 49 50 51 |
# File 'lib/worlddb/readers/usage.rb', line 46
#
# Builds a reader from the file at +path+.
#
# path - path handed to File.read_utf8
# opts - options hash forwarded unchanged to from_string (default: {})
#
# Returns whatever from_string returns.
def self.from_file( path, opts={} )
  ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
  ##  - see textutils/utils.rb
  text = File.read_utf8( path )

  self.from_string( text, opts )
end
.from_string(text, opts = {}) ⇒ Object
53 54 55 |
# File 'lib/worlddb/readers/usage.rb', line 53
#
# Builds a reader directly from in-memory text.
#
# text - source text passed to the constructor
# opts - options hash passed to the constructor (default: {})
#
# Returns a new UsageReader.
def self.from_string( text, opts={} )
  UsageReader.new( text, opts )
end
.from_zip(zip_file, entry_path) ⇒ Object
todo: add opts
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/worlddb/readers/usage.rb', line 17
#
# Builds a reader from one entry of an already opened zip archive.
#
# zip_file   - object responding to find_entry( entry_path )
# entry_path - path of the entry inside the archive
# opts       - options hash forwarded unchanged to from_string (default: {});
#              optional, so existing two-argument callers keep working
#
# Returns whatever from_string returns.
# Raises ArgumentError if find_entry returns nil for entry_path.
def self.from_zip( zip_file, entry_path, opts={} )
  ## get text content from zip
  entry = zip_file.find_entry( entry_path )

  ## fail with a readable message instead of a NoMethodError on nil below
  raise ArgumentError, "zip entry not found: #{entry_path}" if entry.nil?

  ## todo/fix: add force encoding to utf-8 ??
  ##  check!!!
  ##  clean/preprocess lines
  ##  e.g. CR/LF (/r/n) to LF (e.g. /n)
  text = entry.get_input_stream().read()

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"

  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
  ##   (note: no local +path+ exists in this method - presumably entry_path is meant)

  self.from_string( text, opts )
end
Instance Method Details
#read ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/worlddb/readers/usage.rb', line 70
#
# Parses @text with HashReader and creates one Usage record per
# (country, language) pair.
#
# Each entry is a country key mapped to a comma-separated list of language
# keys; a parenthesised comment inside a language key is dropped.
# Country and language are looked up with find_by_key! and every record is
# created with official: true and minor: false.
#
# Returns the result of reader.each.
def read
  reader = HashReader.from_string( @text )

  reader.each do |key, value|
    ### fix:
    ##  move to Usage.read() for (re)use
    logger.debug " adding langs >>#{value}<<to country >>#{key}<<"

    country = Country.find_by_key!( key )

    value.split(',').each do |raw_lang_key|
      ### remove (optional comment) from key (e.g. caribbean (islands))
      ##  and remove leading and trailing space
      lang_key = raw_lang_key.gsub( /\(.+\)/, '' ).strip

      lang = Lang.find_by_key!( lang_key )
      Usage.create!( country_id: country.id, lang_id: lang.id, official: true, minor: false )
    end
  end
end
#skip_tags? ⇒ Boolean
58 |
# File 'lib/worlddb/readers/usage.rb', line 58
#
# Returns true only when @skip_tags is exactly true, false otherwise.
def skip_tags?
  @skip_tags == true
end
#strict? ⇒ Boolean
59 |
# File 'lib/worlddb/readers/usage.rb', line 59
#
# Returns true only when @strict is exactly true, false otherwise.
def strict?
  @strict == true
end