Class: HashReader
- Inherits:
-
Object
- Object
- HashReader
- Includes:
- LogUtils::Logging
- Defined in:
- lib/textutils/reader/hash_reader.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#each ⇒ Object
nb: returns all values as strings.
-
#each_typed ⇒ Object
TODO: decide on a name - each_object or each_typed? Or move this into a new TypedHashReader class (or similar)?
-
#initialize(arg) ⇒ HashReader
constructor
A new instance of HashReader.
Constructor Details
#initialize(arg) ⇒ HashReader
Returns a new instance of HashReader.
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/textutils/reader/hash_reader.rb', line 48

## Builds a reader from YAML text.
##
## Before parsing, the text is patched up to work around common yaml gotchas:
##   - '!!null' is rewritten to '!null'
##   - tabs are replaced w/ two spaces (a warning is logged per tab)
##   - implicit boolean words (on/off/yes/no/y/n) used as a key or as a
##     free-standing value get quoted so that they load as strings
##
## @param arg [Hash, String] options hash holding the yaml source in +:text+;
##   passing a String is the old (deprecated) style and is treated as a file path
## @return [HashReader] a new instance of HashReader
def initialize( arg )
  ## nb: path is only known for the deprecated file path style;
  ##     it is interpolated into the warnings below (empty otherwise)
  path = nil

  if arg.is_a?( String )
    ## old style (deprecated) - pass in filepath as string
    path = arg
    logger.info "HashReader.new - deprecated API - use HashReader.from_file() instead"
    text = File.read_utf8( path )
  else
    ## assume it's a hash
    text = arg[:text]
  end

  ### hack for syck yaml parser (e.g. ruby 1.9.2) (cannot handle !!null)
  ##   change it to !null to get plain nil
  ##   w/ both syck and psych/libyml
  text = text.gsub( '!!null', '!null' )

  ### hacks for yaml
  ### see yaml gotchas
  ##  - http://www.perlmonks.org/?node_id=738671

  ## replace all tabs w/ two spaces and issue a warning
  ##  nb: yaml does NOT support tabs; see why here -> yaml.org/faq.html
  text = text.gsub( "\t" ) do |_|
    logger.warn "hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html (path=#{path})"
    '  '   # replace w/ two spaces
  end

  ## quote implicit boolean types on,no,n,y
  ##  nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
  ##    no: no
  text = text.gsub( /^([ ]*)(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*:/ ) do |_|
    indent = Regexp.last_match( 1 )
    word   = Regexp.last_match( 2 )
    logger.warn "hash reader - found implicit bool (#{indent}#{word}) for key; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    # nb: preserve leading spaces for structure - might be significant
    "#{indent}'#{word}':"   # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## nb: value must be freestanding (only allow optional eol comment)
  ##  do not escape if part of string sequence e.g.
  ##    key: nb,nn,no,se   =>  nb,nn,'no',se  -- avoid!!
  ##
  ##  nb: a trailing eol comment is part of the match and gets dropped by the replacement
  ##  check: need we add true|false too???
  text = text.gsub( /:[ ]+(ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n)[ ]*($| #.*$)/ ) do |_|
    word = Regexp.last_match( 1 )
    logger.warn "hash reader - found implicit bool (#{word}) for value; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    ": '#{word}'"   # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## NOTE(review): YAML.load may build arbitrary objects w/ older psych versions;
  ##   consider YAML.safe_load if the text can come from untrusted sources
  ##
  ## nb: an empty document loads as false/nil - fall back to an empty hash
  ##     so that #each and #each_typed simply yield nothing
  @hash = YAML.load( text ) || {}
end
Class Method Details
.from_file(path) ⇒ Object
37 38 39 40 41 42 |
# File 'lib/textutils/reader/hash_reader.rb', line 37

## Loads the file at +path+ and builds a reader from its text.
##
## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
##   - the file is read via File.read_utf8; see textutils/utils.rb
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end
.from_string(text) ⇒ Object
44 45 46 |
# File 'lib/textutils/reader/hash_reader.rb', line 44

## Builds a reader from yaml text already held in memory;
## the text is handed to the constructor under the +:text+ option.
def self.from_string( text )
  opts = { text: text }
  HashReader.new( opts )
end
.from_zip(zip_file, entry_path) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/textutils/reader/hash_reader.rb', line 10

## Builds a reader from an entry stored inside an (already opened) zip archive.
##
## @param zip_file   archive object; must respond to find_entry
## @param entry_path path/name of the entry inside the archive
## @raise [Errno::ENOENT] if the archive has no entry for +entry_path+
def self.from_zip( zip_file, entry_path )
  entry = zip_file.find_entry( entry_path )

  ## nb: guard against a missing entry (find_entry handing back nil);
  ##     fail w/ a "no such file" error instead of a NoMethodError on nil
  raise Errno::ENOENT, entry_path.to_s  if entry.nil?

  ## todo: clean/preprocess lines ??
  ##  e.g. CR/LF (\r\n) to LF (\n) - NOT done here (yet)

  ## nb: always close the input stream again (if it can be closed) - even if read fails
  stream = entry.get_input_stream
  begin
    text = stream.read
  ensure
    stream.close  if stream.respond_to?( :close )
  end

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"

  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  ## NB:
  #  for now "hardcoded" to utf8 - what else can we do?
  #  - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

  self.from_string( text )
end
Instance Method Details
#each ⇒ Object
nb: returns all values as strings
109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/textutils/reader/hash_reader.rb', line 109

## Yields every entry of the loaded hash as a pair of strings.
##
## nb: returns all values as strings - key and value are both converted
##     w/ to_s and stripped of leading and trailing whitespace
##
## @yield [key, value] normalized key and value (both strings)
## @return [Enumerator] if no block is given
def each
  ## nb: w/o a block hand back an enumerator (instead of failing on yield)
  return enum_for( :each )  unless block_given?

  ## nb: @hash might be false/nil if the yaml document was empty - nothing to yield then
  ( @hash || {} ).each do |key_wild, value_wild|
    # normalize
    # - key n value as string (not symbols, bool? int? array?)
    # - remove leading and trailing whitespace
    key   = key_wild.to_s.strip
    value = value_wild.to_s.strip

    logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"

    yield( key, value )
  end
end
#each_typed ⇒ Object
TODO: decide on a name - each_object or each_typed?
Or move this into a new TypedHashReader class (or similar)?
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# File 'lib/textutils/reader/hash_reader.rb', line 127

## Yields every entry of the loaded hash w/ the value kept "as is" (typed).
##
## The key is always converted to a stripped string; the value is only
## stripped if it is a String - any other value is passed on unchanged.
##
## todo: what name to use: each_object or each_typed ???
##   or use new TypedHashReader class or similar??
##
## @yield [key, value] normalized key (string) and typed value
## @return [Enumerator] if no block is given
def each_typed
  ## nb: w/o a block hand back an enumerator (instead of failing on yield)
  return enum_for( :each_typed )  unless block_given?

  ## nb: @hash might be false/nil if the yaml document was empty - nothing to yield then
  ( @hash || {} ).each do |key_wild, value_wild|
    # normalize
    # - key as string (not symbols, bool? int? array?)
    # - remove leading and trailing whitespace
    key = key_wild.to_s.strip

    # nb: only strings get stripped; everything else keeps its type
    value = value_wild.is_a?( String ) ? value_wild.strip : value_wild

    logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"

    yield( key, value )
  end
end