Class: HashReader

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/textutils/reader/hash_reader.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(arg) ⇒ HashReader

Returns a new instance of HashReader.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/textutils/reader/hash_reader.rb', line 48

# Builds a reader from YAML text, pre-processing the text so that common
# YAML gotchas (tabs, implicit booleans such as no/yes/on/off) do not
# silently change the data before it is parsed.
#
# @param arg [String, Hash] either a file path (deprecated - use
#   HashReader.from_file instead) or an options hash carrying the YAML
#   source in :text
# @raise [ArgumentError] if the options hash carries no :text
def initialize( arg )

  if arg.is_a?( String )  ## old style (deprecated) - pass in filepath as string
    path = arg
    logger.info "HashReader.new - deprecated API - use HashReader.from_file() instead"
    text = File.read_utf8( path )
  else   ## assume it's a hash
    opts = arg
    path = nil   ## no file path known in this case; log messages below show an empty path
    text = opts[:text] if opts
    ## fail early with a clear message instead of a NoMethodError on nil.gsub below
    raise ArgumentError, "HashReader.new - no text passed in; expected hash with key :text" if text.nil?
  end

  ### hack for syck yaml parser (e.g.ruby 1.9.2) (cannot handle !!null)
  ##   change it to !null to get plain nil
  ##   w/ both syck and psych/libyml

  text = text.gsub( '!!null', '!null' )

  ### hacks for yaml

  ### see yaml gotchas
  ##  - http://www.perlmonks.org/?node_id=738671

  ## replace all tabs w/ two spaces and issue a warning (one warning per tab found)
  ## nb: yaml does NOT support tabs see why here -> yaml.org/faq.html

  text = text.gsub( "\t" ) do |_|
    logger.warn "hash reader - found tab (\t) replacing w/ two spaces; yaml forbids tabs; see yaml.org/faq.html (path=#{path})"
    '  '  # replace w/ two spaces
  end

  ## quote implicit boolean types on,no,n,y
  ##   (word list shared by the key and the value pattern below)
  bool_words = 'ON|On|on|OFF|Off|off|YES|Yes|yes|NO|No|no|Y|y|N|n'

  ## nb: escape only if key e.g. no: or "free standing" value on its own line e.g.
  ##   no: no

  text = text.gsub( /^([ ]*)(#{bool_words})[ ]*:/ ) do
    ## grab the captures first; they are only valid until the next regex match
    indent, word = $1, $2
    logger.warn "hash reader - found implicit bool (#{indent}#{word}) for key; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    # nb: preserve leading spaces for structure - might be significant
    "#{indent}'#{word}':"  # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## nb: value must be freestanding (only allow optional eol comment)
  ##  do not escape if part of string sequence e.g.
  ##  key: nb,nn,no,se   => nb,nn,'no',se  -- avoid!!
  ##  note: an optional eol comment is part of the match and gets dropped
  #
  #  check: need we add true|false too???

  text = text.gsub( /:[ ]+(#{bool_words})[ ]*($| #.*$)/ ) do
    word = $1
    logger.warn "hash reader - found implicit bool (#{word}) for value; adding quotes to turn into string; see yaml.org/refcard.html (path=#{path})"
    ": '#{word}'"  # add quotes to turn it into a string (not bool e.g. true|false)
  end

  ## nb: an empty (or comment-only) document makes YAML.load return a falsy value;
  ##   fall back to an empty hash so that each/each_typed can always iterate
  ## NOTE(review): YAML.load may instantiate arbitrary objects on older psych versions;
  ##   only feed trusted text (or consider YAML.safe_load)
  @hash = YAML.load( text ) || {}
end

Class Method Details

.from_file(path) ⇒ Object



37
38
39
40
41
42
# File 'lib/textutils/reader/hash_reader.rb', line 37

# Builds a reader from the YAML file found at path.
#
# The file is read via File.read_utf8 (see textutils/utils.rb), that is,
# utf-8 encoding is assumed/enforced (with or without BOM - byte order mark);
# the resulting text is handed on to from_string.
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end

.from_string(text) ⇒ Object



44
45
46
# File 'lib/textutils/reader/hash_reader.rb', line 44

# Wraps the passed-in YAML text in an options hash (key :text) and
# hands it to HashReader.new.
def self.from_string( text )
  opts = { text: text }
  HashReader.new( opts )
end

.from_zip(zip_file, entry_path) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/textutils/reader/hash_reader.rb', line 10

# Reads one entry out of a zip archive and builds a reader from its text.
#
# @param zip_file   archive object; must respond to find_entry
#   (presumably a rubyzip Zip::File - confirm against callers)
# @param entry_path [String] name of the entry inside the archive
# @return [Object] whatever from_string returns for the entry's text
# @raise [Errno::ENOENT] if the archive has no entry named entry_path
def self.from_zip( zip_file, entry_path )
  entry = zip_file.find_entry( entry_path )
  ## fail with a clear error (naming the entry) instead of a NoMethodError on nil
  raise Errno::ENOENT, entry_path.to_s if entry.nil?

  ## todo/fix: add force encoding to utf-8 ??
  ##  check!!!
  ##  clean/preprocess lines
  ##  e.g. CR/LF (/r/n) to LF (e.g. /n)
  stream = entry.get_input_stream
  begin
    text = stream.read
  ensure
    ## always release the stream (if it can be closed); it was left open before
    stream.close if stream.respond_to?( :close )
  end

  ## NOTE: needs logger ref; only available in instance methods; use global logger for now
  logger = LogUtils::Logger.root
  logger.debug "text.encoding.name (before): #{text.encoding.name}"
  #####
  # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  ## NB:
  # for now "hardcoded" to utf8 - what else can we do?
  # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
  text = text.force_encoding( Encoding::UTF_8 )
  logger.debug "text.encoding.name (after): #{text.encoding.name}"

  ## todo:
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
  ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

  self.from_string( text )
end

Instance Method Details

#each ⇒ Object

nb: returns all values as strings



109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/textutils/reader/hash_reader.rb', line 109

# Iterates over all top-level entries of the parsed hash.
#
# nb: returns all values as strings - key and value are both converted
#   with to_s and stripped of leading and trailing whitespace
#
# @yield [key, value] the normalized key and value (both strings)
# @return [Enumerator] if no block is given
def each
  return enum_for( :each ) unless block_given?

  ## nb: @hash may be false/nil (e.g. for an empty yaml document); treat as empty
  ( @hash || {} ).each do |key_wild, value_wild|
    # normalize
    # - key n value as string (not symbols, bool? int? array?)
    # - remove leading and trailing whitespace
    key   = key_wild.to_s.strip
    value = value_wild.to_s.strip

    logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"

    yield( key, value )
  end
end

#each_typed ⇒ Object

todo: what name to use: each_object or each_typed ???

or use new TypedHashReader class or similar??


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/textutils/reader/hash_reader.rb', line 127

# Iterates over all top-level entries of the parsed hash, keeping value types.
#
# The key is converted with to_s and stripped; a string value is stripped,
# any other value (e.g. number, bool, array, nil) is passed on unchanged.
#
# @yield [key, value] the normalized key (string) and the typed value
# @return [Enumerator] if no block is given
def each_typed
  return enum_for( :each_typed ) unless block_given?

  ## nb: @hash may be false/nil (e.g. for an empty yaml document); treat as empty
  ( @hash || {} ).each do |key_wild, value_wild|
    # normalize
    # - key as string (not symbols, bool? int? array?)
    # - remove leading and trailing whitespace
    key = key_wild.to_s.strip

    # only strings get stripped; everything else keeps its type
    value = value_wild.is_a?( String ) ? value_wild.strip : value_wild

    logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value}<<"

    yield( key, value )
  end
end