Class: Athena::Formats::DBM

Inherits:
Base
  • Object
show all
Defined in:
lib/athena/formats/dbm.rb

Constant Summary collapse

# Record separator, taken from the Midos parser's default (DEFAULT_RS).
# Emitted by #convert after the last field line of a record.
RECORD_SEPARATOR =
Nuggets::Midos::Parser::DEFAULT_RS
# Field separator, taken from the Midos parser's default (DEFAULT_FS).
# Placed by #convert between a field name and its content.
FIELD_SEPARATOR =
Nuggets::Midos::Parser::DEFAULT_FS
# Value separator, taken from the Midos parser's default (DEFAULT_VS).
# Used by #convert to join multiple values of one element.
VALUE_SEPARATOR =
Nuggets::Midos::Parser::DEFAULT_VS
# Converter object responding to +iconv(string)+, used to turn UTF-8 text
# into Latin-1.
#
# When the +iconv+ library can be loaded, an Iconv instance configured with
# 'latin1//TRANSLIT//IGNORE' is used. Otherwise a plain object backed by
# String#encode stands in for it. In both cases, setting the environment
# variable ATHENA_DBM_NOCONV turns +iconv+ into a no-op that returns its
# argument unchanged.
TO_LATIN1 =
begin
  require 'iconv'
rescue LoadError
  Object.new.tap { |x|
    if ENV['ATHENA_DBM_NOCONV']
      def x.iconv(s); s; end
    else
      # Drop invalid or unmappable characters instead of raising, so the
      # fallback is as lenient as the '//IGNORE' Iconv converter below.
      def x.iconv(s)
        s.encode('iso-8859-1', 'utf-8',
                 :invalid => :replace, :undef => :replace, :replace => '')
      end
    end
  }
else
  iconv = Iconv.new('latin1//TRANSLIT//IGNORE', 'utf-8')
  def iconv.iconv(s); s; end if ENV['ATHENA_DBM_NOCONV']
  iconv
end

Instance Attribute Summary collapse

Attributes inherited from Base

#config, #output, #record_element

Instance Method Summary collapse

Methods inherited from Base

#deferred?, directions, format, has_direction?, init, #init, #raw?, #run

Instance Attribute Details

#dbm_parser ⇒ Object (readonly)

Returns the value of attribute dbm_parser.



58
59
60
# File 'lib/athena/formats/dbm.rb', line 58

# Reader for the +@dbm_parser+ instance variable.
def dbm_parser; @dbm_parser; end

Instance Method Details

#convert(record) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/athena/formats/dbm.rb', line 78

# Renders +record+ as a block of DBM text.
#
# The output starts with an "ID" line, followed by one line per entry of
# +record.struct+ (upper-cased field name, FIELD_SEPARATOR, then the field's
# format string filled with its element strings and passed through
# TO_LATIN1). The lines are joined with CRLF and terminated by
# RECORD_SEPARATOR and a trailing CRLF entry.
#
# For each element, the values are stripped, runs matching CRLF_RE are
# replaced by a single space, and blank/nil values are discarded. The
# remaining values are joined with VALUE_SEPARATOR; if none remain, the
# field's +:empty+ placeholder is used instead.
#
# Note: sets the default of each field's +:values+ hash to +[]+ so that
# elements without values can be iterated without adding keys.
#
# Returns the resulting String.
def convert(record)
  lines = ["ID#{FIELD_SEPARATOR}#{record.id}"]

  record.struct.each do |field, struct|
    lookup = struct[:values]
    lookup.default = []

    strings = struct[:elements].map do |element|
      kept = []

      lookup[element].each do |value|
        next unless value

        text = value.strip.gsub(CRLF_RE, ' ')
        next if text.empty?

        kept << text
      end

      if kept.empty?
        struct[:empty]
      else
        kept.join(VALUE_SEPARATOR)
      end
    end

    content = TO_LATIN1.iconv(struct[:string] % strings)
    lines << "#{field.to_s.upcase}#{FIELD_SEPARATOR}#{content}"
  end

  lines.push(RECORD_SEPARATOR, CRLF).join(CRLF)
end

#parse(input, &block) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/athena/formats/dbm.rb', line 60

# Parses +input+ with #dbm_parser and builds one Athena::Record per
# (id, doc) pair the parser yields.
#
# Each record is created with the pair's id and the given +block+. Inside
# the record's initialization block, every configured element is looked up
# in the doc and each of its values (a single value or an array of them) is
# passed to Record#update along with that element's field configuration.
#
# Returns the number of pairs processed.
def parse(input, &block)
  count = 0

  dbm_parser.parse(input) do |id, doc|
    Athena::Record.new(id, block) do |record|
      config.each do |element, field_config|
        Array(doc[element]).each do |value|
          record.update(element, value, field_config)
        end
      end
    end

    count += 1
  end

  count
end