Class: CMess::BConv

Inherits:
Object
  • Object
show all
Defined in:
lib/cmess/bconv.rb

Overview

Convert between bibliographic (and other) encodings.

Constant Summary collapse

VERSION =

our version ;-)

'0.0.2'
INTERMEDIATE_ENCODING =
'utf-8'
DEFAULT_CHARTAB_FILE =
File.join(CMess::DATA_DIR, 'chartab.yaml')

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input, output, source_encoding, target_encoding, chartab = DEFAULT_CHARTAB_FILE) ⇒ BConv

Returns a new instance of BConv.



74
75
76
77
78
79
80
81
82
# File 'lib/cmess/bconv.rb', line 74

# Builds a converter that reads from +input+ and writes to +output+.
#
# input, output    - stored as given; #convert later calls each/each_byte on
#                    the former and print/puts on the latter.
# source_encoding,
# target_encoding  - encoding names; stored upcased.
# chartab          - anything the class-level load_chartab accepts (a Hash or
#                    a String path); defaults to DEFAULT_CHARTAB_FILE.
def initialize(input, output, source_encoding, target_encoding, chartab = DEFAULT_CHARTAB_FILE)
  klass = self.class

  @input  = input
  @output = output

  @source_encoding = source_encoding.upcase
  @target_encoding = target_encoding.upcase

  # The encodings list is derived from the already-loaded table.
  @chartab   = klass.load_chartab(chartab)
  @encodings = klass.encodings(@chartab)
end

Instance Attribute Details

#chartab ⇒ Object (readonly)

Returns the value of attribute chartab.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @chartab, the character table that #initialize obtains from
# the class-level load_chartab.
def chartab
  @chartab
end

#encodings ⇒ Object (readonly)

Returns the value of attribute encodings.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @encodings, the list of encoding names that #initialize derives
# from the loaded character table via the class-level encodings method.
def encodings
  @encodings
end

#input ⇒ Object (readonly)

Returns the value of attribute input.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @input, the source object passed to #initialize; #convert
# iterates it with each_byte or each.
def input
  @input
end

#output ⇒ Object (readonly)

Returns the value of attribute output.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @output, the sink object passed to #initialize; #convert
# writes to it with print or puts.
def output
  @output
end

#source_encoding ⇒ Object (readonly)

Returns the value of attribute source_encoding.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @source_encoding, the upcased source encoding name set in
# #initialize.
def source_encoding
  @source_encoding
end

#target_encoding ⇒ Object (readonly)

Returns the value of attribute target_encoding.



72
73
74
# File 'lib/cmess/bconv.rb', line 72

# Reader for @target_encoding, the upcased target encoding name set in
# #initialize.
def target_encoding
  @target_encoding
end

Class Method Details

.convert(*args) ⇒ Object



54
55
56
# File 'lib/cmess/bconv.rb', line 54

# Convenience entry point: instantiates a converter with +args+ (forwarded
# unchanged to .new) and immediately runs its #convert.
#
# Returns whatever the instance-level #convert returns.
def convert(*args)
  converter = new(*args)
  converter.convert
end

.encodings(chartab = DEFAULT_CHARTAB_FILE) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/cmess/bconv.rb', line 46

# Lists the encoding names a character table knows about.
#
# The names are read from the keys of the table's FIRST entry only. Keys
# starting with "__" are skipped; the remaining ones are upcased and sorted.
#
# chartab - a Hash, or a String path to a YAML file; resolved through
#           load_chartab. Defaults to DEFAULT_CHARTAB_FILE.
#
# Returns a sorted Array of upcased encoding names. An empty table yields an
# empty Array (previously this raised NoMethodError on nil).
def encodings(chartab = DEFAULT_CHARTAB_FILE)
  table = load_chartab(chartab)
  return [] if table.empty?

  table.values.first.keys.
    reject { |name| name =~ /\A__/ }.
    map { |name| name.upcase }.
    sort
end

.load_chartab(chartab) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
# File 'lib/cmess/bconv.rb', line 58

# Resolves a character table argument to the table itself.
#
# chartab - either a Hash, which is returned untouched, or a String naming a
#           YAML file, which is parsed with YAML.load_file.
#
# Returns the Hash passed in, or whatever YAML.load_file produces.
# Raises ArgumentError if +chartab+ is neither a Hash nor a String.
# Raises RuntimeError if the named file is not readable.
def load_chartab(chartab)
  return chartab if chartab.is_a?(Hash)

  unless chartab.is_a?(String)
    raise ArgumentError, "invalid chartab of type #{chartab.class}"
  end

  unless File.readable?(chartab)
    raise "chartab file not found: #{chartab}"
  end

  YAML.load_file(chartab)
end

Instance Method Details

#convert ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/cmess/bconv.rb', line 88

# Converts +input+ from source_encoding to target_encoding, writing the
# result to +output+.
#
# The strategy depends on which of the two encodings is listed in #encodings
# (i.e. is described by the chartab):
#
# * both       - input is read byte-wise; each byte goes through #map and is
#                printed. @charmap maps the source value of every chartab
#                entry to the packed ('U*') target value.
# * source only - as above, but @charmap maps to the character whose code
#                point is the entry's hexadecimal key, and the result of
#                #map is passed through the converter from iconv_to.
# * target only - input is read line-wise, run through the converter from
#                iconv_from, split into code points, and each code point is
#                looked up in a local table keyed by the entry's hexadecimal
#                key (code points missing from the table look up as nil).
# * neither    - each line is passed through the converter from
#                iconv_from_to and written with puts.
#
# NOTE(review): #map, iconv_to, iconv_from and iconv_from_to are defined
# outside this listing; presumably #map consults @charmap - confirm.
def convert
  table_source = encoding?(source_encoding)
  table_target = encoding?(target_encoding)

  if table_source && table_target
    @charmap = chartab.each_with_object({}) do |(_code, entry), table|
      table[entry[source_encoding]] = entry[target_encoding].pack('U*')
    end

    input.each_byte do |byte|
      output.print(map(byte))
    end
  elsif table_source
    # Converter is obtained before @charmap is built, as in the original order.
    converter = iconv_to

    @charmap = chartab.each_with_object({}) do |(code, entry), table|
      table[entry[source_encoding]] = [code.to_i(16)].pack('U*')
    end

    input.each_byte do |byte|
      output.print(converter.iconv(map(byte)))
    end
  elsif table_target
    converter = iconv_from

    by_codepoint = chartab.each_with_object({}) do |(code, entry), table|
      table[code.to_i(16)] = entry[target_encoding].pack('U*')
    end

    input.each do |line|
      converter.iconv(line).unpack('U*').each do |codepoint|
        output.print(by_codepoint[codepoint])
      end
    end
  else
    converter = iconv_from_to

    input.each do |line|
      output.puts(converter.iconv(line))
    end
  end
end

#encoding?(encoding) ⇒ Boolean

Returns:

  • (Boolean)


84
85
86
# File 'lib/cmess/bconv.rb', line 84

# Tells whether +encoding+ is one of the names in #encodings.
#
# The comparison is an exact include? check; #initialize stores the source
# and target names upcased, matching the upcased names the class-level
# encodings method produces.
def encoding?(encoding)
  encodings.include?(encoding)
end