Class: Unrich::Text

Inherits:
Object
  • Object
show all
Defined in:
lib/unrich.rb

Overview

Main class that parses rich text (RTF) into "unrich" text, i.e. regular plain text.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(rtf_text) ⇒ Text

Returns a new instance of Text.



12
13
14
# File 'lib/unrich.rb', line 12

# Creates a Text that wraps the given RTF source.
#
# @param rtf_text [String] the RTF source to convert (the other methods on
#   this page call String methods such as +gsub+ and +match+ on it)
def initialize(rtf_text)
  # Stored via the rtf_text= writer rather than by setting @rtf_text directly.
  self.rtf_text = rtf_text
end

Instance Attribute Details

#rtf_text ⇒ Object

Returns the value of attribute rtf_text.



10
11
12
# File 'lib/unrich.rb', line 10

# Reader for the RTF source held by this instance.
#
# @return [Object] the current value of @rtf_text (nil if never assigned)
def rtf_text
  @rtf_text
end

Class Method Details

.read(contents) ⇒ Object



59
60
61
62
63
64
65
66
# File 'lib/unrich.rb', line 59

# Builds a new instance from either RTF text or a readable source.
#
# A String is used as the RTF source directly. Any other object that
# responds to +read+ (File, StringIO, Tempfile, ...) is read in full and
# its content is used as the RTF source.
#
# @param contents [String, #read] RTF text, or an object to read it from
# @return [Object] a new instance of the receiving class
# @raise [Error] when +contents+ is neither a String nor readable
def read(contents)
  return new(contents) if contents.is_a?(String)
  return new(contents.read) if contents.respond_to?(:read)

  raise Error, "unknown contents"
end

Instance Method Details

#encoding ⇒ Object



16
17
18
19
20
# File 'lib/unrich.rb', line 16

# Name of the Ruby encoding matching the ANSI code page declared in the
# RTF source.
#
# Finds the first <tt>\ansicpgN</tt> control word and resolves "CPN" through
# Encoding.find, so <tt>\ansicpg1252</tt> yields "Windows-1252".
#
# A non-nil result is memoized in @encoding; a nil result is recomputed on
# the next call.
#
# @return [String, nil] the encoding name, or nil when no code page is
#   declared or Ruby has no encoding for it
def encoding
  @encoding ||= begin
    code_page = rtf_text.to_s[/\\ansicpg(\d+)/, 1]
    code_page && Encoding.find("CP#{code_page}").name
  rescue ArgumentError
    # Encoding.find raises for code pages Ruby does not know about.
    nil
  end
end

#to_s ⇒ Object



37
38
39
# File 'lib/unrich.rb', line 37

# String form of the document: delegates to #to_txt.
#
# @return [Object] whatever #to_txt returns
def to_s
  to_txt
end

#to_txt ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/unrich.rb', line 22

# Converts the RTF source into plain UTF-8 text.
#
# Steps, in order:
# 1. Decode <tt>\'hh</tt> hex escapes (upper- or lowercase digits) from the
#    document encoding (see #encoding) into UTF-8.
# 2. Turn each <tt>\par</tt> followed by one whitespace character into a
#    newline.
# 3. Drop the first <tt>{\rtf1</tt> opener and the first three brace groups
#    that contain no nested brace.
#    NOTE(review): presumably this targets header tables such as the font
#    table; confirm against real documents.
# 4. Strip the remaining control words, one trailing "}", NUL characters
#    and surrounding whitespace.
#
# @return [String] the plain-text rendering of the document
def to_txt
  # The captured pair is packed as one byte, tagged with the document
  # encoding, then transcoded.
  decoded = rtf_text.gsub(/\\'(\h{2})/) do
    [Regexp.last_match(1)].pack("H*").force_encoding(encoding).encode("utf-8")
  end

  txt = decoded.gsub(/\\par\s/, "\n").sub('{\rtf1', "")

  # Each pass removes the leftmost innermost group. The second ^ in the
  # character class is a literal caret, so groups containing "^" are skipped.
  3.times do
    txt = txt.sub(/{[^{^}]*}/, "")
  end

  txt = txt.gsub(/\\\w*/, "").strip.chomp("}")
  txt.delete("\u0000").strip
end