Class: ChupaText::Data

Inherits:
Object
  • Object
show all
Defined in:
lib/chupa-text/data.rb

Direct Known Subclasses

InputData, TextData, VirtualFileData

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Data

Returns a new instance of Data.



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/chupa-text/data.rb', line 66

# Creates a new data object with every field set to its default.
#
# @param options [Hash, nil] Options for the data. nil is treated as an
#   empty Hash. When the :source_data key holds a data object, its
#   metadata is merged into this object with #merge! and the object is
#   kept as #source.
def initialize(options={})
  # Content-related fields start out unset.
  @uri = nil
  @body = nil
  @size = nil
  @path = nil
  @mime_type = nil
  @attributes = Attributes.new
  @source = nil

  # Screenshot-related fields and their defaults.
  @screenshot = nil
  @need_screenshot = true
  @expected_screenshot_size = [200, 200]

  @options = options || {}
  parent = @options[:source_data]
  return unless parent

  # Copy the parent's metadata first, then remember the parent itself.
  merge!(parent)
  @source = parent
end

Instance Attribute Details

#attributes ⇒ Attributes (readonly)

Returns the attributes of the data.

Returns:

  • (Attributes)

    The attributes of the data.



48
49
50
# File 'lib/chupa-text/data.rb', line 48

# @return [Attributes] The attributes of the data.
def attributes; @attributes; end

#body ⇒ String?

Returns the content of the data, nil if the data doesn't have any content.

Returns:

  • (String, nil)

    The content of the data, nil if the data doesn't have any content.



30
31
32
# File 'lib/chupa-text/data.rb', line 30

# @return [String, nil] The content of the data, nil if the data doesn't
#   have any content.
def body; @body; end

#expected_screenshot_size ⇒ Array<Integer, Integer>

Returns the expected screenshot size.

Returns:

  • (Array<Integer, Integer>)

    the expected screenshot size.



64
65
66
# File 'lib/chupa-text/data.rb', line 64

# @return [Array<Integer, Integer>] the expected screenshot size.
def expected_screenshot_size; @expected_screenshot_size; end

#need_screenshot=(value) ⇒ Bool (writeonly)

Returns the specified value.

Parameters:

  • value (Bool)

    true when screenshot is needed.

Returns:

  • (Bool)

    the specified value



61
62
63
# File 'lib/chupa-text/data.rb', line 61

# Sets whether a screenshot is needed.
#
# @param value [Bool] true when screenshot is needed.
# @return [Bool] the specified value
def need_screenshot=(value); @need_screenshot = value; end

#path ⇒ String?

Returns the path associated with the content of the data, nil if the data isn't associated with any file.

The path may not be related to the original content. For example, "/tmp/XXX.txt" may be returned for the data of "http://example.com/XXX.txt".

This value is useful to use an external command to extract text and meta-data.

Returns:

  • (String, nil)

    The path associated with the content of the data, nil if the data isn't associated with any file.

    The path may not be related to the original content. For example, "/tmp/XXX.txt" may be returned for the data of "http://example.com/XXX.txt".

    This value is useful to use an external command to extract text and meta-data.



45
46
47
# File 'lib/chupa-text/data.rb', line 45

# @return [String, nil] The path associated with the content of the data,
#   nil if the data isn't associated with any file.
def path; @path; end

#screenshot ⇒ Screenshot?

Returns the screenshot of the data. For example, the first page image of a PDF file.

Returns:

  • (Screenshot, nil)

    The screenshot of the data. For example, the first page image of a PDF file.



57
58
59
# File 'lib/chupa-text/data.rb', line 57

# @return [Screenshot, nil] The screenshot of the data.
def screenshot; @screenshot; end

#size ⇒ Integer?

Returns the byte size of the data, nil if the data doesn't have any content.

Returns:

  • (Integer, nil)

    The byte size of the data, nil if the data doesn't have any content.



34
35
36
# File 'lib/chupa-text/data.rb', line 34

# @return [Integer, nil] The byte size of the data, nil if the data
#   doesn't have any content.
def size; @size; end

#source ⇒ Data?

Returns the source of the data. For example, text data (hello.txt) in archive data (hello.tar) has the archive data in #source.

Returns:

  • (Data, nil)

    The source of the data. For example, text data (hello.txt) in archive data (hello.tar) has the archive data in #source.



53
54
55
# File 'lib/chupa-text/data.rb', line 53

# @return [Data, nil] The source of the data.
def source; @source; end

#uri ⇒ URI?

Returns the URI of the data if the data is for remote or local file, nil if the data isn't associated with any URIs.

Returns:

  • (URI, nil)

    The URI of the data if the data is for remote or local file, nil if the data isn't associated with any URIs.



26
27
28
# File 'lib/chupa-text/data.rb', line 26

# @return [URI, nil] The URI of the data if the data is for remote or
#   local file, nil if the data isn't associated with any URIs.
def uri; @uri; end

Instance Method Details

#[](name) ⇒ Object



141
142
143
# File 'lib/chupa-text/data.rb', line 141

# Looks up name in the attributes of the data.
#
# @param name [Object] The key passed on to the attributes' #[].
# @return [Object] Whatever the attributes return for name.
def [](name); @attributes[name]; end

#[]=(name, value) ⇒ Object



145
146
147
# File 'lib/chupa-text/data.rb', line 145

# Stores value under name in the attributes of the data.
#
# @param name [Object] The key passed on to the attributes' #[]=.
# @param value [Object] The value to store.
# @return [Object] The given value.
def []=(name, value); @attributes[name] = value; end

#extension ⇒ String?

Returns the normalized extension as String if #uri is not nil, nil otherwise. The normalized extension uses lower case like pdf not PDF.

Returns:

  • (String, nil)

    Normalized extension as String if #uri is not nil, nil otherwise. The normalized extension uses lower case like pdf not PDF.



167
168
169
170
171
172
173
174
# File 'lib/chupa-text/data.rb', line 167

# Returns the normalized extension derived from the path of #uri.
#
# The normalized extension uses lower case and has no leading dot, like
# "pdf" not ".PDF".
#
# @return [String, nil] "html" when #uri is an HTTP URI whose path ends
#   with "/"; otherwise the extension of the URI path ("" when the path
#   has no extension). nil when #uri is nil or has no path.
def extension
  return nil if @uri.nil?

  path = @uri.path
  # A URI without a hierarchical part (e.g. an opaque one such as
  # "mailto:...") can report a nil path; File.extname(nil) would raise
  # TypeError, so treat it as "no extension available".
  return nil if path.nil?

  if @uri.is_a?(URI::HTTP) && path.end_with?("/")
    # A directory-like HTTP URL is assumed to serve an HTML page.
    "html"
  else
    File.extname(path).downcase.sub(/\A\./, "")
  end
end

#initialize_copy(object) ⇒ Object



85
86
87
88
89
# File 'lib/chupa-text/data.rb', line 85

# Finishes a dup/clone by giving the copy its own attributes object.
#
# @param object [Object] The object being copied from.
# @return [self]
def initialize_copy(object)
  super
  # After super the copy still references the same attributes object as
  # the original; replace it with a duplicate.
  own_attributes = @attributes.dup
  @attributes = own_attributes
  self
end

#merge!(data) ⇒ void

This method returns an undefined value.

Merges metadata from data.

Parameters:

  • data (Data)

    The data to be merged.



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/chupa-text/data.rb', line 96

# Merges metadata from data.
#
# Copies the URI, the path, every attribute and the screenshot settings of
# data into this object. When data has a MIME type, it is prepended to
# this object's "source-mime-types" attribute.
#
# @param data [Data] The data to be merged.
# @return [void]
def merge!(data)
  self.uri = data.uri
  self.path = data.path
  data.attributes.each do |name, value|
    self[name] = value
  end
  source_mime_type = data.mime_type
  if source_mime_type
    # Build a fresh array instead of unshifting in place: the loop above
    # may have copied data's own "source-mime-types" array by reference,
    # and mutating it would change data's attributes as well.
    self["source-mime-types"] = [source_mime_type, *self["source-mime-types"]]
  end
  self.need_screenshot = data.need_screenshot?
  self.expected_screenshot_size = data.expected_screenshot_size
end

#mime_type ⇒ String?

Returns:

  • (String)

    The MIME type of the data. If MIME type isn't set, guesses MIME type from path and body.

  • (nil)

    If MIME type isn't set and it can't guess MIME type from path and body.



153
154
155
# File 'lib/chupa-text/data.rb', line 153

# @return [String] The MIME type of the data. If MIME type isn't set,
#   the result of guess_mime_type is used instead.
# @return [nil] If MIME type isn't set and guess_mime_type returns nil.
def mime_type
  # An explicitly set MIME type wins over guessing.
  return @mime_type if @mime_type

  guess_mime_type
end

#mime_type=(type) ⇒ Object

Parameters:

  • type (String, nil)

    The MIME type of the data. You can unset MIME type by nil. If you unset MIME type, MIME type is guessed from path and body of the data.



160
161
162
# File 'lib/chupa-text/data.rb', line 160

# Sets or unsets the MIME type of the data.
#
# @param type [String, nil] The MIME type of the data. You can unset MIME
#   type by nil.
# @return [String, nil] The given type.
def mime_type=(type); @mime_type = type; end

#need_screenshot? ⇒ Bool

Returns true when screenshot is needed if available.

Returns:

  • (Bool)

    true when screenshot is needed if available.



189
190
191
# File 'lib/chupa-text/data.rb', line 189

# @return [Bool] true when screenshot is needed if available.
def need_screenshot?; @need_screenshot; end

#open {|StringIO.new(body)| ... } ⇒ Object

Yields:

  • (StringIO.new(body))


137
138
139
# File 'lib/chupa-text/data.rb', line 137

# Wraps #body in a StringIO and passes it to the given block.
#
# @yield [StringIO] An IO-like object built from #body.
# @return [Object] The value returned by the block.
def open
  io = StringIO.new(body)
  yield io
end

#text? ⇒ Bool

Returns true if MIME type is "text/XXX", false otherwise.

Returns:

  • (Bool)

    true if MIME type is "text/XXX", false otherwise.



178
179
180
# File 'lib/chupa-text/data.rb', line 178

# @return [Bool] true if MIME type is "text/XXX", false otherwise
#   (including when no MIME type is available).
def text?
  type = mime_type
  return false unless type

  type.start_with?("text/")
end

#text_plain? ⇒ Bool

Returns true if MIME type is "text/plain", false otherwise.

Returns:

  • (Bool)

    true if MIME type is "text/plain", false otherwise.



184
185
186
# File 'lib/chupa-text/data.rb', line 184

# @return [Bool] true if MIME type is "text/plain", false otherwise.
def text_plain?
  type = mime_type
  type == "text/plain"
end