Class: RemoteTable::Properties

Inherits:
Object
  • Object
show all
Defined in:
lib/remote_table/properties.rb

Overview

Represents the properties of a RemoteTable, whether they are explicitly set by the user or inferred automatically.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(t) ⇒ Properties

Returns a new instance of Properties.



8
9
10
11
# File 'lib/remote_table/properties.rb', line 8

# Builds the property set for a table.
#
# The table's options are copied with symbolized keys so that every reader
# in this class can look settings up by symbol.
#
# @param t the table these properties describe; it must respond to
#   +options+ with an object that supports +symbolize_keys+
def initialize(t)
  symbolized = t.options.symbolize_keys
  @t = t
  @current_options = symbolized
end

Instance Attribute Details

#current_optionsObject (readonly)

Returns the value of attribute current_options.



6
7
8
# File 'lib/remote_table/properties.rb', line 6

# Read-only access to the options hash that every property reader in this
# class consults.
#
# @return the object stored in +@current_options+
def current_options
  @current_options
end

#tObject (readonly)

Returns the value of attribute t.



5
6
7
# File 'lib/remote_table/properties.rb', line 5

# Read-only access to the table object these properties belong to.
#
# @return the object stored in +@t+
def t
  @t
end

Instance Method Details

#column_cssObject

The CSS selector used to find columns



116
117
118
# File 'lib/remote_table/properties.rb', line 116

# The CSS selector used to find columns.
#
# @return the value stored under +:column_css+ in the current options
def column_css
  options = current_options
  options[:column_css]
end

#column_xpathObject

The XPath used to find columns



106
107
108
# File 'lib/remote_table/properties.rb', line 106

# The XPath used to find columns.
#
# @return the value stored under +:column_xpath+ in the current options
def column_xpath
  options = current_options
  options[:column_xpath]
end

#compressionObject

The compression type.

Default: guessed from URI.

Can be specified as: :gz, :zip, :bz2, :exe (treated as :zip)



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/remote_table/properties.rb', line 125

# The compression type.
#
# An explicit +:compression+ option always wins, even when its value is nil.
# Otherwise the type is guessed from the extension of the URI's path.
#
# @return [Symbol, nil] the explicit option, or one of +:gz+, +:zip+,
#   +:bz2+, +:exe+ guessed from the extension, or nil when nothing matches
def compression
  return current_options[:compression] if current_options.has_key?(:compression)

  extension = ::File.extname(uri.path).downcase
  case extension
  when /gz/, /gunzip/   then :gz
  when /zip/            then :zip
  when /bz2/, /bunzip2/ then :bz2
  when /exe/            then :exe
  end
end

#cropObject

Crop rows after this line



177
178
179
# File 'lib/remote_table/properties.rb', line 177

# Crop rows after this line.
#
# @return the value stored under +:crop+ in the current options
def crop
  options = current_options
  options[:crop]
end

#cutObject

Cut columns up to this character



172
173
174
# File 'lib/remote_table/properties.rb', line 172

# Cut columns up to this character.
#
# @return the value stored under +:cut+ in the current options
def cut
  options = current_options
  options[:cut]
end

#delimiterObject

The delimiter

Default: “,”



96
97
98
# File 'lib/remote_table/properties.rb', line 96

# The delimiter.
#
# @return the +:delimiter+ option when it is truthy, otherwise ","
def delimiter
  custom = current_options[:delimiter]
  custom ? custom : ','
end

#errataObject

A hash of options to create a new Errata instance (see the Errata gem at github.com/seamusabshere/errata) to be used on every row.



212
213
214
215
216
217
218
219
# File 'lib/remote_table/properties.rb', line 212

# The errata to be used on every row.
#
# When the +:errata+ option is a Hash it is used to build a new ::Errata;
# any other value is returned as given. The result is memoized in +@errata+.
#
# @return nil when no +:errata+ option is present, otherwise the memoized
#   errata object
def errata
  return unless current_options.has_key? :errata

  @errata ||= begin
    spec = current_options[:errata]
    spec.is_a?(::Hash) ? ::Errata.new(spec) : spec
  end
end

#external_encodingObject



85
86
87
# File 'lib/remote_table/properties.rb', line 85

# Name of the external encoding; this is a fixed value and does not consult
# the options.
#
# @return [String] always "UTF-8"
def external_encoding
  'UTF-8'
end

#external_encoding_iconvObject



89
90
91
# File 'lib/remote_table/properties.rb', line 89

# The external encoding name with a "//TRANSLIT" suffix; this is a fixed
# value and does not consult the options.
#
# @return [String] always "UTF-8//TRANSLIT"
def external_encoding_iconv
  'UTF-8//TRANSLIT'
end

#filenameObject

The filename, which can be used to pick a file out of an archive.

Example:

RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :filename => '2008_FE_guide_ALL_rel_dates_-no sales-for DOE-5-1-08.csv'


167
168
169
# File 'lib/remote_table/properties.rb', line 167

# The filename, which can be used to pick a file out of an archive.
#
# @return the value stored under +:filename+ in the current options
def filename
  options = current_options
  options[:filename]
end

#form_dataObject

Form data to send in with the download request



70
71
72
# File 'lib/remote_table/properties.rb', line 70

# Form data to send in with the download request.
#
# @return the value stored under +:form_data+ in the current options
def form_data
  options = current_options
  options[:form_data]
end

#formatObject

Get the format in the form of RemoteTable::Format::Excel, etc.

Note: treats all spreadsheets.google.com and docs.google.com URLs as Format::Delimited (i.e., CSV)

Default: guessed from file extension (which is usually the same as the URI, but sometimes not if you pick out a specific file from an archive)

Can be specified as: :xlsx, :xls, :delimited (aka :csv and :tsv), :ods, :fixed_width, :html, :xml



228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/remote_table/properties.rb', line 228

# The format class (Format::Excel, Format::Delimited, ...) for this table.
#
# URIs hosted on spreadsheets.google.com or docs.google.com are always
# Format::Delimited. Otherwise the explicit +:format+ option is used as the
# clue when the key is present; if not, the path of the table's local file
# is used. The clue is stringified, downcased and matched against the
# patterns below in order, so the first match wins.
#
# NOTE(review): the patterns are unanchored, so a local path is matched
# anywhere in the string (directory names included), not only on its
# extension.
#
# @return one of the Format classes; Format::Delimited when nothing matches
def format
  google_hosts = ['spreadsheets.google.com', 'docs.google.com']
  return Format::Delimited if google_hosts.include?(uri.host)

  hint = current_options.has_key?(:format) ? current_options[:format] : t.local_file.path

  case hint.to_s.downcase
  when /xlsx/, /excelx/           then Format::Excelx
  when /xls/, /excel/             then Format::Excel
  when /csv/, /tsv/, /delimited/  then Format::Delimited
  when /ods/, /open_?office/      then Format::OpenOffice
  when /fixed_?width/             then Format::FixedWidth
  when /htm/                      then Format::HTML
  when /xml/                      then Format::XML
  else                                 Format::Delimited
  end
end

#globObject

The glob used to pick a file out of an archive.

Example:

RemoteTable.new 'http://www.fueleconomy.gov/FEG/epadata/08data.zip', :glob => '/*.csv'


159
160
161
# File 'lib/remote_table/properties.rb', line 159

# The glob used to pick a file out of an archive.
#
# @return the value stored under +:glob+ in the current options
def glob
  options = current_options
  options[:glob]
end

#headersObject

The headers specified by the user

Default: :first_row



43
44
45
# File 'lib/remote_table/properties.rb', line 43

# The headers specified by the user.
#
# Only a nil (or missing) option falls back to the default; an explicit
# +false+ is returned unchanged.
#
# @return the +:headers+ option, or +:first_row+ when it is nil
def headers
  chosen = current_options[:headers]
  return :first_row if chosen.nil?

  chosen
end

#internal_encodingObject



81
82
83
# File 'lib/remote_table/properties.rb', line 81

# The name of the encoding given by the +:encoding+ option, upcased.
#
# The option is converted with +to_s+ before upcasing so the result is
# always a String: a Symbol such as +:latin1+ would otherwise come back as a
# Symbol, and an ::Encoding object does not respond to +upcase+ at all.
#
# @return [String] the upcased encoding name; "UTF-8" when the option is
#   missing or falsy
def internal_encoding
  (current_options[:encoding] || 'UTF-8').to_s.upcase
end

#keep_blank_rowsObject

Whether to keep blank rows

Default: false



65
66
67
# File 'lib/remote_table/properties.rb', line 65

# Whether to keep blank rows.
#
# @return the +:keep_blank_rows+ option when it is truthy, otherwise false
def keep_blank_rows
  setting = current_options[:keep_blank_rows]
  setting ? setting : false
end

#output_classObject



51
52
53
# File 'lib/remote_table/properties.rb', line 51

# The class chosen for output, based on #headers.
#
# @return [Class] ::Array when headers is exactly +false+, otherwise
#   ::ActiveSupport::OrderedHash
def output_class
  return ::Array if headers == false

  ::ActiveSupport::OrderedHash
end

#packingObject

The packing type.

Default: guessed from URI.

Can be specified as: :tar



146
147
148
149
150
151
152
153
# File 'lib/remote_table/properties.rb', line 146

# The packing type.
#
# An explicit +:packing+ option always wins, even when its value is nil.
# Otherwise +:tar+ is guessed when the URI's path contains ".tar" followed
# by a dot or the end of a line (case-insensitive).
#
# @return the explicit option, +:tar+, or nil
def packing
  return current_options[:packing] if current_options.has_key?(:packing)

  :tar if uri.path =~ %r{\.tar(?:\.|$)}i
end

#rejectObject

A proc to call to decide whether to return a row.



207
208
209
# File 'lib/remote_table/properties.rb', line 207

# A proc to call to decide whether to return a row.
#
# @return the value stored under +:reject+ in the current options
def reject
  options = current_options
  options[:reject]
end

#row_cssObject

The CSS selector used to find rows



111
112
113
# File 'lib/remote_table/properties.rb', line 111

# The CSS selector used to find rows.
#
# @return the value stored under +:row_css+ in the current options
def row_css
  options = current_options
  options[:row_css]
end

#row_xpathObject

The XPath used to find rows



101
102
103
# File 'lib/remote_table/properties.rb', line 101

# The XPath used to find rows.
#
# @return the value stored under +:row_xpath+ in the current options
def row_xpath
  options = current_options
  options[:row_xpath]
end

#schemaObject

The fixed-width schema, given as an array

Example:

RemoteTable.new('http://cloud.github.com/downloads/seamusabshere/remote_table/test2.fixed_width.txt',
                 :format => :fixed_width,
                 :skip => 1,
                 :schema => [[ 'header4', 10, { :type => :string }  ],
                             [  'spacer',  1 ],
                             [  'header5', 10, { :type => :string } ],
                             [  'spacer',  12 ],
                             [  'header6', 10, { :type => :string } ]])


192
193
194
# File 'lib/remote_table/properties.rb', line 192

# The fixed-width schema, given as an array.
#
# @return the value stored under +:schema+ in the current options
def schema
  options = current_options
  options[:schema]
end

#schema_nameObject

The name of the fixed-width schema according to FixedWidth



197
198
199
# File 'lib/remote_table/properties.rb', line 197

# The name of the fixed-width schema according to FixedWidth.
#
# @return the value stored under +:schema_name+ in the current options
def schema_name
  options = current_options
  options[:schema_name]
end

#selectObject

A proc to call to decide whether to return a row.



202
203
204
# File 'lib/remote_table/properties.rb', line 202

# A proc to call to decide whether to return a row.
#
# @return the value stored under +:select+ in the current options
def select
  options = current_options
  options[:select]
end

#sheetObject

The sheet specified by the user as a number or a string

Default: 0



58
59
60
# File 'lib/remote_table/properties.rb', line 58

# The sheet specified by the user as a number or a string.
#
# @return the +:sheet+ option when it is truthy, otherwise 0
def sheet
  chosen = current_options[:sheet]
  chosen ? chosen : 0
end

#skipObject

How many rows to skip

Default: 0



77
78
79
# File 'lib/remote_table/properties.rb', line 77

# How many rows to skip.
#
# @return the +:skip+ option when it is truthy, otherwise 0
def skip
  count = current_options[:skip]
  count ? count : 0
end

#streamingObject

Whether to stream the rows without caching them. Saves memory, but you have to re-download the file every time you…

  • call []

  • call each

Defaults to false.



31
32
33
# File 'lib/remote_table/properties.rb', line 31

# Whether to stream the rows without caching them.
#
# @return the +:streaming+ option when it is truthy, otherwise false
def streaming
  setting = current_options[:streaming]
  setting ? setting : false
end

#update(options) ⇒ Object



13
14
15
# File 'lib/remote_table/properties.rb', line 13

# Merges +options+ into the current options in place.
#
# The constructor stores options with symbolized keys and every reader looks
# settings up by symbol, so incoming keys are symbolized here as well;
# otherwise a string-keyed update would be stored under a key that no reader
# consults. Objects that do not respond to +symbolize_keys+ are merged
# unchanged.
#
# @param options the settings to merge in
# @return the updated current options hash
def update(options)
  options = options.symbolize_keys if options.respond_to?(:symbolize_keys)
  current_options.update options
end

#uriObject

The parsed URI of the file to get.



18
19
20
21
22
23
24
25
# File 'lib/remote_table/properties.rb', line 18

# The parsed URI of the file to get.
#
# The URI is parsed from the table's +url+ once and memoized in +@uri+.
# For spreadsheets.google.com and docs.google.com the query string is
# rewritten so that it starts with "output=csv" and carries no other
# +output=+ parameter.
#
# The query is rebuilt parameter by parameter rather than with a single
# substitution, so that:
# * a URL without any query string works (the query becomes "output=csv");
# * a leading +output=+ parameter does not leave a doubled "&" behind;
# * parameters whose names merely end in "output" are left alone.
#
# @return [URI] the memoized, possibly rewritten, URI
def uri
  return @uri if @uri.is_a?(::URI)

  parsed = ::URI.parse t.url
  if parsed.host == 'spreadsheets.google.com' || parsed.host == 'docs.google.com'
    kept = parsed.query.to_s.split('&').reject { |param| param.empty? || param =~ /\Aoutput=/ }
    parsed.query = (['output=csv'] + kept).join('&')
  end
  # Memoize only once the URI is fully built.
  @uri = parsed
end

#use_first_row_as_header?Boolean

Returns:

  • (Boolean)


47
48
49
# File 'lib/remote_table/properties.rb', line 47

# Whether the first row is to be used as the header.
#
# @return [Boolean] true when #headers equals +:first_row+
def use_first_row_as_header?
  header_setting = headers
  header_setting == :first_row
end

#warn_on_multiple_downloadsObject

Defaults to true.



36
37
38
# File 'lib/remote_table/properties.rb', line 36

# The +:warn_on_multiple_downloads+ setting; defaults to true.
#
# @return [Boolean] false only when the option equals +false+; true for
#   every other value, including nil or a missing key
def warn_on_multiple_downloads
  setting = current_options[:warn_on_multiple_downloads]
  setting != false
end