Module: ScraperWiki

Defined in:
lib/scraperwiki.rb

Class Method Summary collapse

Class Method Details

._convdata(unique_keys, scraper_data) ⇒ Object

Internal function to check a row of data, convert to right format



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/scraperwiki.rb', line 95

# Internal function: checks one row of data and converts it to the format
# stored in SQLite.
#
# Parameters
# * _unique_keys_ = nil, or a list of String/Symbol column names; each must be
#   present in the row (under its String or Symbol form) with a non-nil value
# * _scraper_data_ = the row, a Hash keyed by String or Symbol column names
#
# Returns a new Hash with String keys. Date values become ISO 8601 strings,
# Time values become UTC ISO 8601 strings without the zone suffix, and any
# value that is not an Integer, Float, String, true, false or nil is replaced
# by its +to_s+.
#
# Raises RuntimeError when a unique key or a column name is invalid.
def ScraperWiki._convdata(unique_keys, scraper_data)
    (unique_keys || []).each do |key|
        unless key.kind_of?(String) || key.kind_of?(Symbol)
            raise "unique_keys must each be a string or a symbol, this one is not: #{key.inspect}"
        end

        # A unique key may be given as a String or a Symbol independently of
        # how the row itself is keyed, so look it up under both spellings.
        candidates = [key.to_s, key.to_sym]
        unless candidates.any? { |candidate| scraper_data.include?(candidate) }
            raise "unique_keys must be a subset of data, this one is not: #{key}"
        end
        if candidates.all? { |candidate| scraper_data[candidate].nil? }
            raise "unique_key value should not be nil, this one is nil: #{key}"
        end
    end

    jdata = { }
    scraper_data.each_pair do |key, value|
        raise 'key must not have blank name' unless key

        key = key.to_s if key.kind_of?(Symbol)
        # Interpolate rather than concatenate: 'text' + key raises TypeError
        # for exactly the non-String keys this message is meant to report.
        raise "key must be string or symbol type: #{key.inspect}" unless key.kind_of?(String)
        # \A and \z anchor the whole name; a bare trailing $ only checked the
        # last characters and let names such as "!!bad" through.
        raise "key must be simple text: #{key}" unless key =~ /\A[a-zA-Z0-9_\- ]+\z/

        # convert formats
        if value.kind_of?(Date)
            value = value.iso8601
        elsif value.kind_of?(Time)
            # getutc returns a converted copy, so the caller's Time is untouched
            # and times carrying a local offset no longer trip the check below.
            stamp = value.getutc.iso8601
            raise "internal error, timezone came out as non-UTC while converting to SQLite format" unless stamp =~ /([+-]00:00|Z)$/
            value = stamp.sub(/([+-]00:00|Z)$/, '')
        end

        # Integer covers Fixnum/Bignum on old Rubies and is the only integer
        # class on current ones, where the Fixnum constant no longer exists.
        unless value.kind_of?(Integer) || [Float, String, TrueClass, FalseClass, NilClass].include?(value.class)
            value = value.to_s
        end

        jdata[key] = value
    end
    jdata
end

.close_sqlite ⇒ Object



90
91
92
# File 'lib/scraperwiki.rb', line 90

# Closes the scraper's SQLite connection by delegating to SQLiteMagic.close;
# whatever that call returns is returned unchanged.
def ScraperWiki.close_sqlite
    SQLiteMagic.close
end

.get_var(name, default = nil, verbose = 2) ⇒ Object

Allows the user to retrieve a previously saved variable

Parameters

  • name = The variable name to fetch

  • default = The value to use if the variable name is not found

  • verbose = Verbosity level

Example

ScraperWiki::get_var('current', 0)



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/scraperwiki.rb', line 147

# Allows the user to retrieve a previously saved variable.
#
# Parameters
# * _name_ = The variable name to fetch
# * _default_ = The value returned when the swvariables table does not exist
#   or holds no row for +name+
# * _verbose_ = Verbosity level, passed through to ScraperWiki.sqliteexecute
#
# Returns the stored value cast according to its recorded type tag: integer
# tags give an Integer, "Float" a Float, "NilClass" nil; any other tag returns
# the stored value as-is.
def ScraperWiki.get_var(name, default=nil, verbose=2)
    begin
        result = ScraperWiki.sqliteexecute("select value_blob, type from swvariables where name=?", [name], verbose)
    rescue NoSuchTableSqliteException
        # Nothing has been saved yet, so the table was never created.
        return default
    end

    rows = result["data"] if result
    return default if rows.nil? || rows.empty?

    row = rows[0]
    svalue = row[0]
    vtype = row[1]
    case vtype
    when "Fixnum", "Bignum", "Integer"
        # The tag is the class name at save time: Fixnum/Bignum on Ruby < 2.4,
        # Integer afterwards. All three must come back as integers.
        svalue.to_i
    when "Float"
        svalue.to_f
    when "NilClass"
        nil
    else
        svalue
    end
end

.raisesqliteerror(rerror) ⇒ Object

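Raises:

  • NoSuchTableSqliteException when the error text reports "no such table"

  • SqliteException for any other error text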



198
199
200
201
202
203
204
205
206
# File 'lib/scraperwiki.rb', line 198

# Turns an error message from the SQLite layer into an exception. Always
# raises: NoSuchTableSqliteException when the message matches one of the
# "no such table" patterns, SqliteException otherwise. The message is passed
# to the exception constructor unchanged.
def ScraperWiki.raisesqliteerror(rerror)
    missing_table_patterns = [
        /sqlite3.Error: no such table:/,  # old dataproxy
        /DB Error: \(OperationalError\) no such table:/
    ]
    table_missing = missing_table_patterns.any? { |pattern| pattern.match(rerror) }
    raise(table_missing ? NoSuchTableSqliteException.new(rerror) : SqliteException.new(rerror))
end

.save_sqlite(unique_keys, data, table_name = "swdata", verbose = 0) ⇒ Object

Saves the provided data into a local database for this scraper. Data is upserted into this table (inserted if it does not exist, updated if the unique keys say it does).

Parameters

  • unique_keys = A list of column names, that used together should be unique

  • data = A hash of the data, where each key is a column name and each value is that column's value for the row. If sending lots of data, this can be a list of hashes.
    
  • table_name = The name that the newly created table should use.

Example

ScraperWiki::save_sqlite(['id'], {'id' => 1})



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/scraperwiki.rb', line 63

# Saves the provided data into a local database for this scraper.
#
# Parameters
# * _unique_keys_ = nil, or a list of column names (Strings or Symbols) that
#   used together should be unique
# * _data_ = A hash of column name => value, or a list of such hashes
# * _table_name_ = The table to write to
# * _verbose_ = Accepted for compatibility; not used by this method
#
# Each row is checked and converted by _convdata, then all rows are handed to
# SQLiteMagic._do_save_sqlite, whose result is returned. An empty list of rows
# returns nil without touching the database.
#
# Raises RuntimeError when unique_keys is neither nil nor an Array, or when
# data is nil.
def ScraperWiki.save_sqlite(unique_keys, data, table_name="swdata",verbose=0)
    raise 'unique_keys must be nil or an array' unless unique_keys.nil? || unique_keys.kind_of?(Array)
    raise 'data must have a non-nil value' if data.nil?

    # convert :symbols to "strings"; nil is allowed by the check above and is
    # passed through untouched instead of failing on nil.map
    unique_keys = unique_keys.map { |x| x.kind_of?(Symbol) ? x.to_s : x } unless unique_keys.nil?

    if data.kind_of?(Hash)
        data = [ data ]
    elsif data.length == 0
        return
    end

    rjdata = data.map { |ldata| _convdata(unique_keys, ldata) }

    SQLiteMagic._do_save_sqlite(unique_keys, rjdata, table_name)
end

.save_var(name, value, verbose = 2) ⇒ Object

Allows the user to save a single variable (at a time) to carry state across runs of the scraper.

Parameters

  • name = The variable name

  • value = The value of the variable

  • verbose = Verbosity level

Example

ScraperWiki::save_var('current', 100)



188
189
190
191
192
193
194
195
196
# File 'lib/scraperwiki.rb', line 188

# Allows the user to save a single variable (at a time) to carry state across
# runs of the scraper.
#
# Parameters
# * _name_ = The variable name
# * _value_ = The value of the variable
# * _verbose_ = Verbosity level, passed through to ScraperWiki.save_sqlite
#
# The value is written to the "swvariables" table as its +to_s+ form together
# with a type tag. Values other than integers, Strings, Floats and nil are
# still stored, but a notice is printed because only their string form is kept.
# Returns whatever ScraperWiki.save_sqlite returns.
def ScraperWiki.save_var(name, value, verbose=2)
    # Every integer is tagged "Fixnum", the name Ruby < 2.4 used for small
    # integers. Newer Rubies report the class as Integer, which ScraperWiki.get_var
    # does not cast back and which used to trigger the notice below.
    vtype = value.kind_of?(Integer) ? "Fixnum" : value.class.to_s
    svalue = value.to_s
    unless ["Fixnum", "String", "Float", "NilClass"].include?(vtype)
        puts "*** object of type "+vtype+" converted to string\n"
    end
    data = { "name" => name, "value_blob" => svalue, "type" => vtype }
    ScraperWiki.save_sqlite(["name"], data, "swvariables", verbose)
end

.scrape(url, params = nil, agent = nil) ⇒ Object

The scrape method fetches the content from a webserver.

Parameters

  • url = The URL to fetch

  • params = The parameters to send with a POST request

  • agent = A manually supplied user-agent string

Example

ScraperWiki::scrape('http://scraperwiki.com')



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/scraperwiki.rb', line 22

# The scrape method fetches the content from a webserver.
#
# Parameters
# * _url_ = The URL to fetch
# * _params_ = The parameters to send with a POST request; when nil the URL is
#   fetched with GET
# * _agent_ = A manually supplied useragent string
#
# Returns the response body. A body that is itself gzip data is returned
# decompressed; anything else is returned exactly as received.
def ScraperWiki.scrape(url, params = nil, agent = nil)
  client = agent ? HTTPClient.new(:agent_name => agent) : HTTPClient.new

  # NOTE(review): certificate verification is switched off for every request,
  # so HTTPS responses are not authenticated. Kept as-is because existing
  # scrapers may depend on reaching hosts with broken certificates.
  client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE

  # Probe the client instance for the writer. The previous check asked the
  # HTTPClient class for a method literally named
  # "client.transparent_gzip_decompression=", which can never exist.
  if client.respond_to?(:transparent_gzip_decompression=)
    client.transparent_gzip_decompression = true
  end

  html = params.nil? ? client.get_content(url) : client.post_content(url, params)

  # Best-effort gunzip, applied whether or not the client decompresses on its
  # own, so callers always get a body back and results match what this method
  # has always produced.
  begin
    Zlib::GzipReader.new(StringIO.new(html)).read
  rescue StandardError
    # Not gzip data (or unreadable as such): hand back the body untouched.
    html
  end
end

.sqliteexecute(query, data = nil, verbose = 2) ⇒ Object



86
87
88
# File 'lib/scraperwiki.rb', line 86

# Hands a query to the SQLite layer: forwards +query+, +data+ and +verbose+
# unchanged to SQLiteMagic.sqliteexecute and returns its result.
def ScraperWiki.sqliteexecute(query, data = nil, verbose = 2)
  SQLiteMagic.sqliteexecute(query, data, verbose)
end