Class: NewsCrawler::Storage::RawData::MongoStorage

Inherits:
RawDataEngine
  • Object
show all
Includes:
Mongo
Defined in:
lib/news_crawler/storage/raw_data/mongo_storage.rb

Overview

Raw data storage implemented using MongoDB

Constant Summary collapse

NAME =
'mongo'

Instance Method Summary collapse

Methods inherited from RawDataEngine

get_engines, inherited

Constructor Details

#initialize(*opts) ⇒ MongoStorage

Returns a new instance of MongoStorage.



38
39
40
41
42
43
44
# File 'lib/news_crawler/storage/raw_data/mongo_storage.rb', line 38

# Connect to the MongoDB server named in the :application configuration and
# set up the raw data collection.
#
# The collection name is the configured prefix and the raw_data suffix
# joined by an underscore. A unique ascending index on :url is ensured on
# that collection.
#
# @param opts [Array] accepted but not used by this constructor
def initialize(*opts)
  settings   = SimpleConfig.for(:application)
  connection = MongoClient.new(settings.mongodb.host, settings.mongodb.port)
  database   = connection[settings.mongodb.db_name]
  coll_name  = settings.prefix + '_' + settings.suffix.raw_data

  @coll = database[coll_name]
  @coll.ensure_index({:url => Mongo::ASCENDING}, {:unique => true})
end

Instance Method Details

#add(url, body) ⇒ Object

Add an entry to the raw data collection, overwriting any data already stored for the same URL

Parameters:

  • url (String)
  • body (String)


49
50
51
52
53
54
# File 'lib/news_crawler/storage/raw_data/mongo_storage.rb', line 49

# Add an entry to the raw data collection, overwriting any body already
# stored for the same URL (upsert keyed on :url).
#
# The body is transcoded to UTF-8 into a new string, with invalid or
# undefined byte sequences replaced. The caller's string is never modified,
# so frozen strings are accepted.
#
# @param url [String] key of the entry
# @param body [String] raw content to store
# @return [Object] whatever the collection's #update call returns
def add(url, body)
  # Non-bang encode: do not mutate the argument (encode! raises on frozen input).
  utf8_body = body.encode('utf-8', :invalid => :replace, :undef => :replace)
  @coll.update({:url   => url},
               {:$set  => {:body => utf8_body}},
               {:upsert => true})
end

#clear ⇒ Object



73
74
75
# File 'lib/news_crawler/storage/raw_data/mongo_storage.rb', line 73

# Call #remove on the raw data collection with no selector.
# NOTE(review): presumably this deletes every stored entry; confirm against
# the Mongo driver version in use.
#
# @return [Object] whatever the collection's #remove call returns
def clear
  @coll.remove
end

#count ⇒ Object

Get number of raw data entries



69
70
71
# File 'lib/news_crawler/storage/raw_data/mongo_storage.rb', line 69

# Get the number of raw data entries by delegating to the collection's #count.
#
# @return [Object] the collection's count (presumably an Integer; confirm
#   against the Mongo driver in use)
def count
  @coll.count
end

#find_by_url(url) ⇒ String?

Find the document with the corresponding URL

Parameters:

  • url (String)

Returns:

  • (String, nil)


59
60
61
62
63
64
65
66
# File 'lib/news_crawler/storage/raw_data/mongo_storage.rb', line 59

# Look up the stored entry for a URL and return its body.
#
# @param url [String] URL the entry was stored under
# @return [String, nil] the value under 'body' in the matching document, or
#   nil when no document matches
def find_by_url(url)
  doc = @coll.find_one({:url => url})
  return nil if doc.nil?

  doc['body']
end