Module: ParasInMongo

Extended by:
ParasInMongo
Included in:
ParasInMongo
Defined in:
lib/paras_in_mongo.rb

Instance Method Summary collapse

Instance Method Details

#file_in_mongo(filename, options = {}) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/paras_in_mongo.rb', line 9

# Loads the key paragraphs of a book XML file into a MongoDB collection.
#
# The file is parsed with Nokogiri. Book-level metadata is taken from the
# first `book` element (its `id` attribute) and from the `title`, `author`,
# `pubdate` and `publisher` elements under `book info`. Every element
# matching `para[key=yes]` is then inserted as one document shaped like:
#
#   { '_id' => <para id attribute>,
#     keywords: [{ keyword: <text>, weight: <Integer> }, ...],
#     content:  <text of the first `content` child>,
#     source:   { book: {...metadata...}, location: { section: <title text> } } }
#
# @param filename [String] path of the XML file to import
# @param options [Hash] connection settings; keys may be strings or symbols
#   (they are normalised with `stringify_keys`). The keys read are
#   'host', 'port', 'database' and 'collection'.
# @return [Object] whatever `each` returns for the matched paragraph set
# @raise [NoMethodError] when an expected element (`book`, one of the
#   `book info` children, or a paragraph's `content`) is absent, because
#   the lookup yields nil
def file_in_mongo(filename, options = {})
  options = options.stringify_keys
  session = Moped::Session.new(["#{options['host']}:#{options['port']}"])
  session.use options['database']

  # File.read opens, reads and closes the file in one call; the previous
  # File.open(filename).read left the handle open until garbage collection.
  doc = Nokogiri::XML(File.read(filename))

  book_id = doc.search("book").first['id']
  title = doc.search("book info title").first.text
  author = doc.search("book info author").first.text
  pubdate = doc.search("book info pubdate").first.text
  publisher = doc.search("book info publisher").first.text

  # Book metadata is identical for every paragraph, so build it once.
  source = {
    book: {
      title: title,
      book_id: book_id,
      author: author,
      pubdate: pubdate,
      publisher: publisher
    }
  }

  collection = session[options['collection']]

  doc.search("para[key=yes]").each do |para|
    content = para.search("content").first.text

    # A keyword without a weight attribute gets weight 0 (nil.to_i).
    keywords = para.search("keyword").map do |keyword|
      { keyword: keyword.text, weight: keyword['weight'].to_i }
    end

    # Text of every `info title` found under the paragraph's parent element.
    section = para.parent.search("info title").text

    collection.insert(
      '_id' => para['id'],
      keywords: keywords,
      content: content,
      source: source.merge(location: { section: section })
    )
  end
end