Class: Import

Inherits:
ActiveRecord::Base
  • Object
Defined in:
app/models/import.rb

Instance Method Summary

Instance Method Details

#create_queued_listing_and_return_listing_key(doc, import) ⇒ Object



# File 'app/models/import.rb', line 119

def create_queued_listing_and_return_listing_key doc, import
  begin
    doc.css(import.repeating_element).each do |o|
      listing_data = {}
      Hash.from_xml(o.to_xml)[import.repeating_element].each_pair{|key, value| listing_data[key] = value }
      queued_listing = QueuedListing.new(import: import, listing_data: listing_data)
      queued_listing.save
      return Mapper::unique_identifier(queued_listing)
    end
  rescue Exception => e
    puts e.inspect
    exit if Rails.env.development?
    return nil
  end
end
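
A minimal usage sketch, assuming an XML fragment for a single repeating element has already been parsed with Nokogiri (xml_fragment is a placeholder string):

import = Import.find_by(status: 'active')                 # assumes a configured Import record exists
doc = Nokogiri::XML(xml_fragment).remove_namespaces!      # xml_fragment: one repeating-element block plus header
import.create_queued_listing_and_return_listing_key(doc, import)
# => the unique identifier produced by Mapper, or nil if an error was rescued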

#download_feed_to_import(import) ⇒ Object



# File 'app/models/import.rb', line 91

def download_feed_to_import import
  filename = [Time.now.to_s.parameterize, import.source_url.split('/').last].join
  filepath = Rails.root.join('tmp', filename).to_s
  File.delete(filepath) if File.file? filepath
  open(filepath, 'wb') do |file|
    file << open(import.source_url, 
              http_basic_authentication: [import.source_user, import.source_pass], 
              allow_redirections: :all
            ).read
  end
  filepath
end
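
A usage sketch, assuming source_url, source_user, and source_pass are already set on the record:

filepath = import.download_feed_to_import(import)
# => a path under Rails.root/tmp whose name combines a timestamp with the
#    final segment of source_url; the feed body has been written to that file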

#get_open_and_closing_tag_for(repeating_element) ⇒ Object



# File 'app/models/import.rb', line 104

def get_open_and_closing_tag_for repeating_element
  # content_tag builds "<element>\n</element>"; splitting yields the open and close tags
  ApplicationController.helpers.content_tag(repeating_element, "\n").split
end
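
A sketch of the expected result, assuming the helper in use is content_tag and the repeating element is 'Listing' (a hypothetical element name):

import = Import.new(repeating_element: 'Listing')
open_tag, close_tag = import.get_open_and_closing_tag_for(import.repeating_element)
# open_tag  => "<Listing>"
# close_tag => "</Listing>"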

#get_xml_header(filepath, repeating_element) ⇒ Object



# File 'app/models/import.rb', line 108

def get_xml_header filepath, repeating_element
  stream = ''
  open_tag = get_open_and_closing_tag_for(repeating_element).first
  File.foreach(filepath) do |line|
    stream += line
    pos = stream.index(open_tag)
    return stream[0..pos-1] if pos
  end
  nil # returned if the opening tag is never found
end
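
A sketch, assuming the feed's XML declaration and root element appear before the first repeating element ('Listing' is again hypothetical):

import.get_xml_header(filepath, 'Listing')
# => everything in the file before the first "<Listing>", e.g.
#    "<?xml version=\"1.0\"?>\n<Listings>\n"
#    (nil if the opening tag never appears)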

#new_source_data_exists? ⇒ Boolean



# File 'app/models/import.rb', line 34

def new_source_data_exists?
  source_url_last_modified = self.source_url_last_modified
  if source_url_last_modified.present? && self.source_data_modified.present?
    DateTime.parse(source_url_last_modified.to_s) > DateTime.parse(self.source_data_modified.to_s)
  else
    true
  end
end
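
A sketch of the comparison; the stored value is whatever the previous run recorded:

import.source_data_modified      # => the Last-Modified value stored by the previous run, if any
import.new_source_data_exists?   # => true when the remote Last-Modified is newer,
                                 #    or when either timestamp is missing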

#remove_listings_not_present(fresh_listing_keys) ⇒ Object



# File 'app/models/import.rb', line 14

def remove_listings_not_present fresh_listing_keys
  existing_listing_keys = self.listings.all.pluck(:listing_key)
  stale_listing_keys = existing_listing_keys.delete_if{|key| fresh_listing_keys.include? key }
  stale_listing_keys.each do |listing_key|
    Listing.find_by(listing_key: listing_key).destroy
  end
  stale_listing_keys
end
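
A sketch, assuming fresh_keys holds the listing keys found during the current run (the values are hypothetical):

fresh_keys = ['KEY-1', 'KEY-2']
import.remove_listings_not_present(fresh_keys)
# destroys this import's Listing records whose listing_key is not in fresh_keys
# and returns the array of removed keys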

#run_import ⇒ Object



# File 'app/models/import.rb', line 43

def run_import
  if self.status == 'active'
    self.update_attribute(:status, :running)
    if self.new_source_data_exists?
      
      source_data_modified = self.source_url_last_modified
      self.update_attribute(:source_data_modified, source_data_modified)

      l, count, found_listing_keys, snapshots, stream = 0, 0, [], [], ''
      open_tag, close_tag = get_open_and_closing_tag_for self.repeating_element

      # Grab a file to work with
      filepath = download_feed_to_import self
      filepath = uncompress_and_return_new_filepath(filepath) if filepath.split('.').last.downcase == 'gz'
    
      # Grab the XML header to avoid namespace errors later 
      xml_header = get_xml_header filepath, self.repeating_element

      start_time = Time.now
      import_result = ImportResult.create(import: self, start_time: start_time, source_data_modified: source_data_modified)
      File.foreach(filepath) do |line|
        stream += line
        while (from_here = stream.index(open_tag)) && (to_there = stream.index(close_tag))
          xml = stream[from_here..to_there + (close_tag.length-1)]
          doc = Nokogiri::XML([xml_header, xml].join).remove_namespaces!
          found_listing_keys << create_queued_listing_and_return_listing_key(doc, self)
          stream.gsub!(xml, '')
          if ((l += 1) % 100).zero?
            GC.start
            snapshots << [l, l/(Time.now - start_time)]
          end
        end
      end
      end_time = Time.now
      self.update_attribute(:status, :active)
      removed_listing_keys = self.remove_listings_not_present(found_listing_keys)
      import_result.assign_attributes({
        end_time: end_time,
        found_listing_keys: found_listing_keys,
        removed_listing_keys: removed_listing_keys.inspect
      })
      import_result.save
      File.delete(filepath)
    end
    self.update_attribute(:status, :active)
  end
end
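
A minimal driver sketch, e.g. from a hypothetical rake task that runs every active import:

# lib/tasks/imports.rake (hypothetical)
namespace :imports do
  task run_all: :environment do
    Import.where(status: 'active').find_each(&:run_import)
  end
end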

#set_import_format ⇒ Object



# File 'app/models/import.rb', line 23

def set_import_format
  self.import_format = ImportFormat.find_by(name: 'reso') unless self.import_format.present?
end
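
A sketch, assuming this is invoked as a model callback (or called manually) and an ImportFormat named 'reso' has been seeded:

import = Import.new
import.set_import_format
import.import_format    # => the 'reso' ImportFormat, unless a format was already assigned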

#source_url_last_modified ⇒ Object



# File 'app/models/import.rb', line 27

def source_url_last_modified
  open(self.source_url, 
    http_basic_authentication: [self.source_user, self.source_pass], 
    allow_redirections: :all
  ){|f| return f.last_modified }
end
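
A sketch, assuming the remote server includes a Last-Modified header in its response:

import.source_url_last_modified
# => a Time parsed from the Last-Modified header, or nil if the header is absent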

#uncompress_and_return_new_filepath(filepath) ⇒ Object



# File 'app/models/import.rb', line 135

def uncompress_and_return_new_filepath filepath
  output_path = [filepath, '.xml'].join
  File.delete(output_path) if File.file? output_path
  Zlib::GzipReader.open(filepath) do |gz|
    File.open(output_path, "w") do |g|
      IO.copy_stream(gz, g)
    end
  end
  File.delete(filepath)
  output_path
end