Class: Import

Inherits:
ActiveRecord::Base
Defined in:
app/models/import.rb

Instance Method Summary

Instance Method Details

#create_queued_listing_and_return_listing_key(doc, import) ⇒ Object



# File 'app/models/import.rb', line 121

def create_queued_listing_and_return_listing_key doc, import
  begin
    doc.css(import.repeating_element).each do |o|
      # Flatten the XML node into a hash of listing attributes
      listing_data = {}
      Hash.from_xml(o.to_xml)[import.repeating_element].each_pair{|key, value| listing_data[key] = value }
      queued_listing = QueuedListing.new(import: import, listing_data: listing_data)
      queued_listing.save
      # The doc holds a single repeating element, so return the key from
      # the first (and only) iteration
      return Mapper::unique_identifier(queued_listing)
    end
  rescue Exception => e
    puts e.inspect
    exit if Rails.env.development?
    return nil
  end
end
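
A minimal usage sketch (hypothetical record and XML; assumes this import's repeating_element is 'Listing' and a Rails console in this application):

import = Import.find(1)   # hypothetical record id
doc = Nokogiri::XML('<Listing><ListingKey>abc-123</ListingKey></Listing>')
listing_key = import.create_queued_listing_and_return_listing_key(doc, import)
# => the unique identifier Mapper derives from the new QueuedListing, or nil on error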

#download_feed_to_import(import) ⇒ Object



# File 'app/models/import.rb', line 93

def download_feed_to_import import
  filename = [Time.now.to_s.parameterize, import.source_url.split('/').last].join
  filepath = Rails.root.join('tmp', filename).to_s
  File.delete(filepath) if File.file? filepath
  open(filepath, 'wb') do |file|
    file << open(import.source_url, 
              http_basic_authentication: [import.source_user, import.source_pass], 
              allow_redirections: :all
            ).read
  end
  filepath
end
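
A usage sketch (hypothetical URL and credentials; relies on open-uri plus the allow_redirections option used in the source above):

import = Import.new(source_url: 'https://example.com/feed.xml',
                    source_user: 'user', source_pass: 'secret')   # hypothetical values
filepath = import.download_feed_to_import(import)
# => a path under Rails.root/tmp that joins a timestamp with "feed.xml"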

#get_open_and_closing_tag_for(repeating_element) ⇒ Object



# File 'app/models/import.rb', line 106

def get_open_and_closing_tag_for repeating_element
  # Assumes Rails' content_tag helper: builds "<element>\n</element>" and
  # splits on the newline to yield [open_tag, close_tag]
  ApplicationController.helpers.content_tag(repeating_element, "\n").split
end
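
For example (under the content_tag assumption noted above):

import = Import.find(1)   # hypothetical record
import.get_open_and_closing_tag_for('Listing')
# => ["<Listing>", "</Listing>"]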

#get_xml_header(filepath, repeating_element) ⇒ Object



# File 'app/models/import.rb', line 110

def get_xml_header filepath, repeating_element
  stream = ''
  open_tag = get_open_and_closing_tag_for(repeating_element).first
  # Accumulate lines until the first repeating element appears, then return
  # everything before it (XML declaration, root element, namespaces)
  File.foreach(filepath) do |line|
    stream += line
    pos = stream.index(open_tag)
    return stream[0..pos-1] if pos
  end
  nil # Just in case the open tag is never found
end
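
A sketch of the expected behaviour (hypothetical file contents):

# Given tmp/feed.xml that begins with:
#   <?xml version="1.0" encoding="UTF-8"?>
#   <Listings xmlns="http://example.com/reso">
#   <Listing>...
import = Import.find(1)   # hypothetical record
import.get_xml_header('tmp/feed.xml', 'Listing')
# => everything before the first <Listing>, i.e. the declaration and root element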

#new_source_data_exists? ⇒ Boolean

Returns:

  • (Boolean)


# File 'app/models/import.rb', line 34

def new_source_data_exists?
  source_url_last_modified = self.source_url_last_modified
  if source_url_last_modified.present? && self.source_data_modified.present?
    # New data exists when the feed's Last-Modified is newer than the
    # timestamp recorded on the previous import
    DateTime.parse(source_url_last_modified.to_s) > DateTime.parse(self.source_data_modified.to_s)
  else
    true
  end
end
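
A usage sketch (hypothetical timestamp; note the method fetches the feed's Last-Modified header over HTTP):

import = Import.find(1)                                    # hypothetical record
import.source_data_modified = '2024-01-01 00:00:00 UTC'    # hypothetical value
import.new_source_data_exists?
# => true if the remote feed changed since that time, or if either timestamp is missing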

#remove_listings_not_present(fresh_listing_keys) ⇒ Object



# File 'app/models/import.rb', line 14

def remove_listings_not_present fresh_listing_keys
  existing_listing_keys = self.listings.all.pluck(:listing_key)
  # Anything not seen in the fresh feed is stale
  stale_listing_keys = existing_listing_keys.delete_if{|key| fresh_listing_keys.include? key }
  stale_listing_keys.each do |listing_key|
    Listing.find_by(listing_key: listing_key).destroy
  end
  stale_listing_keys
end
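
A usage sketch (hypothetical keys):

import = Import.find(1)   # hypothetical record
removed = import.remove_listings_not_present(['key-1', 'key-2'])
# Destroys every Listing on this import whose listing_key is not in the
# given array and returns those stale keys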

#run_import ⇒ Object



# File 'app/models/import.rb', line 43

def run_import
  if self.status == 'active'
    if self.new_source_data_exists?
      
      self.update_attribute(:status, :running)
      source_data_modified = self.source_url_last_modified

      l, count, found_listing_keys, snapshots, stream = 0, 0, [], [], ''
      open_tag, close_tag = get_open_and_closing_tag_for self.repeating_element

      # Grab a file to work with
      filepath = download_feed_to_import self
      filepath = uncompress_and_return_new_filepath(filepath) if filepath.split('.').last.downcase == 'gz'
    
      # Grab the XML header to avoid namespace errors later 
      xml_header = get_xml_header filepath, self.repeating_element

      start_time = Time.now
      import_result = ImportResult.create(import: self, start_time: start_time, source_data_modified: source_data_modified)
      File.foreach(filepath) do |line|
        stream += line
        while (from_here = stream.index(open_tag)) && (to_there = stream.index(close_tag))
          xml = stream[from_here..to_there + (close_tag.length-1)]
          doc = Nokogiri::XML([xml_header, xml].join).remove_namespaces!
          found_listing_keys << create_queued_listing_and_return_listing_key(doc, self)
          stream.gsub!(xml, '')
          if ((l += 1) % 100).zero?
            GC.start
            snapshots << [l, l/(Time.now - start_time)]
          end
        end
      end
      end_time = Time.now
      removed_listing_keys = self.remove_listings_not_present(found_listing_keys)
      self.assign_attributes({
        status: :active,
        source_data_modified: source_data_modified
      })
      self.save
      import_result.assign_attributes({
        end_time: end_time,
        found_listing_keys: found_listing_keys,
        removed_listing_keys: removed_listing_keys.inspect
      })
      import_result.save
      File.delete(filepath)
    end
  end
end
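
A minimal usage sketch (assumes an active Import with a reachable source_url; typically run from a scheduled job or Rails console):

import = Import.find_by(status: 'active')   # hypothetical lookup
import.run_import
# Downloads the feed, queues one QueuedListing per repeating element,
# removes listings absent from the feed, and records an ImportResult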

#set_import_format ⇒ Object



# File 'app/models/import.rb', line 23

def set_import_format
  self.import_format = ImportFormat.find_by(name: 'reso') unless self.import_format.present?
end

#source_url_last_modified ⇒ Object



# File 'app/models/import.rb', line 27

def source_url_last_modified
  open(self.source_url, 
    http_basic_authentication: [self.source_user, self.source_pass], 
    allow_redirections: :all
  ){|f| return f.last_modified }
end
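
A usage sketch (hypothetical URL and credentials):

import = Import.new(source_url: 'https://example.com/feed.xml',
                    source_user: 'user', source_pass: 'secret')   # hypothetical values
import.source_url_last_modified
# => the feed's Last-Modified header as a Time, or nil if the server omits it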

#uncompress_and_return_new_filepath(filepath) ⇒ Object



# File 'app/models/import.rb', line 137

def uncompress_and_return_new_filepath filepath
  output_path = [filepath, '.xml'].join
  File.delete(output_path) if File.file? output_path
  Zlib::GzipReader.open(filepath) do |gz|
    File.open(output_path, "w") do |g|
      IO.copy_stream(gz, g)
    end
  end
  File.delete(filepath)
  output_path
end
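
A usage sketch (hypothetical path):

import = Import.find(1)   # hypothetical record
xml_path = import.uncompress_and_return_new_filepath('tmp/feed.xml.gz')
# => "tmp/feed.xml.gz.xml"; the original .gz file is deleted after extraction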