Module: Ultrasphinx

Defined in:
lib/ultrasphinx/spell.rb,
lib/ultrasphinx/fields.rb,
lib/ultrasphinx/search.rb,
lib/ultrasphinx/ultrasphinx.rb

Defined Under Namespace

Modules: Spell Classes: ConfigurationError, DaemonError, Exception, Fields, Search

Constant Summary collapse

SUBDIR =

internal file paths

"config/ultrasphinx"
DIR =
"#{RAILS_ROOT}/#{SUBDIR}"
CONF_PATH =
"#{DIR}/#{RAILS_ENV}.conf"
ENV_BASE_PATH =
"#{DIR}/#{RAILS_ENV}.base"
GENERIC_BASE_PATH =
"#{DIR}/default.base"
BASE_PATH =
(File.exist?(ENV_BASE_PATH) ? ENV_BASE_PATH : GENERIC_BASE_PATH)
MAX_INT =

some miscellaneous constants

2**32-1
MAX_WORDS =

maximum number of stopwords built

2**16
UNIFIED_INDEX_NAME =
"complete"
COLUMN_TYPES =
{:string => 'text', :text => 'text', :integer => 'numeric', :date => 'date', :datetime => 'date' }
CONFIG_MAP =
{:username => 'sql_user',
:password => 'sql_pass',
:host => 'sql_host',
:database => 'sql_db',
:port => 'sql_port',
:socket => 'sql_sock'}
OPTIONAL_SPHINX_KEYS =
['morphology', 'stopwords', 'min_word_len', 'charset_type', 'charset_table', 'docinfo']
SOURCE_DEFAULTS =

some default settings for the sphinx conf files

%(
strip_html = 0
index_html_attrs =
sql_query_post =
sql_range_step = 20000
)
ADAPTER_DEFAULTS =
{
  "mysql" => %(
type = mysql
sql_query_pre = SET SESSION group_concat_max_len = 65535
sql_query_pre = SET NAMES utf8
), 
  "postgresql" => %(
type = pgsql
)}
PLUGIN_SETTINGS =

introspect on the existing generated conf files

options_for('ultrasphinx', BASE_PATH)
DAEMON_SETTINGS =
options_for('searchd', BASE_PATH)
# Stopwords file, kept under the 'path' entry of the plugin settings.
# (The literal previously ended in a stray "}", yielding ".../stopwords.txt}".)
STOPWORDS_PATH =
"#{Ultrasphinx::PLUGIN_SETTINGS['path']}/stopwords.txt"
MODEL_CONFIGURATION =
{}

Class Method Summary collapse

Class Method Details

.configureObject

Main SQL builder.



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/ultrasphinx/ultrasphinx.rb', line 131

# Main SQL builder.
#
# Rewrites the Sphinx configuration file at CONF_PATH. The output consists of:
#
# * the contents of BASE_PATH with its "ultrasphinx { ... }" section removed,
# * one "source" section per entry of MODEL_CONFIGURATION, holding the
#   connection settings and the generated indexing SQL,
# * a single "index" section (UNIFIED_INDEX_NAME) that lists every source.
#
# Progress messages are printed to standard output.
#
# Raises ConfigurationError when a model uses a database adapter that has no
# entry in ADAPTER_DEFAULTS, or when an :includes / :concats entry names an
# association that cannot be found and supplies no :association_sql.
def configure
  load_constants

  # Report the RAILS_ENV constant: it is what CONF_PATH is built from, whereas
  # ENV['RAILS_ENV'] may be unset.
  puts "Rebuilding Ultrasphinx configurations for #{RAILS_ENV} environment"
  puts "Available models are #{MODEL_CONFIGURATION.keys.to_sentence}"
  File.open(CONF_PATH, "w") do |conf|
    conf.puts "\n# Auto-generated at #{Time.now}.\n# Hand modifications will be overwritten.\n"

    conf.puts "\n# #{BASE_PATH}"
    # File.read closes the handle; the plugin's own section is stripped because
    # it is not meant for the generated file.
    conf.puts File.read(BASE_PATH).sub(/^ultrasphinx.*?\{.*?\}/m, '') + "\n"

    sphinx_source_list = []

    conf.puts "\n# Source configuration\n\n"

    puts "Generating SQL"
    MODEL_CONFIGURATION.each_with_index do |model_options, class_id|
      model, options = model_options
      klass, source = model.constantize, model.tableize

#          puts "SQL for #{model}"

      sphinx_source_list << source

      conf.puts "source #{source}\n{"
      conf.puts SOURCE_DEFAULTS

      # apparently we're supporting postgres now
      connection_settings = klass.connection.instance_variable_get("@config")

      adapter_defaults = ADAPTER_DEFAULTS[connection_settings[:adapter]]
      raise ConfigurationError, "Unsupported database adapter" unless adapter_defaults
      conf.puts adapter_defaults

      # Only the connection keys listed in CONFIG_MAP are written out.
      connection_settings.each do |key, value|
        conf.puts "#{CONFIG_MAP[key]} = #{value}" if CONFIG_MAP[key]
      end

      table, pkey = klass.table_name, klass.primary_key
      condition_strings, join_strings = Array(options[:conditions]).map{|condition| "(#{condition})"}, []
      # The document id interleaves the models: pkey * model_count + class_id.
      # sql_query_info at the end of this section reverses the same formula.
      column_strings = ["(#{table}.#{pkey} * #{MODEL_CONFIGURATION.size} + #{class_id}) AS id",
                                   "#{class_id} AS class_id", "'#{klass.name}' AS class"]
      # Fields this model does not supply itself; whatever is left after the
      # loops below is emitted through Fields.instance.null.
      remaining_columns = Fields.instance.keys - ["class", "class_id"]

      conf.puts "\nsql_query_range = SELECT MIN(#{pkey}), MAX(#{pkey}) FROM #{table}"

      # Plain columns of the model's own table.
      options[:fields].to_a.each do |f|
        column, as = f.is_a?(Hash) ? [f[:field], f[:as]] : [f, f]
        column_strings << Fields.instance.cast("#{table}.#{column}", as)
        remaining_columns.delete(as)
      end

      # Single columns pulled in from belongs_to / has_one associations, or
      # through caller-supplied join SQL.
      options[:includes].to_a.each do |join|
        join_klass = join[:model].constantize
        association = klass.reflect_on_association(join[:model].underscore.to_sym)
        if not association
          if not join[:association_sql]
            raise ConfigurationError, "Unknown association from #{klass} to #{join[:model]}"
          else
            join_strings << join[:association_sql]
          end
        else
          join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON " +
            if (macro = association.macro) == :belongs_to
              "#{join_klass.table_name}.#{join_klass.primary_key} = #{table}.#{association.primary_key_name}"
            elsif macro == :has_one
              "#{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.instance_variable_get('@foreign_key_name')}"
            else
              raise ConfigurationError, "Unidentified association macro #{macro.inspect}"
            end
        end
        column_strings << "#{join_klass.table_name}.#{join[:field]} AS #{join[:as] or join[:field]}"
        remaining_columns.delete(join[:as] || join[:field])
      end

      # Columns concatenated across the rows of an associated table.
      options[:concats].to_a.select{|concat| concat[:model] and concat[:field]}.each do |group|
        # only has_many's or explicit sql right now
        join_klass = group[:model].constantize
        if group[:association_sql]
          join_strings << group[:association_sql]
        else
          association_name = group[:association_name] ? group[:association_name].to_sym : group[:model].underscore.pluralize.to_sym
          association = klass.reflect_on_association(association_name)
          # Fail with the same error the :includes branch uses instead of
          # calling a method on nil below.
          raise ConfigurationError, "Unknown association from #{klass} to #{group[:model]}" unless association
          join_strings << "LEFT OUTER JOIN #{join_klass.table_name} ON #{table}.#{klass.primary_key} = #{join_klass.table_name}.#{association.primary_key_name}" + (" AND (#{group[:conditions]})" if group[:conditions]).to_s # XXX make sure foreign key is right for polymorphic relationships
        end
        column_strings << Fields.instance.cast("GROUP_CONCAT(#{join_klass.table_name}.#{group[:field]} SEPARATOR ' ')", group[:as])
        remaining_columns.delete(group[:as])
      end

      # Several columns of the model's own table concatenated into one field.
      options[:concats].to_a.select{|concat| concat[:fields]}.each do |concat|
        column_strings << Fields.instance.cast("CONCAT_WS(' ', #{concat[:fields].map{|field| "#{table}.#{field}"}.join(', ')})", concat[:as])
        remaining_columns.delete(concat[:as])
      end

#          puts "#{model} has #{remaining_columns.inspect} remaining"
      remaining_columns.each do |field|
        column_strings << Fields.instance.null(field)
      end

      query_strings = ["SELECT", column_strings.sort_by do |string|
        # sphinx wants them always in the same order, but "id" must be first
        (field = string[/.*AS (.*)/, 1]) == "id" ? "*" : field
      end.join(", ")]
      query_strings << "FROM #{table}"
      query_strings += join_strings.uniq
      query_strings << "WHERE #{table}.#{pkey} >= $start AND #{table}.#{pkey} <= $end"
      query_strings += condition_strings.uniq.map{|s| "AND #{s}"}
      query_strings << "GROUP BY id"

      conf.puts "sql_query = #{query_strings.join(" ")}"

      groups = []
      # group and date sorting params... this really only would have to be run once
      Fields.instance.each do |field, type|
        case type
          when 'numeric'
            groups << "sql_group_column = #{field}"
          when 'date'
            groups << "sql_date_column = #{field}"
        end
      end
      conf.puts "\n" + groups.sort_by{|s| s[/= (.*)/, 1]}.join("\n")
      conf.puts "\nsql_query_info = SELECT * FROM #{table} WHERE #{table}.#{pkey} = (($id - #{class_id}) / #{MODEL_CONFIGURATION.size})"
      conf.puts "}\n\n"
    end

    conf.puts "\n# Index configuration\n\n"


    # only output the unified index; no one uses the individual ones anyway

    conf.puts "index #{UNIFIED_INDEX_NAME}"
    conf.puts "{"
    conf.puts sphinx_source_list.map {|s| "source = #{s}" }

    # Optional index settings are copied through only when the plugin
    # settings define them.
    OPTIONAL_SPHINX_KEYS.each do |key|
      conf.puts "#{key} = #{PLUGIN_SETTINGS[key]}" if PLUGIN_SETTINGS[key]
    end

    conf.puts "path = #{PLUGIN_SETTINGS["path"]}/sphinx_index_#{UNIFIED_INDEX_NAME}"
    conf.puts "}\n\n"
  end

end

.load_constantsObject

Force all the indexed models to load and fill the MODEL_CONFIGURATION hash.



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/ultrasphinx/ultrasphinx.rb', line 103

# Force all the indexed models to load and fill the MODEL_CONFIGURATION hash.
#
# Every Ruby file below RAILS_ROOT/app/models whose text mentions
# "is_indexed" is loaded (presumably the is_indexed call, defined elsewhere,
# is what populates MODEL_CONFIGURATION). A file that cannot be read or
# loaded is reported through +say+ and skipped so the remaining models are
# still processed. Finally MODEL_CONFIGURATION is handed to
# Fields.instance.configure.
def load_constants

  Dir["#{RAILS_ROOT}/app/models/**/*.rb"].each do |filename|
    # skip anything that lives inside a version-control metadata directory
    next if filename =~ /\/(\.svn|CVS|\.bzr)\//
    begin
      # Scan the file first; the block form closes the handle before loading.
      indexed = File.open(filename) { |file| file.grep(/is_indexed/).any? }
      load filename if indexed
    rescue StandardError, ScriptError => e
      # ScriptError covers LoadError and SyntaxError raised by +load+.
      # SystemExit, Interrupt and similar are deliberately not swallowed.
      say "warning; possibly critical autoload error on #{filename}"
      say e.inspect
    end
  end

  # build the field-to-type mappings
  Fields.instance.configure(MODEL_CONFIGURATION)
end

.options_for(heading, path) ⇒ Object

Configuration file parser.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ultrasphinx/ultrasphinx.rb', line 67

# Configuration file parser.
#
# Reads the file at +path+, locates the first section whose line starts with
# +heading+ and returns the "key = value" settings found between its braces.
#
# heading - String that begins the section's opening line (it is interpolated
#           into a regular expression as-is).
# path    - String path of the configuration file.
#
# Returns a Hash of String keys to String values; anything from a "#" onward
# is dropped from the value and the rest is stripped. When a key occurs more
# than once the last occurrence wins.
#
# Raises ConfigurationError when no such section exists in the file.
def self.options_for(heading, path)

  # File.read closes the handle and, unlike Kernel#open, never treats a path
  # starting with "|" as a command to run.
  section = File.read(path)[/^#{heading}.*?\{(.*?)\}/m, 1]
  unless section
    Ultrasphinx.say "#{path} appears to be corrupted; please delete file"
    raise ConfigurationError, "Missing heading #{heading.inspect}"
  end

  # Fill the hash directly rather than splatting every key and value of the
  # section into one Hash[] argument list.
  options = {}
  section.split("\n").each do |line|
    next unless line =~ /\s*(.*?)\s*=\s*([^\#]*)/
    options[$1] = $2.strip
  end

  options
end

.say(msg) ⇒ Object

Logger.



98
99
100
# File 'lib/ultrasphinx/ultrasphinx.rb', line 98

# Logger.
#
# Writes +msg+ to $stderr, prefixed with "** ultrasphinx: ".
def say(msg)
  tagged = "** ultrasphinx: #{msg}"
  $stderr.puts(tagged)
end

.verify_database_nameObject

Complain if the database names go out of sync.



120
121
122
123
124
125
126
127
# File 'lib/ultrasphinx/ultrasphinx.rb', line 120

# Complain if the database names go out of sync.
#
# Does nothing when CONF_PATH does not exist. Otherwise the 'sql_db' value of
# the "source" section in that file is compared with the :database entry of
# the ActiveRecord connection's @config, and two notices are emitted through
# +say+ when they differ. Any StandardError raised during the check is
# swallowed, so the check is best-effort only.
def verify_database_name
  return unless File.exist?(CONF_PATH)

  begin
    configured_name = options_for("source", CONF_PATH)['sql_db']
    live_name = ActiveRecord::Base.connection.instance_variable_get("@config")[:database]
    unless configured_name == live_name
      say "warning; configured database name is out-of-date"
      say "please run 'rake ultrasphinx:configure'"
    end
  rescue
    # a bare rescue covers StandardError only, like the rescue modifier
    nil
  end
end