Method: Enumerable#sort_by

Defined in:: enum.c

#sort_by {|element| ... } ⇒ `Array` #sort_by ⇒ `Object`

With a block given, returns an array of elements of self, sorted according to the value returned by the block for each element. The ordering of equal elements is indeterminate and may be unstable.

Examples:

a = %w[xx xxx x xxxx]
a.sort_by {|s| s.size }        # => ["x", "xx", "xxx", "xxxx"]
a.sort_by {|s| -s.size }       # => ["xxxx", "xxx", "xx", "x"]
h = {foo: 2, bar: 1, baz: 0}
h.sort_by{|key, value| value } # => [[:baz, 0], [:bar, 1], [:foo, 2]]
h.sort_by{|key, value| key }   # => [[:bar, 1], [:baz, 0], [:foo, 2]]

With no block given, returns an Enumerator.

The current implementation of #sort_by generates an array of tuples containing the original collection element and the mapped value. This makes #sort_by fairly expensive when the keysets are simple.

require 'benchmark'

a = (1..100000).map { rand(100000) }

Benchmark.bm(10) do |b|
  b.report("Sort")    { a.sort }
  b.report("Sort by") { a.sort_by { |a| a } }
end

produces:

user     system      total        real
Sort        0.180000   0.000000   0.180000 (  0.175469)
Sort by     1.980000   0.040000   2.020000 (  2.013586)

However, consider the case where comparing the keys is a non-trivial operation. The following code sorts some files on modification time using the basic #sort method.

files = Dir["*"]
sorted = files.sort { |a, b| File.new(a).mtime <=> File.new(b).mtime }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This sort is inefficient: it generates two new File objects during every comparison. A slightly better technique is to use the Kernel#test method to generate the modification times directly.

files = Dir["*"]
sorted = files.sort { |a, b|
  test(?M, a) <=> test(?M, b)
}
sorted   #=> ["mon", "tues", "wed", "thurs"]

This still generates many unnecessary Time objects. A more efficient technique is to cache the sort keys (modification times in this case) before the sort. Perl users often call this approach a Schwartzian transform, after Randal Schwartz. We construct a temporary array, where each element is an array containing our sort key along with the filename. We sort this array, and then extract the filename from the result.

sorted = Dir["*"].collect { |f|
   [test(?M, f), f]
}.sort.collect { |f| f[1] }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This is exactly what #sort_by does internally.

sorted = Dir["*"].sort_by { |f| test(?M, f) }
sorted   #=> ["mon", "tues", "wed", "thurs"]

To produce the reverse of a specific order, the following can be used:

ary.sort_by { ... }.reverse!

Overloads:

#sort_by {|element| ... } ⇒ Array
Yields:
- (element)
Returns:
- (Array)

# File 'enum.c', line 1704

static VALUE
enum_sort_by(VALUE obj)
{
    VALUE ary, buf;
    struct MEMO *memo;
    long i;
    struct sort_by_data *data;

    RETURN_SIZED_ENUMERATOR(obj, 0, 0, enum_size);

    if (RB_TYPE_P(obj, T_ARRAY) && RARRAY_LEN(obj) <= LONG_MAX/2) {
        ary = rb_ary_new2(RARRAY_LEN(obj)*2);
    }
    else {
        ary = rb_ary_new();
    }
    RBASIC_CLEAR_CLASS(ary);
    buf = rb_ary_hidden_new(SORT_BY_BUFSIZE*2);
    rb_ary_store(buf, SORT_BY_BUFSIZE*2-1, Qnil);
    memo = MEMO_NEW(0, 0, 0);
    data = (struct sort_by_data *)&memo->v1;
    RB_OBJ_WRITE(memo, &data->ary, ary);
    RB_OBJ_WRITE(memo, &data->buf, buf);
    data->n = 0;
    data->primitive_uniformed = SORT_BY_UNIFORMED((CMP_OPTIMIZABLE(FLOAT) && CMP_OPTIMIZABLE(INTEGER)),
                                                  CMP_OPTIMIZABLE(FLOAT),
                                                  CMP_OPTIMIZABLE(INTEGER));
    rb_block_call(obj, id_each, 0, 0, sort_by_i, (VALUE)memo);
    ary = data->ary;
    buf = data->buf;
    if (data->n) {
        rb_ary_resize(buf, data->n*2);
        rb_ary_concat(ary, buf);
    }
    if (RARRAY_LEN(ary) > 2) {
        if (data->primitive_uniformed) {
            RARRAY_PTR_USE(ary, ptr,
                           rb_uniform_intro_sort_2((struct rb_uniform_sort_data*)ptr,
                                                   (struct rb_uniform_sort_data*)(ptr + RARRAY_LEN(ary))));
        }
        else {
            RARRAY_PTR_USE(ary, ptr,
                           ruby_qsort(ptr, RARRAY_LEN(ary)/2, 2*sizeof(VALUE),
                                      sort_by_cmp, (void *)ary));
        }
    }
    if (RBASIC(ary)->klass) {
        rb_raise(rb_eRuntimeError, "sort_by reentered");
    }
    for (i=1; i<RARRAY_LEN(ary); i+=2) {
        RARRAY_ASET(ary, i/2, RARRAY_AREF(ary, i));
    }
    rb_ary_resize(ary, RARRAY_LEN(ary)/2);
    RBASIC_SET_CLASS_RAW(ary, rb_cArray);

    return ary;
}

Method: Enumerable#sort_by

#sort_by {|element| ... } ⇒ Array #sort_by ⇒ Object

#sort_by {|element| ... } ⇒ `Array` #sort_by ⇒ `Object`