Method: Enumerable#sort_by

Defined in:
enum.c

#sort_by {|element| ... } ⇒ Array #sort_byObject

With a block given, returns an array of elements of self, sorted according to the value returned by the block for each element. The ordering of equal elements is indeterminate and may be unstable.

Examples:

a = %w[xx xxx x xxxx]
a.sort_by {|s| s.size }        # => ["x", "xx", "xxx", "xxxx"]
a.sort_by {|s| -s.size }       # => ["xxxx", "xxx", "xx", "x"]
h = {foo: 2, bar: 1, baz: 0}
h.sort_by{|key, value| value } # => [[:baz, 0], [:bar, 1], [:foo, 2]]
h.sort_by{|key, value| key }   # => [[:bar, 1], [:baz, 0], [:foo, 2]]

With no block given, returns an Enumerator.

The current implementation of #sort_by generates an array of tuples containing the original collection element and the mapped value. This makes #sort_by fairly expensive when the keysets are simple.

require 'benchmark'

a = (1..100000).map { rand(100000) }

Benchmark.bm(10) do |b|
  b.report("Sort")    { a.sort }
  b.report("Sort by") { a.sort_by { |a| a } }
end

produces:

user     system      total        real
Sort        0.180000   0.000000   0.180000 (  0.175469)
Sort by     1.980000   0.040000   2.020000 (  2.013586)

However, consider the case where comparing the keys is a non-trivial operation. The following code sorts some files on modification time using the basic #sort method.

files = Dir["*"]
sorted = files.sort { |a, b| File.new(a).mtime <=> File.new(b).mtime }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This sort is inefficient: it generates two new File objects during every comparison. A slightly better technique is to use the Kernel#test method to generate the modification times directly.

files = Dir["*"]
sorted = files.sort { |a, b|
  test(?M, a) <=> test(?M, b)
}
sorted   #=> ["mon", "tues", "wed", "thurs"]

This still generates many unnecessary Time objects. A more efficient technique is to cache the sort keys (modification times in this case) before the sort. Perl users often call this approach a Schwartzian transform, after Randal Schwartz. We construct a temporary array, where each element is an array containing our sort key along with the filename. We sort this array, and then extract the filename from the result.

sorted = Dir["*"].collect { |f|
   [test(?M, f), f]
}.sort.collect { |f| f[1] }
sorted   #=> ["mon", "tues", "wed", "thurs"]

This is exactly what #sort_by does internally.

sorted = Dir["*"].sort_by { |f| test(?M, f) }
sorted   #=> ["mon", "tues", "wed", "thurs"]

To produce the reverse of a specific order, the following can be used:

ary.sort_by { ... }.reverse!

Overloads:

  • #sort_by {|element| ... } ⇒ Array

    Yields:

    • (element)

    Returns:



1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
# File 'enum.c', line 1704

static VALUE
enum_sort_by(VALUE obj)
{
    VALUE ary, buf;
    struct MEMO *memo;
    long i;
    struct sort_by_data *data;

    RETURN_SIZED_ENUMERATOR(obj, 0, 0, enum_size);

    if (RB_TYPE_P(obj, T_ARRAY) && RARRAY_LEN(obj) <= LONG_MAX/2) {
        ary = rb_ary_new2(RARRAY_LEN(obj)*2);
    }
    else {
        ary = rb_ary_new();
    }
    RBASIC_CLEAR_CLASS(ary);
    buf = rb_ary_hidden_new(SORT_BY_BUFSIZE*2);
    rb_ary_store(buf, SORT_BY_BUFSIZE*2-1, Qnil);
    memo = MEMO_NEW(0, 0, 0);
    data = (struct sort_by_data *)&memo->v1;
    RB_OBJ_WRITE(memo, &data->ary, ary);
    RB_OBJ_WRITE(memo, &data->buf, buf);
    data->n = 0;
    data->primitive_uniformed = SORT_BY_UNIFORMED((CMP_OPTIMIZABLE(FLOAT) && CMP_OPTIMIZABLE(INTEGER)),
                                                  CMP_OPTIMIZABLE(FLOAT),
                                                  CMP_OPTIMIZABLE(INTEGER));
    rb_block_call(obj, id_each, 0, 0, sort_by_i, (VALUE)memo);
    ary = data->ary;
    buf = data->buf;
    if (data->n) {
        rb_ary_resize(buf, data->n*2);
        rb_ary_concat(ary, buf);
    }
    if (RARRAY_LEN(ary) > 2) {
        if (data->primitive_uniformed) {
            RARRAY_PTR_USE(ary, ptr,
                           rb_uniform_intro_sort_2((struct rb_uniform_sort_data*)ptr,
                                                   (struct rb_uniform_sort_data*)(ptr + RARRAY_LEN(ary))));
        }
        else {
            RARRAY_PTR_USE(ary, ptr,
                           ruby_qsort(ptr, RARRAY_LEN(ary)/2, 2*sizeof(VALUE),
                                      sort_by_cmp, (void *)ary));
        }
    }
    if (RBASIC(ary)->klass) {
        rb_raise(rb_eRuntimeError, "sort_by reentered");
    }
    for (i=1; i<RARRAY_LEN(ary); i+=2) {
        RARRAY_ASET(ary, i/2, RARRAY_AREF(ary, i));
    }
    rb_ary_resize(ary, RARRAY_LEN(ary)/2);
    RBASIC_SET_CLASS_RAW(ary, rb_cArray);

    return ary;
}