Module: Polars::Selectors

Defined in:
lib/polars/selectors.rb

Class Method Summary collapse

Class Method Details

.all ⇒ Selector

Select all columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
    "value" => [1_234_500, 5_000_555]
  },
  schema_overrides: {"value" => Polars::Int32}
)

Select all columns, casting them to string:

df.select(Polars.cs.all.cast(Polars::String))
# =>
# shape: (2, 2)
# ┌────────────┬─────────┐
# │ dt         ┆ value   │
# │ ---        ┆ ---     │
# │ str        ┆ str     │
# ╞════════════╪═════════╡
# │ 1999-12-31 ┆ 1234500 │
# │ 2024-01-01 ┆ 5000555 │
# └────────────┴─────────┘

Select all columns except for those matching the given dtypes:

df.select(Polars.cs.all - Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-01-01 │
# └────────────┘

Returns:



76
77
78
# File 'lib/polars/selectors.rb', line 76

# Select all columns.
#
# Wraps the native `RbSelector.all` selector in a `Selector`.
#
# @return [Selector]
def self.all
  rb_selector = RbSelector.all
  Selector._from_rbselector(rb_selector)
end

.alpha(ascii_only: false, ignore_spaces: false) ⇒ Selector

Note:

Matching column names cannot contain any non-alphabetic characters. Note that the definition of "alphabetic" consists of all valid Unicode alphabetic characters (\p{Alphabetic}) by default; this can be changed by setting ascii_only: true.

Select all columns with alphabetic names (eg: only letters).

Examples:

df = Polars::DataFrame.new(
  {
    "no1" => [100, 200, 300],
    "café" => ["espresso", "latte", "mocha"],
    "t or f" => [true, false, nil],
    "hmm" => ["aaa", "bbb", "ccc"],
    "都市" => ["東京", "大阪", "京都"]
  }
)

Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:

df.select(Polars.cs.alpha)
# =>
# shape: (3, 3)
# ┌──────────┬─────┬──────┐
# │ café     ┆ hmm ┆ 都市 │
# │ ---      ┆ --- ┆ ---  │
# │ str      ┆ str ┆ str  │
# ╞══════════╪═════╪══════╡
# │ espresso ┆ aaa ┆ 東京 │
# │ latte    ┆ bbb ┆ 大阪 │
# │ mocha    ┆ ccc ┆ 京都 │
# └──────────┴─────┴──────┘

Constrain the definition of "alphabetic" to ASCII characters only:

df.select(Polars.cs.alpha(ascii_only: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ hmm │
# │ --- │
# │ str │
# ╞═════╡
# │ aaa │
# │ bbb │
# │ ccc │
# └─────┘
df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
# =>
# shape: (3, 2)
# ┌────────┬─────┐
# │ t or f ┆ hmm │
# │ ---    ┆ --- │
# │ bool   ┆ str │
# ╞════════╪═════╡
# │ true   ┆ aaa │
# │ false  ┆ bbb │
# │ null   ┆ ccc │
# └────────┴─────┘

Select all columns except for those with alphabetic names:

df.select(~Polars.cs.alpha)
# =>
# shape: (3, 2)
# ┌─────┬────────┐
# │ no1 ┆ t or f │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 100 ┆ true   │
# │ 200 ┆ false  │
# │ 300 ┆ null   │
# └─────┴────────┘
df.select(~Polars.cs.alpha(ignore_spaces: true))
# =>
# shape: (3, 1)
# ┌─────┐
# │ no1 │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 100 │
# │ 200 │
# │ 300 │
# └─────┘

Parameters:

  • ascii_only (Boolean) (defaults to: false)

    Indicate whether to consider only ASCII alphabetic characters, or the full Unicode range of valid letters (accented, ideographic, etc).

  • ignore_spaces (Boolean) (defaults to: false)

    Indicate whether to ignore the presence of spaces in column names; if so, only the other (non-space) characters are considered.

Returns:



177
178
179
180
181
182
# File 'lib/polars/selectors.rb', line 177

# Select all columns whose names consist solely of alphabetic characters.
#
# @param ascii_only [Boolean] when truthy only ASCII letters (a-z, A-Z) count;
#   otherwise the Unicode `\p{Alphabetic}` class is used
# @param ignore_spaces [Boolean] when truthy, spaces are also permitted in
#   matching column names
# @return [Selector]
def self.alpha(ascii_only: false, ignore_spaces: false)
  # the character class ends up in a pattern evaluated by the *rust* regex
  # crate, so only syntax supported there may be used
  letters = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
  allowed = ignore_spaces ? "#{letters} " : letters
  pattern = "^[#{allowed}]+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end

.alphanumeric(ascii_only: false, ignore_spaces: false) ⇒ Selector

Note:

Matching column names cannot contain any characters other than alphabetic and digit characters. Note that by default "alphanumeric" covers all valid Unicode alphabetic characters (\p{Alphabetic}) and digit characters (\d); this can be changed by setting ascii_only: true.

Select all columns with alphanumeric names (eg: only letters and the digits 0-9).

Examples:

Select columns with alphanumeric names:

df = Polars::DataFrame.new(
  {
    "1st_col" => [100, 200, 300],
    "flagged" => [true, false, true],
    "00prefix" => ["01:aa", "02:bb", "03:cc"],
    "last col" => ["x", "y", "z"]
  }
)
df.select(Polars.cs.alphanumeric)
# =>
# shape: (3, 2)
# ┌─────────┬──────────┐
# │ flagged ┆ 00prefix │
# │ ---     ┆ ---      │
# │ bool    ┆ str      │
# ╞═════════╪══════════╡
# │ true    ┆ 01:aa    │
# │ false   ┆ 02:bb    │
# │ true    ┆ 03:cc    │
# └─────────┴──────────┘
df.select(Polars.cs.alphanumeric(ignore_spaces: true))
# =>
# shape: (3, 3)
# ┌─────────┬──────────┬──────────┐
# │ flagged ┆ 00prefix ┆ last col │
# │ ---     ┆ ---      ┆ ---      │
# │ bool    ┆ str      ┆ str      │
# ╞═════════╪══════════╪══════════╡
# │ true    ┆ 01:aa    ┆ x        │
# │ false   ┆ 02:bb    ┆ y        │
# │ true    ┆ 03:cc    ┆ z        │
# └─────────┴──────────┴──────────┘

Select all columns except for those with alphanumeric names:

df.select(~Polars.cs.alphanumeric)
# =>
# shape: (3, 2)
# ┌─────────┬──────────┐
# │ 1st_col ┆ last col │
# │ ---     ┆ ---      │
# │ i64     ┆ str      │
# ╞═════════╪══════════╡
# │ 100     ┆ x        │
# │ 200     ┆ y        │
# │ 300     ┆ z        │
# └─────────┴──────────┘
df.select(~Polars.cs.alphanumeric(ignore_spaces: true))
# =>
# shape: (3, 1)
# ┌─────────┐
# │ 1st_col │
# │ ---     │
# │ i64     │
# ╞═════════╡
# │ 100     │
# │ 200     │
# │ 300     │
# └─────────┘

Parameters:

  • ascii_only (Boolean) (defaults to: false)

    Indicate whether to consider only ASCII alphabetic characters, or the full Unicode range of valid letters (accented, ideographic, etc).

  • ignore_spaces (Boolean) (defaults to: false)

    Indicate whether to ignore the presence of spaces in column names; if so, only the other (non-space) characters are considered.

Returns:



264
265
266
267
268
269
270
271
272
# File 'lib/polars/selectors.rb', line 264

# Select all columns whose names consist solely of letters and digits.
#
# @param ascii_only [Boolean] when truthy only ASCII letters and the digits
#   0-9 count; otherwise `\p{Alphabetic}` and `\d` are used
# @param ignore_spaces [Boolean] when truthy, spaces are also permitted in
#   matching column names
# @return [Selector]
def self.alphanumeric(ascii_only: false, ignore_spaces: false)
  # every fragment must be valid inside a character class of the *rust* regex crate
  letters, digits = ascii_only ? ["a-zA-Z", "0-9"] : ["\\p{Alphabetic}", "\\d"]
  spaces = ignore_spaces ? " " : ""
  char_class = [letters, digits, spaces].join

  Selector._from_rbselector(RbSelector.matches("^[#{char_class}]+$"))
end

.array(inner = nil, width: nil) ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all array columns.

Examples:

Select all array columns:

df = Polars::DataFrame.new(
  {
    "foo" => [["xx", "yy"], ["x", "y"]],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  },
  schema_overrides: {"foo" => Polars::Array.new(Polars::String, 2)}
)
df.select(Polars.cs.array)
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘

Select all columns except for those that are array:

df.select(~Polars.cs.array)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all array columns with a certain matching inner type:

df.select(Polars.cs.array(Polars.cs.string))
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘
df.select(Polars.cs.array(Polars.cs.integer))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘
df.select(Polars.cs.array(width: 2))
# =>
# shape: (2, 1)
# ┌───────────────┐
# │ foo           │
# │ ---           │
# │ array[str, 2] │
# ╞═══════════════╡
# │ ["xx", "yy"]  │
# │ ["x", "y"]    │
# └───────────────┘
df.select(Polars.cs.array(width: 3))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



795
796
797
798
# File 'lib/polars/selectors.rb', line 795

# Select all array columns.
#
# @param inner [Selector, nil] optional selector for the inner type; its
#   `_rbselector` is forwarded, or nil when no inner selector is given
# @param width [Integer, nil] optional array width, forwarded unchanged
# @return [Selector]
def self.array(inner = nil, width: nil)
  # safe navigation leaves nil as nil and unwraps anything else
  inner_rbselector = inner&._rbselector
  rb_selector = RbSelector.array(inner_rbselector, width)
  Selector._from_rbselector(rb_selector)
end

.binary ⇒ Selector

Select all binary columns.

Examples:

df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
# =>
# shape: (1, 4)
# ┌──────────┬───────┬────────┬─────┐
# │ a        ┆ b     ┆ c      ┆ d   │
# │ ---      ┆ ---   ┆ ---    ┆ --- │
# │ binary   ┆ str   ┆ binary ┆ str │
# ╞══════════╪═══════╪════════╪═════╡
# │ b"hello" ┆ world ┆ b"!"   ┆ :)  │
# └──────────┴───────┴────────┴─────┘

Select binary columns and export as a hash:

df.select(Polars.cs.binary).to_h(as_series: false)
# => {"a"=>["hello"], "c"=>["!"]}

Select all columns except for those that are binary:

df.select(~Polars.cs.binary).to_h(as_series: false)
# => {"b"=>["world"], "d"=>[":)"]}

Returns:



297
298
299
# File 'lib/polars/selectors.rb', line 297

# Select all binary columns.
#
# Delegates to `by_dtype` with the `Binary` dtype.
#
# @return [Selector]
def self.binary
  dtypes = [Binary]
  by_dtype(dtypes)
end

.boolean ⇒ Selector

Select all boolean columns.

Examples:

df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
# =>
# shape: (4, 2)
# ┌─────┬────────┐
# │ n   ┆ n_even │
# │ --- ┆ ---    │
# │ i64 ┆ bool   │
# ╞═════╪════════╡
# │ 1   ┆ false  │
# │ 2   ┆ true   │
# │ 3   ┆ false  │
# │ 4   ┆ true   │
# └─────┴────────┘

Select and invert boolean columns:

df.with_columns(is_odd: Polars.cs.boolean.not_)
# =>
# shape: (4, 3)
# ┌─────┬────────┬────────┐
# │ n   ┆ n_even ┆ is_odd │
# │ --- ┆ ---    ┆ ---    │
# │ i64 ┆ bool   ┆ bool   │
# ╞═════╪════════╪════════╡
# │ 1   ┆ false  ┆ true   │
# │ 2   ┆ true   ┆ false  │
# │ 3   ┆ false  ┆ true   │
# │ 4   ┆ true   ┆ false  │
# └─────┴────────┴────────┘

Select all columns except for those that are boolean:

df.select(~Polars.cs.boolean)
# =>
# shape: (4, 1)
# ┌─────┐
# │ n   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# └─────┘

Returns:



349
350
351
# File 'lib/polars/selectors.rb', line 349

# Select all boolean columns.
#
# Delegates to `by_dtype` with the `Boolean` dtype.
#
# @return [Selector]
def self.boolean
  dtypes = [Boolean]
  by_dtype(dtypes)
end

.by_dtype(*dtypes) ⇒ Selector

Select all columns matching the given dtypes.

Group by string columns and sum the numeric columns (using the df defined in the examples below):

df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort("other")
# =>
# shape: (2, 2)
# ┌───────┬──────────┐
# │ other ┆ value    │
# │ ---   ┆ ---      │
# │ str   ┆ i64      │
# ╞═══════╪══════════╡
# │ bar   ┆ 5000555  │
# │ foo   ┆ -3265500 │
# └───────┴──────────┘

Examples:

Select all columns with date or string dtypes:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1), Date.new(2010, 7, 5)],
    "value" => [1_234_500, 5_000_555, -4_500_000],
    "other" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cs.by_dtype(Polars::Date, Polars::String))
# =>
# shape: (3, 2)
# ┌────────────┬───────┐
# │ dt         ┆ other │
# │ ---        ┆ ---   │
# │ date       ┆ str   │
# ╞════════════╪═══════╡
# │ 1999-12-31 ┆ foo   │
# │ 2024-01-01 ┆ bar   │
# │ 2010-07-05 ┆ foo   │
# └────────────┴───────┘

Select all columns that are not of date or string dtype:

df.select(~Polars.cs.by_dtype(Polars::Date, Polars::String))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ value    │
# │ ---      │
# │ i64      │
# ╞══════════╡
# │ 1234500  │
# │ 5000555  │
# │ -4500000 │
# └──────────┘

Returns:



404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# File 'lib/polars/selectors.rb', line 404

# Select all columns matching the given dtypes.
#
# Each argument may be a single dtype or an array of dtypes; arrays are
# flattened one level. A value is accepted when `Utils.is_polars_dtype`
# approves it or when it is a Class.
#
# @param dtypes [Array] dtypes, or arrays of dtypes
# @return [Selector]
# @raise [TypeError] if any argument (or array element) is not a valid dtype
def self.by_dtype(*dtypes)
  acceptable = ->(candidate) { Utils.is_polars_dtype(candidate) || candidate.is_a?(Class) }

  collected = dtypes.each_with_object([]) do |entry, acc|
    if acceptable.call(entry)
      acc << entry
    elsif entry.is_a?(::Array)
      entry.each do |item|
        raise TypeError, "invalid dtype: #{item.inspect}" unless acceptable.call(item)

        acc << item
      end
    else
      raise TypeError, "invalid dtype: #{entry.inspect}"
    end
  end

  Selector._by_dtype(collected)
end

.by_index(*indices, require_all: true) ⇒ Selector

Note:

Matching columns are returned in the order in which their indexes appear in the selector, not the underlying schema order.

Select all columns matching the given indices (or range objects).

Examples:

df = Polars::DataFrame.new(
  {
    "key" => ["abc"],
    **100.times.to_h { |i| ["c%02d" % i, 0.5 * i] }
  }
)
# =>
# shape: (1, 101)
# ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
# │ key ┆ c00 ┆ c01 ┆ c02 ┆ … ┆ c96  ┆ c97  ┆ c98  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ --- ┆   ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64 ┆   ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 0.5 ┆ 1.0 ┆ … ┆ 48.0 ┆ 48.5 ┆ 49.0 ┆ 49.5 │
# └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘

Select columns by index ("key" column and the two first/last columns):

df.select(Polars.cs.by_index(0, 1, 2, -2, -1))
# =>
# shape: (1, 5)
# ┌─────┬─────┬─────┬──────┬──────┐
# │ key ┆ c00 ┆ c01 ┆ c98  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 0.5 ┆ 49.0 ┆ 49.5 │
# └─────┴─────┴─────┴──────┴──────┘

Select the "key" column and use a range object to select various columns.

df.select(Polars.cs.by_index(0, (1...101).step(20)))
# =>
# shape: (1, 6)
# ┌─────┬─────┬──────┬──────┬──────┬──────┐
# │ key ┆ c00 ┆ c20  ┆ c40  ┆ c60  ┆ c80  │
# │ --- ┆ --- ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.0 ┆ 10.0 ┆ 20.0 ┆ 30.0 ┆ 40.0 │
# └─────┴─────┴──────┴──────┴──────┴──────┘
df.select(Polars.cs.by_index(0, (101...0).step(-25), require_all: false))
# =>
# shape: (1, 5)
# ┌─────┬──────┬──────┬──────┬─────┐
# │ key ┆ c75  ┆ c50  ┆ c25  ┆ c00 │
# │ --- ┆ ---  ┆ ---  ┆ ---  ┆ --- │
# │ str ┆ f64  ┆ f64  ┆ f64  ┆ f64 │
# ╞═════╪══════╪══════╪══════╪═════╡
# │ abc ┆ 37.5 ┆ 25.0 ┆ 12.5 ┆ 0.0 │
# └─────┴──────┴──────┴──────┴─────┘

Select all columns except for those at the odd-numbered indices (1, 3, 5, …):

df.select(~Polars.cs.by_index((1...100).step(2)))
# =>
# shape: (1, 51)
# ┌─────┬─────┬─────┬─────┬───┬──────┬──────┬──────┬──────┐
# │ key ┆ c01 ┆ c03 ┆ c05 ┆ … ┆ c93  ┆ c95  ┆ c97  ┆ c99  │
# │ --- ┆ --- ┆ --- ┆ --- ┆   ┆ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ str ┆ f64 ┆ f64 ┆ f64 ┆   ┆ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞═════╪═════╪═════╪═════╪═══╪══════╪══════╪══════╪══════╡
# │ abc ┆ 0.5 ┆ 1.5 ┆ 2.5 ┆ … ┆ 46.5 ┆ 47.5 ┆ 48.5 ┆ 49.5 │
# └─────┴─────┴─────┴─────┴───┴──────┴──────┴──────┴──────┘

Parameters:

  • indices (Array)

    One or more column indices (or range objects). Negative indexing is supported.

Returns:



502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
# File 'lib/polars/selectors.rb', line 502

# Select all columns matching the given indices (or range objects).
#
# Integer arguments are used as-is; any Enumerable argument (for example a
# range or a stepped range) is expanded with `to_a` and appended in order.
#
# @param indices [Array] integers and/or enumerables of indices
# @param require_all [Boolean] forwarded unchanged to `RbSelector.by_index`
#   NOTE(review): presumably controls whether every index must exist in the
#   schema (the documented example with index 101 passes `false`) — confirm
# @return [Selector]
# @raise [TypeError] if an argument is neither an Enumerable nor an Integer
def self.by_index(*indices, require_all: true)
  expanded = indices.each_with_object([]) do |entry, acc|
    if entry.is_a?(Enumerable)
      acc.concat(entry.to_a)
    elsif entry.is_a?(Integer)
      acc << entry
    else
      raise TypeError, "invalid index value: #{entry.inspect}"
    end
  end

  Selector._from_rbselector(RbSelector.by_index(expanded, require_all))
end

.by_name(*names, require_all: true) ⇒ Selector

Note:

Matching columns are returned in the order in which they are declared in the selector, not the underlying schema order.

Select all columns matching the given names.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns by name:

df.select(Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Match any of the given columns by name:

df.select(Polars.cs.by_name("baz", "moose", "foo", "bear", require_all: false))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ baz ┆ foo │
# │ --- ┆ --- │
# │ f64 ┆ str │
# ╞═════╪═════╡
# │ 2.0 ┆ x   │
# │ 5.5 ┆ y   │
# └─────┴─────┘

Match all columns except for those given:

df.select(~Polars.cs.by_name("foo", "bar"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ baz ┆ zap   │
# │ --- ┆ ---   │
# │ f64 ┆ bool  │
# ╞═════╪═══════╡
# │ 2.0 ┆ false │
# │ 5.5 ┆ true  │
# └─────┴───────┘

Parameters:

  • names (Array)

    One or more names of columns to select.

  • require_all (Boolean) (defaults to: true)

    Whether to match all names (the default) or any of the names.

Returns:



579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
# File 'lib/polars/selectors.rb', line 579

# Select all columns matching the given names.
#
# Each argument may be a String or an array of Strings; arrays are flattened
# one level, preserving the order in which names were given.
#
# @param names [Array] column names, or arrays of column names
# @param require_all [Boolean] whether to match all names (the default) or
#   any of the names; passed on as `strict:` to `Selector._by_name`
# @return [Selector]
# @raise [TypeError] if any argument (or array element) is not a String
def self.by_name(*names, require_all: true)
  collected = names.each_with_object([]) do |entry, acc|
    if entry.is_a?(::String)
      acc << entry
    elsif entry.is_a?(::Array)
      entry.each do |item|
        raise TypeError, "invalid name: #{item.inspect}" unless item.is_a?(::String)

        acc << item
      end
    else
      raise TypeError, "invalid name: #{entry.inspect}"
    end
  end

  Selector._by_name(collected, strict: require_all)
end

.categorical ⇒ Selector

Select all categorical columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["xx", "yy"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  },
  schema_overrides: {"foo" => Polars::Categorical}
)

Select all categorical columns:

df.select(Polars.cs.categorical)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ cat │
# ╞═════╡
# │ xx  │
# │ yy  │
# └─────┘

Select all columns except for those that are categorical:

df.select(~Polars.cs.categorical)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



930
931
932
# File 'lib/polars/selectors.rb', line 930

# Select all categorical columns.
#
# Wraps the native `RbSelector.categorical` selector in a `Selector`.
#
# @return [Selector]
def self.categorical
  rb_selector = RbSelector.categorical
  Selector._from_rbselector(rb_selector)
end

.contains(*substring) ⇒ Selector

Select columns whose names contain the given literal substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that contain the substring 'ba':

df.select(Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select columns that contain the substring 'ba' or the letter 'z':

df.select(Polars.cs.contains("ba", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ bar ┆ baz ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 123 ┆ 2.0 ┆ false │
# │ 456 ┆ 5.5 ┆ true  │
# └─────┴─────┴───────┘

Select all columns except for those that contain the substring 'ba':

df.select(~Polars.cs.contains("ba"))
# =>
# shape: (2, 2)
# ┌─────┬───────┐
# │ foo ┆ zap   │
# │ --- ┆ ---   │
# │ str ┆ bool  │
# ╞═════╪═══════╡
# │ x   ┆ false │
# │ y   ┆ true  │
# └─────┴───────┘

Parameters:

  • substring (Object)

    Substring(s) that matching column names should contain.

Returns:



989
990
991
992
993
994
# File 'lib/polars/selectors.rb', line 989

# Select columns whose names contain the given literal substring(s).
#
# The substrings are turned into a regex fragment by `_re_string` and then
# wrapped so the fragment may appear anywhere in the column name.
#
# @param substring [Array] substring(s) that matching column names should contain
# @return [Selector]
def self.contains(*substring)
  fragment = _re_string(substring)
  rb_selector = RbSelector.matches("^.*#{fragment}.*$")
  Selector._from_rbselector(rb_selector)
end

.date ⇒ Selector

Select all date columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
  }
)

Select all date columns:

df.select(Polars.cs.date)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 1999-12-31 │
# │ 2024-08-09 │
# └────────────┘

Select all columns except for those that are dates:

df.select(~Polars.cs.date)
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ dtm                 │
# │ ---                 │
# │ datetime[ns]        │
# ╞═════════════════════╡
# │ 2001-05-07 10:25:00 │
# │ 2031-12-31 00:30:00 │
# └─────────────────────┘

Returns:



1033
1034
1035
# File 'lib/polars/selectors.rb', line 1033

# Select all date columns.
#
# Delegates to `by_dtype` with the `Date` dtype.
#
# @return [Selector]
def self.date
  dtypes = [Date]
  by_dtype(dtypes)
end

.datetime ⇒ Selector

Select all datetime columns, optionally filtering by time unit/zone.

Returns:



1040
1041
1042
1043
1044
1045
1046
# File 'lib/polars/selectors.rb', line 1040

# Select all datetime columns, optionally filtering by time unit/zone.
#
# Called without arguments this behaves exactly as before: the time units
# "ms", "us" and "ns" and the time zone list `[nil]` are passed to
# `RbSelector.datetime`.
#
# @param time_unit [String, Array<String>, nil] one or more time units to
#   match; nil selects all of "ms", "us" and "ns"
# @param time_zone [Object, Array] one or more time zone entries to match; a
#   single value is wrapped in an array and forwarded unchanged.
#   NOTE(review): the default `[nil]` is kept for backward compatibility and
#   presumably matches only columns without a time zone — confirm whether
#   time-zone-aware columns should be included by default, and which values
#   `RbSelector.datetime` accepts.
# @return [Selector]
def self.datetime(time_unit = nil, time_zone: [nil])
  time_units =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::Array)
      time_unit
    else
      [time_unit]
    end

  # nil is a meaningful entry here, so wrap scalars by hand rather than
  # coercing with Array(), which would turn nil into an empty list
  time_zones = time_zone.is_a?(::Array) ? time_zone : [time_zone]

  Selector._from_rbselector(RbSelector.datetime(time_units, time_zones))
end

.decimal ⇒ Selector

Select all decimal columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [BigDecimal("123"), BigDecimal("456")],
    "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
  },
  schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
)

Select all decimal columns:

df.select(Polars.cs.decimal)
# =>
# shape: (2, 2)
# ┌──────────────┬───────────────┐
# │ bar          ┆ baz           │
# │ ---          ┆ ---           │
# │ decimal[*,0] ┆ decimal[10,5] │
# ╞══════════════╪═══════════════╡
# │ 123          ┆ 2.00050       │
# │ 456          ┆ -50.55550     │
# └──────────────┴───────────────┘

Select all columns except the decimal ones:


df.select(~Polars.cs.decimal)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:



1088
1089
1090
1091
# File 'lib/polars/selectors.rb', line 1088

# Select all decimal columns.
#
# Wraps the native `RbSelector.decimal` selector in a `Selector`.
#
# @return [Selector]
def self.decimal
  # TODO: allow explicit selection by scale/precision?
  rb_selector = RbSelector.decimal
  Selector._from_rbselector(rb_selector)
end

.digit(ascii_only: false) ⇒ Selector

Note:

Matching column names cannot contain any non-digit characters. Note that the definition of "digit" consists of all valid Unicode digit characters (\d) by default; this can be changed by setting ascii_only: true.

Select all columns having names consisting only of digits.

Examples:

df = Polars::DataFrame.new(
  {
    "key" => ["aaa", "bbb", "aaa", "bbb", "bbb"],
    "year" => [2001, 2001, 2025, 2025, 2001],
    "value" => [-25, 100, 75, -15, -5]
  }
).pivot(
  "year",
  values: "value",
  index: "key",
  aggregate_function: "sum"
)
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ key ┆ 2001 ┆ 2025 │
# │ --- ┆ ---  ┆ ---  │
# │ str ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ aaa ┆ -25  ┆ 75   │
# │ bbb ┆ 95   ┆ -15  │
# └─────┴──────┴──────┘

Select columns with digit names:

df.select(Polars.cs.digit)
# =>
# shape: (2, 2)
# ┌──────┬──────┐
# │ 2001 ┆ 2025 │
# │ ---  ┆ ---  │
# │ i64  ┆ i64  │
# ╞══════╪══════╡
# │ -25  ┆ 75   │
# │ 95   ┆ -15  │
# └──────┴──────┘

Select all columns except for those with digit names:

df.select(~Polars.cs.digit)
# =>
# shape: (2, 1)
# ┌─────┐
# │ key │
# │ --- │
# │ str │
# ╞═════╡
# │ aaa │
# │ bbb │
# └─────┘

Demonstrate use of ascii_only flag (by default all valid unicode digits are considered, but this can be constrained to ascii 0-9):

df = Polars::DataFrame.new({"१९९९" => [1999], "२०७७" => [2077], "3000" => [3000]})
df.select(Polars.cs.digit)
# =>
# shape: (1, 3)
# ┌──────┬──────┬──────┐
# │ १९९९ ┆ २०७७ ┆ 3000 │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  │
# ╞══════╪══════╪══════╡
# │ 1999 ┆ 2077 ┆ 3000 │
# └──────┴──────┴──────┘
df.select(Polars.cs.digit(ascii_only: true))
# =>
# shape: (1, 1)
# ┌──────┐
# │ 3000 │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3000 │
# └──────┘

Returns:



1176
1177
1178
1179
# File 'lib/polars/selectors.rb', line 1176

# Select all columns having names consisting only of digits.
#
# @param ascii_only [Boolean] when truthy only the ASCII digits 0-9 count;
#   otherwise the regex class `\d` is used
# @return [Selector]
def self.digit(ascii_only: false)
  digit_class =
    if ascii_only
      "[0-9]"
    else
      "\\d"
    end

  pattern = "^#{digit_class}+$"
  Selector._from_rbselector(RbSelector.matches(pattern))
end

.duration ⇒ Selector

Select all duration columns, optionally filtering by time unit.

Returns:



1184
1185
1186
1187
1188
# File 'lib/polars/selectors.rb', line 1184

# Select all duration columns, optionally filtering by time unit.
#
# Called without arguments this behaves exactly as before: the time units
# "ms", "us" and "ns" are passed to `RbSelector.duration`.
#
# @param time_unit [String, Array<String>, nil] one or more time units to
#   match; nil selects all of "ms", "us" and "ns"
# @return [Selector]
def self.duration(time_unit = nil)
  time_units =
    if time_unit.nil?
      ["ms", "us", "ns"]
    elsif time_unit.is_a?(::Array)
      time_unit
    else
      [time_unit]
    end

  Selector._from_rbselector(RbSelector.duration(time_units))
end

.empty ⇒ Selector

Select no columns.

This is useful for composition with other selectors.

Examples:

Polars::DataFrame.new({"a" => 1, "b" => 2}).select(Polars.cs.empty)
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



34
35
36
# File 'lib/polars/selectors.rb', line 34

# Select no columns.
#
# Wraps the native `RbSelector.empty` selector in a `Selector`; useful for
# composition with other selectors.
#
# @return [Selector]
def self.empty
  rb_selector = RbSelector.empty
  Selector._from_rbselector(rb_selector)
end

.ends_with(*suffix) ⇒ Selector

Select columns that end with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [false, true]
  }
)

Select columns that end with the substring 'z':

df.select(Polars.cs.ends_with("z"))
# =>
# shape: (2, 1)
# ┌─────┐
# │ baz │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# │ 5.5 │
# └─────┘

Select columns that end with either the letter 'z' or 'r':

df.select(Polars.cs.ends_with("z", "r"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all columns except for those that end with the substring 'z':

df.select(~Polars.cs.ends_with("z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ foo ┆ bar ┆ zap   │
# │ --- ┆ --- ┆ ---   │
# │ str ┆ i64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ x   ┆ 123 ┆ false │
# │ y   ┆ 456 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • suffix (Object)

    Substring(s) that matching column names should end with.

Returns:



1245
1246
1247
1248
1249
1250
# File 'lib/polars/selectors.rb', line 1245

# Select columns that end with the given substring(s).
#
# The suffixes are turned into a regex fragment by `_re_string`, which is then
# anchored to the end of the column name.
#
# @param suffix [Array] substring(s) that matching column names should end with
# @return [Selector]
def self.ends_with(*suffix)
  fragment = _re_string(suffix)
  rb_selector = RbSelector.matches("^.*#{fragment}$")
  Selector._from_rbselector(rb_selector)
end

.enum ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all enum columns.

Examples:

Select all enum columns:

df = Polars::DataFrame.new(
  {
    "foo" => ["xx", "yy"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
  },
  schema_overrides: {"foo" => Polars::Enum.new(["xx", "yy"])}
)
df.select(Polars.cs.enum)
# =>
# shape: (2, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ enum │
# ╞══════╡
# │ xx   │
# │ yy   │
# └──────┘

Select all columns except for those that are enum:

df.select(~Polars.cs.enum)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



642
643
644
# File 'lib/polars/selectors.rb', line 642

# Select all enum columns.
#
# Wraps the native `RbSelector.enum_` selector in a `Selector`.
#
# @return [Selector]
def self.enum
  rb_selector = RbSelector.enum_
  Selector._from_rbselector(rb_selector)
end

.exclude(columns, *more_columns) ⇒ Selector

Note:

If excluding a single selector it is simpler to write as ~selector instead.

Select all columns except those matching the given columns, datatypes, or selectors.

Examples:

Exclude by column name(s):

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
df.select(Polars.cs.exclude("ba", "xx"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude using a column name, a selector, and a dtype:

df.select(Polars.cs.exclude("aa", Polars.cs.string, Polars::UInt32))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    One or more columns (col or name), datatypes, or selectors representing the columns to exclude.

  • more_columns (Array)

    Additional columns, datatypes, or selectors to exclude, specified as positional arguments.

Returns:



1300
1301
1302
# File 'lib/polars/selectors.rb', line 1300

# Select all columns except those matching the given columns, datatypes, or
# selectors.
#
# All arguments are merged by `_combine_as_selector`; the result is then
# inverted with the `~` operator.
#
# @param columns [Object] columns, datatypes, or selectors to exclude
# @param more_columns [Array] additional items to exclude
# @return [Selector]
def self.exclude(columns, *more_columns)
  combined = _combine_as_selector(columns, *more_columns)
  ~combined
end

.first(strict: true) ⇒ Selector

Select the first column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the first column:

df.select(Polars.cs.first)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Select everything except for the first column:

df.select(~Polars.cs.first)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ f64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 1   │
# └─────┴─────┴─────┘

Returns:



1343
1344
1345
# File 'lib/polars/selectors.rb', line 1343

# Select the first column in the current scope.
#
# @param strict [Boolean] forwarded unchanged to `RbSelector.first`
#   NOTE(review): presumably controls whether a missing column is an error — confirm
# @return [Selector]
def self.first(strict: true)
  rb_selector = RbSelector.first(strict)
  Selector._from_rbselector(rb_selector)
end

.float ⇒ Selector

Select all float columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0.0, 1.0]
  },
  schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
)

Select all float columns:

df.select(Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ baz ┆ zap │
# │ --- ┆ --- │
# │ f32 ┆ f64 │
# ╞═════╪═════╡
# │ 2.0 ┆ 0.0 │
# │ 5.5 ┆ 1.0 │
# └─────┴─────┘

Select all columns except for those that are float:

df.select(~Polars.cs.float)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ bar │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 123 │
# │ y   ┆ 456 │
# └─────┴─────┘

Returns:



1387
1388
1389
# File 'lib/polars/selectors.rb', line 1387

# Select all float columns.
#
# Wraps the native `RbSelector.float` selector in a `Selector`.
#
# @return [Selector]
def self.float
  rb_selector = RbSelector.float
  Selector._from_rbselector(rb_selector)
end

.integer ⇒ Selector

Select all integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select all integer columns:

df.select(Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ zap │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 123 ┆ 0   │
# │ 456 ┆ 1   │
# └─────┴─────┘

Select all columns except for those that are integer:

df.select(~Polars.cs.integer)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ baz │
# │ --- ┆ --- │
# │ str ┆ f64 │
# ╞═════╪═════╡
# │ x   ┆ 2.0 │
# │ y   ┆ 5.5 │
# └─────┴─────┘

Returns:



1430
1431
1432
# File 'lib/polars/selectors.rb', line 1430

# Select all integer columns.
#
# Wraps the native `RbSelector.integer` selector in a `Selector`.
#
# @return [Selector]
def self.integer
  rb_selector = RbSelector.integer
  Selector._from_rbselector(rb_selector)
end

.last(strict: true) ⇒ Selector

Select the last column in the current scope.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)

Select the last column:

df.select(Polars.cs.last)
# =>
# shape: (2, 1)
# ┌─────┐
# │ zap │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Select everything except for the last column:

df.select(~Polars.cs.last)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ baz │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ x   ┆ 123 ┆ 2.0 │
# │ y   ┆ 456 ┆ 5.5 │
# └─────┴─────┴─────┘

Returns:



1587
1588
1589
# File 'lib/polars/selectors.rb', line 1587

# Select the last column in the current scope.
#
# @param strict [Boolean] forwarded unchanged to `RbSelector.last`
#   NOTE(review): presumably controls whether a missing column is an error — confirm
# @return [Selector]
def self.last(strict: true)
  rb_selector = RbSelector.last(strict)
  Selector._from_rbselector(rb_selector)
end

.list(inner = nil) ⇒ Selector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all list columns.

Examples:

Select all list columns:

df = Polars::DataFrame.new(
  {
    "foo" => [["xx", "yy"], ["x"]],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  }
)
df.select(Polars.cs.list)
# =>
# shape: (2, 1)
# ┌──────────────┐
# │ foo          │
# │ ---          │
# │ list[str]    │
# ╞══════════════╡
# │ ["xx", "yy"] │
# │ ["x"]        │
# └──────────────┘

Select all columns except for those that are list:

df.select(~Polars.cs.list)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Select all list columns with a certain matching inner type:

df.select(Polars.cs.list(Polars.cs.string))
# =>
# shape: (2, 1)
# ┌──────────────┐
# │ foo          │
# │ ---          │
# │ list[str]    │
# ╞══════════════╡
# │ ["xx", "yy"] │
# │ ["x"]        │
# └──────────────┘
df.select(Polars.cs.list(Polars.cs.integer))
# =>
# shape: (0, 0)
# ┌┐
# ╞╡
# └┘

Returns:



707
708
709
710
# File 'lib/polars/selectors.rb', line 707

# Select all list columns, optionally restricted by an inner selector.
#
# @note
#   This functionality is considered unstable. It may be changed at any
#   point without it being considered a breaking change.
#
# @param inner [Selector, nil]
#   Optional selector for the inner type; when given, its `_rbselector`
#   is handed to the native layer, otherwise `nil` is passed.
#
# @return [Selector]
def self.list(inner = nil)
  # Safe navigation keeps `nil` as `nil` and unwraps anything else.
  inner_rbselector = inner&._rbselector
  Selector._from_rbselector(RbSelector.list(inner_rbselector))
end

.matches(pattern) ⇒ Selector

Select all columns that match the given regex pattern.

Examples:

Match column names containing an 'a', preceded by a character that is not 'z':

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 1]
  }
)
df.select(Polars.cs.matches("[^z]a"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Do not match column names ending in 'R' or 'z' (case-insensitively):

df.select(~Polars.cs.matches("(?i)R|z$"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ zap │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ x   ┆ 0   │
# │ y   ┆ 1   │
# └─────┴─────┘

Parameters:

  • pattern (String)

    A valid regular expression pattern, compatible with the regex crate.

Returns:



1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
# File 'lib/polars/selectors.rb', line 1631

# Select all columns that match the given regex pattern.
#
# The pattern is normalised before being handed to the native layer:
# a leading `.*` (or, failing that, a trailing `.*`) is dropped, and
# the result is wrapped as `^.*<pattern>.*$` unless it already starts
# with `^` and/or ends with `$`. The catch-all pattern `".*"` is
# short-circuited to `all`.
#
# @param pattern [String]
#   A valid regular expression pattern, compatible with the regex crate.
#
# @return [Selector]
def self.matches(pattern)
  return all if pattern == ".*"

  # Only one redundant `.*` is removed: the leading one takes priority.
  core =
    if pattern.start_with?(".*")
      pattern[2..]
    elsif pattern.end_with?(".*")
      pattern[..-3]
    else
      pattern
    end

  # Add the wildcard anchors only on the sides that are not anchored yet.
  leading = core.start_with?("^") ? "" : "^.*"
  trailing = core.end_with?("$") ? "" : ".*$"

  Selector._from_rbselector(RbSelector.matches("#{leading}#{core}#{trailing}"))
end

.nestedSelector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all nested columns.

A nested column is a list, array or struct.

Examples:

Select all nested columns:

df = Polars::DataFrame.new(
  {
    "foo" => [{"a" => "xx", "b" => "z"}, {"a" => "x", "b" => "y"}],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "wow" => [[1, 2], [3]]
  }
)
df.select(Polars.cs.nested)
# =>
# shape: (2, 2)
# ┌────────────┬───────────┐
# │ foo        ┆ wow       │
# │ ---        ┆ ---       │
# │ struct[2]  ┆ list[i64] │
# ╞════════════╪═══════════╡
# │ {"xx","z"} ┆ [1, 2]    │
# │ {"x","y"}  ┆ [3]       │
# └────────────┴───────────┘

Select all columns except for those that are nested:

df.select(~Polars.cs.nested)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



887
888
889
# File 'lib/polars/selectors.rb', line 887

# Select all nested columns.
#
# A nested column is a list, array or struct.
#
# @note
#   This functionality is considered unstable. It may be changed at any
#   point without it being considered a breaking change.
#
# @return [Selector]
def self.nested
  rbselector = RbSelector.nested
  Selector._from_rbselector(rbselector)
end

.numericSelector

Select all numeric columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => ["x", "y"],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5],
    "zap" => [0, 0]
  },
  schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
)

Match all numeric columns:

df.select(Polars.cs.numeric)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ i16 ┆ f32 ┆ u8  │
# ╞═════╪═════╪═════╡
# │ 123 ┆ 2.0 ┆ 0   │
# │ 456 ┆ 5.5 ┆ 0   │
# └─────┴─────┴─────┘

Match all columns except for those that are numeric:

df.select(~Polars.cs.numeric)
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ x   │
# │ y   │
# └─────┘

Returns:



1689
1690
1691
# File 'lib/polars/selectors.rb', line 1689

# Select all numeric columns.
#
# @return [Selector]
def self.numeric
  rbselector = RbSelector.numeric
  Selector._from_rbselector(rbselector)
end

.objectSelector

Select all object columns.

Returns:



1696
1697
1698
# File 'lib/polars/selectors.rb', line 1696

# Select all object columns.
#
# @return [Selector]
def self.object
  rbselector = RbSelector.object
  Selector._from_rbselector(rbselector)
end

.signed_integerSelector

Select all signed integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all signed integer columns:

df.select(Polars.cs.signed_integer)
# =>
# shape: (2, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ -123 │
# │ -456 │
# └──────┘
df.select(~Polars.cs.signed_integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬─────┐
# │ bar  ┆ baz  ┆ zap │
# │ ---  ┆ ---  ┆ --- │
# │ u32  ┆ u64  ┆ str │
# ╞══════╪══════╪═════╡
# │ 3456 ┆ 7654 ┆ ab  │
# │ 6789 ┆ 4321 ┆ cd  │
# └──────┴──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:



1487
1488
1489
# File 'lib/polars/selectors.rb', line 1487

# Select all signed integer columns.
#
# @return [Selector]
def self.signed_integer
  rbselector = RbSelector.signed_integer
  Selector._from_rbselector(rbselector)
end

.starts_with(*prefix) ⇒ Selector

Select columns that start with the given substring(s).

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [1.0, 2.0],
    "bar" => [3.0, 4.0],
    "baz" => [5, 6],
    "zap" => [7, 8]
  }
)

Match columns starting with a 'b':

df.select(Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 5   │
# │ 4.0 ┆ 6   │
# └─────┴─────┘

Match columns starting with either the letter 'b' or 'z':

df.select(Polars.cs.starts_with("b", "z"))
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ bar ┆ baz ┆ zap │
# │ --- ┆ --- ┆ --- │
# │ f64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 3.0 ┆ 5   ┆ 7   │
# │ 4.0 ┆ 6   ┆ 8   │
# └─────┴─────┴─────┘

Match all columns except for those starting with 'b':

df.select(~Polars.cs.starts_with("b"))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ foo ┆ zap │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 7   │
# │ 2.0 ┆ 8   │
# └─────┴─────┘

Parameters:

  • prefix (Object)

    Substring(s) that matching column names should start with.

Returns:



1755
1756
1757
1758
1759
1760
# File 'lib/polars/selectors.rb', line 1755

# Select columns that start with the given substring(s).
#
# The collected prefixes are turned into a regex fragment by
# `_re_string`, then wrapped as `^<fragment>.*$` and handed to the
# native regex matcher.
#
# @param prefix [Object]
#   Substring(s) that matching column names should start with.
#
# @return [Selector]
def self.starts_with(*prefix)
  Selector._from_rbselector(
    RbSelector.matches("^#{_re_string(prefix)}.*$")
  )
end

.string(include_categorical: false) ⇒ Selector

Select all String (and, optionally, Categorical) string columns.

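Group by all string columns, sum the numeric columns, then sort by the string columns (using the `df` defined under Examples below):

df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ w   ┆ x   ┆ y   │
# │ --- ┆ --- ┆ --- │
# │ str ┆ i64 ┆ f64 │
# ╞═════╪═════╪═════╡
# │ xx  ┆ 0   ┆ 2.0 │
# │ yy  ┆ 6   ┆ 7.0 │
# └─────┴─────┴─────┘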

Examples:

df = Polars::DataFrame.new(
  {
    "w" => ["xx", "yy", "xx", "yy", "xx"],
    "x" => [1, 2, 1, 4, -2],
    "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
    "z" => ["a", "b", "a", "b", "b"]
  },
).with_columns(
  z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
)

Group by all string columns, sum the numeric columns, then sort by the string cols:

Group by all string and categorical columns:

df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
  Polars.cs.string(include_categorical: true)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬──────┐
# │ w   ┆ z   ┆ x   ┆ y    │
# │ --- ┆ --- ┆ --- ┆ ---  │
# │ str ┆ cat ┆ i64 ┆ f64  │
# ╞═════╪═════╪═════╪══════╡
# │ xx  ┆ a   ┆ 2   ┆ 4.0  │
# │ xx  ┆ b   ┆ -2  ┆ -2.0 │
# │ yy  ┆ b   ┆ 6   ┆ 7.0  │
# └─────┴─────┴─────┴──────┘

Returns:



1805
1806
1807
1808
1809
1810
1811
1812
# File 'lib/polars/selectors.rb', line 1805

# Select all String (and, optionally, Categorical) string columns.
#
# @param include_categorical [Boolean]
#   When truthy, `Categorical` is selected in addition to `String`.
#
# @return [Selector]
def self.string(include_categorical: false)
  dtypes = include_categorical ? [String, Categorical] : [String]
  by_dtype(dtypes)
end

.structSelector

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Select all struct columns.

Examples:

Select all struct columns:

df = Polars::DataFrame.new(
  {
    "foo" => [{"a": "xx", "b": "z"}, {"a": "x", "b": "y"}],
    "bar" => [123, 456],
    "baz" => [2.0, 5.5]
  }
)
df.select(Polars.cs.struct)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ foo        │
# │ ---        │
# │ struct[2]  │
# ╞════════════╡
# │ {"xx","z"} │
# │ {"x","y"}  │
# └────────────┘

Select all columns except for those that are struct:

df.select(~Polars.cs.struct)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ bar ┆ baz │
# │ --- ┆ --- │
# │ i64 ┆ f64 │
# ╞═════╪═════╡
# │ 123 ┆ 2.0 │
# │ 456 ┆ 5.5 │
# └─────┴─────┘

Returns:



840
841
842
# File 'lib/polars/selectors.rb', line 840

# Select all struct columns.
#
# @note
#   This functionality is considered unstable. It may be changed at any
#   point without it being considered a breaking change.
#
# @return [Selector]
def self.struct
  # The native method carries a trailing underscore (`struct_`).
  rbselector = RbSelector.struct_
  Selector._from_rbselector(rbselector)
end

.temporalSelector

Select all temporal columns.

Examples:

Match all temporal columns:

df = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2021, 1, 1), Date.new(2021, 1, 2)],
    "tm" => [DateTime.new(2000, 1, 1, 12, 0, 0), DateTime.new(2000, 1, 1, 20, 30, 45)],
    "value" => [1.2345, 2.3456],
  },
  schema_overrides: {"tm" => Polars::Time}
)
df.select(Polars.cs.temporal)
# =>
# shape: (2, 2)
# ┌────────────┬──────────┐
# │ dt         ┆ tm       │
# │ ---        ┆ ---      │
# │ date       ┆ time     │
# ╞════════════╪══════════╡
# │ 2021-01-01 ┆ 12:00:00 │
# │ 2021-01-02 ┆ 20:30:45 │
# └────────────┴──────────┘

Match all temporal columns except for time columns:

df.select(Polars.cs.temporal - Polars.cs.time)
# =>
# shape: (2, 1)
# ┌────────────┐
# │ dt         │
# │ ---        │
# │ date       │
# ╞════════════╡
# │ 2021-01-01 │
# │ 2021-01-02 │
# └────────────┘

Match all columns except for temporal columns:

df.select(~Polars.cs.temporal)
# =>
# shape: (2, 1)
# ┌────────┐
# │ value  │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ 1.2345 │
# │ 2.3456 │
# └────────┘

Returns:



1864
1865
1866
# File 'lib/polars/selectors.rb', line 1864

# Select all temporal columns.
#
# @return [Selector]
def self.temporal
  rbselector = RbSelector.temporal
  Selector._from_rbselector(rbselector)
end

.timeSelector

Select all time columns.

Examples:

df = Polars::DataFrame.new(
  {
    "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
    "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
  },
  schema_overrides: {"tm" => Polars::Time}
)

Select all time columns:

df.select(Polars.cs.time)
# =>
# shape: (2, 1)
# ┌──────────┐
# │ tm       │
# │ ---      │
# │ time     │
# ╞══════════╡
# │ 00:00:00 │
# │ 23:59:59 │
# └──────────┘

Select all columns except for those that are times:

df.select(~Polars.cs.time)
# =>
# shape: (2, 2)
# ┌─────────────────────┬────────────┐
# │ dtm                 ┆ dt         │
# │ ---                 ┆ ---        │
# │ datetime[ns]        ┆ date       │
# ╞═════════════════════╪════════════╡
# │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
# │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
# └─────────────────────┴────────────┘

Returns:



1907
1908
1909
# File 'lib/polars/selectors.rb', line 1907

# Select all time columns.
#
# Delegates to `by_dtype` with a single-element dtype list.
#
# @return [Selector]
def self.time
  time_dtypes = [Time]
  by_dtype(time_dtypes)
end

.unsigned_integerSelector

Select all unsigned integer columns.

Examples:

df = Polars::DataFrame.new(
  {
    "foo" => [-123, -456],
    "bar" => [3456, 6789],
    "baz" => [7654, 4321],
    "zap" => ["ab", "cd"]
  },
  schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
)

Select all unsigned integer columns:

df.select(Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬──────┐
# │ bar  ┆ baz  │
# │ ---  ┆ ---  │
# │ u32  ┆ u64  │
# ╞══════╪══════╡
# │ 3456 ┆ 7654 │
# │ 6789 ┆ 4321 │
# └──────┴──────┘

Select all columns except for those that are unsigned integers:

df.select(~Polars.cs.unsigned_integer)
# =>
# shape: (2, 2)
# ┌──────┬─────┐
# │ foo  ┆ zap │
# │ ---  ┆ --- │
# │ i64  ┆ str │
# ╞══════╪═════╡
# │ -123 ┆ ab  │
# │ -456 ┆ cd  │
# └──────┴─────┘

Select all integer columns (both signed and unsigned):

df.select(Polars.cs.integer)
# =>
# shape: (2, 3)
# ┌──────┬──────┬──────┐
# │ foo  ┆ bar  ┆ baz  │
# │ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ u32  ┆ u64  │
# ╞══════╪══════╪══════╡
# │ -123 ┆ 3456 ┆ 7654 │
# │ -456 ┆ 6789 ┆ 4321 │
# └──────┴──────┴──────┘

Returns:



1544
1545
1546
# File 'lib/polars/selectors.rb', line 1544

# Select all unsigned integer columns.
#
# @return [Selector]
def self.unsigned_integer
  rbselector = RbSelector.unsigned_integer
  Selector._from_rbselector(rbselector)
end