Class: Polars::Expr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/expr.rb

Overview

Expressions that can be used in various contexts.

Instance Method Summary collapse

Instance Method Details

#! ⇒ Expr

Performs boolean not.

Returns:



134
135
136
# File 'lib/polars/expr.rb', line 134

# Boolean NOT operator; returns whatever `is_not` returns.
def !
  negated = is_not
  negated
end

#!=(other) ⇒ Expr

Not equal.

Returns:



113
114
115
# File 'lib/polars/expr.rb', line 113

# Inequality operator: coerces `other` with `_to_expr` and calls the native `neq`.
def !=(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.neq(rhs))
end

#%(other) ⇒ Expr

Returns the modulo.

Returns:



77
78
79
# File 'lib/polars/expr.rb', line 77

# Modulo operator: applies `%` between the native expression and the coerced operand.
def %(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs % rhs)
end

#&(other) ⇒ Expr

Bitwise AND.

Returns:



35
36
37
# File 'lib/polars/expr.rb', line 35

# Bitwise AND operator: forwards the coerced operand to the native `_and`.
def &(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs._and(rhs))
end

#*(other) ⇒ Expr

Performs multiplication.

Returns:



63
64
65
# File 'lib/polars/expr.rb', line 63

# Multiplication operator: applies `*` between the native expression and the coerced operand.
def *(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs * rhs)
end

#**(power) ⇒ Expr

Raises to the power of exponent.

Returns:



84
85
86
87
# File 'lib/polars/expr.rb', line 84

# Power operator: converts `power` via Utils.expr_to_lit_or_expr, then calls the native `pow`.
def **(power)
  rhs = Utils.expr_to_lit_or_expr(power)
  lhs = _rbexpr
  _from_rbexpr(lhs.pow(rhs._rbexpr))
end

#+(other) ⇒ Expr

Performs addition.

Returns:



49
50
51
# File 'lib/polars/expr.rb', line 49

# Addition operator: applies `+` between the native expression and the coerced operand.
def +(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs + rhs)
end

#-(other) ⇒ Expr

Performs subtraction.

Returns:



56
57
58
# File 'lib/polars/expr.rb', line 56

# Subtraction operator: applies `-` between the native expression and the coerced operand.
def -(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs - rhs)
end

#-@ ⇒ Expr

Performs negation.

Returns:



141
142
143
# File 'lib/polars/expr.rb', line 141

# Unary minus: calls the native `neg` and hands the result to `_from_rbexpr`.
def -@
  negated = _rbexpr.neg
  _from_rbexpr(negated)
end

#/(other) ⇒ Expr

Performs division.

Returns:



70
71
72
# File 'lib/polars/expr.rb', line 70

# Division operator: applies `/` between the native expression and the coerced operand.
def /(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs / rhs)
end

#<(other) ⇒ Expr

Less than.

Returns:



120
121
122
# File 'lib/polars/expr.rb', line 120

# Less-than operator: coerces `other` with `_to_expr` and calls the native `lt`.
def <(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.lt(rhs))
end

#<=(other) ⇒ Expr

Less than or equal.

Returns:



99
100
101
# File 'lib/polars/expr.rb', line 99

# Less-than-or-equal operator: coerces `other` with `_to_expr` and calls the native `lt_eq`.
def <=(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.lt_eq(rhs))
end

#==(other) ⇒ Expr

Equal.

Returns:



106
107
108
# File 'lib/polars/expr.rb', line 106

# Equality operator: coerces `other` with `_to_expr` and calls the native `eq`.
def ==(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.eq(rhs))
end

#>(other) ⇒ Expr

Greater than.

Returns:



127
128
129
# File 'lib/polars/expr.rb', line 127

# Greater-than operator: coerces `other` with `_to_expr` and calls the native `gt`.
def >(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.gt(rhs))
end

#>=(other) ⇒ Expr

Greater than or equal.

Returns:



92
93
94
# File 'lib/polars/expr.rb', line 92

# Greater-than-or-equal operator: coerces `other` with `_to_expr` and calls the native `gt_eq`.
def >=(other)
  lhs = _rbexpr
  rhs = _to_expr(other)._rbexpr
  _from_rbexpr(lhs.gt_eq(rhs))
end

#^(other) ⇒ Expr

Bitwise XOR.

Returns:



28
29
30
# File 'lib/polars/expr.rb', line 28

# Bitwise XOR operator: forwards the coerced operand to the native `_xor`.
def ^(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs._xor(rhs))
end

#_hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil) ⇒ Expr

Hash the elements in the selection.

The hash value is of type :u64.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => ["x", nil, "z"]
  }
)
df.with_column(Polars.all._hash(10, 20, 30, 40))
# =>
# shape: (3, 2)
# ┌──────────────────────┬──────────────────────┐
# │ a                    ┆ b                    │
# │ ---                  ┆ ---                  │
# │ u64                  ┆ u64                  │
# ╞══════════════════════╪══════════════════════╡
# │ 4629889412789719550  ┆ 6959506404929392568  │
# │ 16386608652769605760 ┆ 11638928888656214026 │
# │ 11638928888656214026 ┆ 11040941213715918520 │
# └──────────────────────┴──────────────────────┘

Parameters:

  • seed (Integer) (defaults to: 0)

    Random seed parameter. Defaults to 0.

  • seed_1 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

  • seed_2 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

  • seed_3 (Integer) (defaults to: nil)

    Random seed parameter. Defaults to seed if not set.

Returns:



3754
3755
3756
3757
3758
3759
3760
# File 'lib/polars/expr.rb', line 3754

# Hash the elements using four seeds. Any of seed_1..seed_3 left as nil
# falls back to `seed`, so a single argument seeds all four slots.
def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
  k1, k2, k3 = [seed_1, seed_2, seed_3].map { |extra| extra.nil? ? seed : extra }
  _from_rbexpr(_rbexpr._hash(seed, k1, k2, k3))
end

#abs ⇒ Expr

Compute absolute values.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [-1.0, 0.0, 1.0, 2.0]
  }
)
df.select(Polars.col("A").abs)
# =>
# shape: (4, 1)
# ┌─────┐
# │ A   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 0.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘

Returns:



4674
4675
4676
# File 'lib/polars/expr.rb', line 4674

# Absolute values: calls the native `abs` and hands the result to `_from_rbexpr`.
def abs
  native = _rbexpr.abs
  _from_rbexpr(native)
end

#add(other) ⇒ Expr

Method equivalent of addition operator expr + other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").add(2).alias("x+int"),
  Polars.col("x").add(Polars.col("x").cum_prod).alias("x+expr")
)
# =>
# shape: (5, 3)
# ┌─────┬───────┬────────┐
# │ x   ┆ x+int ┆ x+expr │
# │ --- ┆ ---   ┆ ---    │
# │ i64 ┆ i64   ┆ i64    │
# ╞═════╪═══════╪════════╡
# │ 1   ┆ 3     ┆ 2      │
# │ 2   ┆ 4     ┆ 4      │
# │ 3   ┆ 5     ┆ 9      │
# │ 4   ┆ 6     ┆ 28     │
# │ 5   ┆ 7     ┆ 125    │
# └─────┴───────┴────────┘
df = Polars::DataFrame.new(
  {"x" => ["a", "d", "g"], "y": ["b", "e", "h"], "z": ["c", "f", "i"]}
)
df.with_columns(Polars.col("x").add(Polars.col("y")).add(Polars.col("z")).alias("xyz"))
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬─────┐
# │ x   ┆ y   ┆ z   ┆ xyz │
# │ --- ┆ --- ┆ --- ┆ --- │
# │ str ┆ str ┆ str ┆ str │
# ╞═════╪═════╪═════╪═════╡
# │ a   ┆ b   ┆ c   ┆ abc │
# │ d   ┆ e   ┆ f   ┆ def │
# │ g   ┆ h   ┆ i   ┆ ghi │
# └─────┴─────┴─────┴─────┘

Parameters:

  • other (Object)

    numeric or string value; accepts expression input.

Returns:



3337
3338
3339
# File 'lib/polars/expr.rb', line 3337

# Named form of the `+` operator: dispatches to this object's own `+` with `other`.
def add(other)
  self.+(other)
end

#agg_groups ⇒ Expr

Get the group indexes of the group by operation.

Should be used in aggregation context only.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [94, 95, 96, 97, 97, 99]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").agg_groups)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[u32] │
# ╞═══════╪═══════════╡
# │ one   ┆ [0, 1, 2] │
# │ two   ┆ [3, 4, 5] │
# └───────┴───────────┘

Returns:



701
702
703
# File 'lib/polars/expr.rb', line 701

# Group indexes of a group-by: calls the native `agg_groups` and hands the result to `_from_rbexpr`.
def agg_groups
  native = _rbexpr.agg_groups
  _from_rbexpr(native)
end

#alias(name) ⇒ Expr

Rename the output of an expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["a", "b", nil]
  }
)
df.select(
  [
    Polars.col("a").alias("bar"),
    Polars.col("b").alias("foo")
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ bar ┆ foo  │
# │ --- ┆ ---  │
# │ i64 ┆ str  │
# ╞═════╪══════╡
# │ 1   ┆ a    │
# │ 2   ┆ b    │
# │ 3   ┆ null │
# └─────┴──────┘

Parameters:

Returns:



324
325
326
# File 'lib/polars/expr.rb', line 324

# Rename the expression output: passes `name` to the native `_alias`.
def alias(name)
  renamed = _rbexpr._alias(name)
  _from_rbexpr(renamed)
end

#all(drop_nulls: true) ⇒ Expr

Check if all boolean values in a Boolean column are true.

This method is an expression - not to be confused with Polars.all which is a function to select all columns.

Examples:

df = Polars::DataFrame.new(
  {"TT" => [true, true], "TF" => [true, false], "FF" => [false, false]}
)
df.select(Polars.col("*").all)
# =>
# shape: (1, 3)
# ┌──────┬───────┬───────┐
# │ TT   ┆ TF    ┆ FF    │
# │ ---  ┆ ---   ┆ ---   │
# │ bool ┆ bool  ┆ bool  │
# ╞══════╪═══════╪═══════╡
# │ true ┆ false ┆ false │
# └──────┴───────┴───────┘

Returns:



223
224
225
# File 'lib/polars/expr.rb', line 223

# "All true" check: passes `drop_nulls` positionally to the native `all`.
def all(drop_nulls: true)
  native = _rbexpr.all(drop_nulls)
  _from_rbexpr(native)
end

#any(drop_nulls: true) ⇒ Expr

Check if any boolean value in a Boolean column is true.

Examples:

df = Polars::DataFrame.new({"TF" => [true, false], "FF" => [false, false]})
df.select(Polars.all.any)
# =>
# shape: (1, 2)
# ┌──────┬───────┐
# │ TF   ┆ FF    │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ false │
# └──────┴───────┘

Returns:



198
199
200
# File 'lib/polars/expr.rb', line 198

# "Any true" check: passes `drop_nulls` positionally to the native `any`.
def any(drop_nulls: true)
  native = _rbexpr.any(drop_nulls)
  _from_rbexpr(native)
end

#append(other, upcast: true) ⇒ Expr

Append expressions.

This is done by adding the chunks of other to this Series.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.all.head(1).append(Polars.all.tail(1)))
# =>
# shape: (2, 2)
# ┌─────┬──────┐
# │ a   ┆ b    │
# │ --- ┆ ---  │
# │ i64 ┆ i64  │
# ╞═════╪══════╡
# │ 8   ┆ null │
# │ 10  ┆ 4    │
# └─────┴──────┘

Parameters:

  • other (Expr)

    Expression to append.

  • upcast (Boolean) (defaults to: true)

    Cast both Series to the same supertype.

Returns:



813
814
815
816
# File 'lib/polars/expr.rb', line 813

# Append `other` (converted via Utils.expr_to_lit_or_expr) through the native `append`,
# forwarding the `upcast` flag positionally.
def append(other, upcast: true)
  rhs = Utils.expr_to_lit_or_expr(other)
  lhs = _rbexpr
  _from_rbexpr(lhs.append(rhs._rbexpr, upcast))
end

#approx_n_unique ⇒ Expr Also known as: approx_unique

Approx count unique values.

This is done using the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").approx_n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2082
2083
2084
# File 'lib/polars/expr.rb', line 2082

# Approximate unique count: calls the native `approx_n_unique` and hands the result to `_from_rbexpr`.
def approx_n_unique
  native = _rbexpr.approx_n_unique
  _from_rbexpr(native)
end

#arccos ⇒ Expr

Compute the element-wise value for the inverse cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").arccos)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘

Returns:



5167
5168
5169
# File 'lib/polars/expr.rb', line 5167

# Inverse cosine: calls the native `arccos` and hands the result to `_from_rbexpr`.
def arccos
  native = _rbexpr.arccos
  _from_rbexpr(native)
end

#arccosh ⇒ Expr

Compute the element-wise value for the inverse hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arccosh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



5287
5288
5289
# File 'lib/polars/expr.rb', line 5287

# Inverse hyperbolic cosine: calls the native `arccosh` and hands the result to `_from_rbexpr`.
def arccosh
  native = _rbexpr.arccosh
  _from_rbexpr(native)
end

#arcsin ⇒ Expr

Compute the element-wise value for the inverse sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsin)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.570796 │
# └──────────┘

Returns:



5147
5148
5149
# File 'lib/polars/expr.rb', line 5147

# Inverse sine: calls the native `arcsin` and hands the result to `_from_rbexpr`.
def arcsin
  native = _rbexpr.arcsin
  _from_rbexpr(native)
end

#arcsinh ⇒ Expr

Compute the element-wise value for the inverse hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arcsinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.881374 │
# └──────────┘

Returns:



5267
5268
5269
# File 'lib/polars/expr.rb', line 5267

# Inverse hyperbolic sine: calls the native `arcsinh` and hands the result to `_from_rbexpr`.
def arcsinh
  native = _rbexpr.arcsinh
  _from_rbexpr(native)
end

#arctan ⇒ Expr

Compute the element-wise value for the inverse tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.785398 │
# └──────────┘

Returns:



5187
5188
5189
# File 'lib/polars/expr.rb', line 5187

# Inverse tangent: calls the native `arctan` and hands the result to `_from_rbexpr`.
def arctan
  native = _rbexpr.arctan
  _from_rbexpr(native)
end

#arctanh ⇒ Expr

Compute the element-wise value for the inverse hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").arctanh)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ inf │
# └─────┘

Returns:



5307
5308
5309
# File 'lib/polars/expr.rb', line 5307

# Inverse hyperbolic tangent: calls the native `arctanh` and hands the result to `_from_rbexpr`.
def arctanh
  native = _rbexpr.arctanh
  _from_rbexpr(native)
end

#arg_max ⇒ Expr

Get the index of the maximal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



1443
1444
1445
# File 'lib/polars/expr.rb', line 1443

# Index of the maximal value: calls the native `arg_max` and hands the result to `_from_rbexpr`.
def arg_max
  native = _rbexpr.arg_max
  _from_rbexpr(native)
end

#arg_min ⇒ Expr

Get the index of the minimal value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# └─────┘

Returns:



1467
1468
1469
# File 'lib/polars/expr.rb', line 1467

# Index of the minimal value: calls the native `arg_min` and hands the result to `_from_rbexpr`.
def arg_min
  native = _rbexpr.arg_min
  _from_rbexpr(native)
end

#arg_sort(reverse: false, nulls_last: false) ⇒ Expr

Get the index values that would sort this column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").arg_sort)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# │ 2   │
# └─────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Sort in reverse (descending) order.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last instead of first.

Returns:



1419
1420
1421
# File 'lib/polars/expr.rb', line 1419

# Sorting indices: passes `reverse` and `nulls_last` positionally to the native `arg_sort`.
def arg_sort(reverse: false, nulls_last: false)
  native = _rbexpr.arg_sort(reverse, nulls_last)
  _from_rbexpr(native)
end

#arg_unique ⇒ Expr

Get index of first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10],
    "b" => [nil, 4, 4]
  }
)
df.select(Polars.col("a").arg_unique)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# │ 2   │
# └─────┘
df.select(Polars.col("b").arg_unique)
# =>
# shape: (2, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘

Returns:



2148
2149
2150
# File 'lib/polars/expr.rb', line 2148

# Indices of first unique values: calls the native `arg_unique` and hands the result to `_from_rbexpr`.
def arg_unique
  native = _rbexpr.arg_unique
  _from_rbexpr(native)
end

#argsort(reverse: false, nulls_last: false) ⇒ Expr

Get the index values that would sort this column.

Alias for #arg_sort.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").argsort)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# │ 2   │
# └─────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Sort in reverse (descending) order.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last instead of first.

Returns:

  • (Expr)


4707
4708
4709
# File 'lib/polars/expr.rb', line 4707

# Alias of `arg_sort`; passes both keyword options through unchanged.
def argsort(reverse: false, nulls_last: false)
  options = { reverse: reverse, nulls_last: nulls_last }
  arg_sort(**options)
end

#arr ⇒ ArrayExpr

Create an object namespace of all array related methods.

Returns:



5989
5990
5991
# File 'lib/polars/expr.rb', line 5989

# Array namespace accessor: builds an ArrayExpr around this expression.
def arr
  owner = self
  ArrayExpr.new(owner)
end

#backward_fill(limit: nil) ⇒ Expr

Fill missing values with the next to be seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.backward_fill)
# =>
# shape: (3, 2)
# ┌──────┬─────┐
# │ a    ┆ b   │
# │ ---  ┆ --- │
# │ i64  ┆ i64 │
# ╞══════╪═════╡
# │ 1    ┆ 4   │
# │ 2    ┆ 6   │
# │ null ┆ 6   │
# └──────┴─────┘

Parameters:

  • limit (Integer) (defaults to: nil)

    The number of consecutive null values to backward fill.

Returns:



1823
1824
1825
# File 'lib/polars/expr.rb', line 1823

# Backward fill: passes `limit` (nil by default) to the native `backward_fill`.
def backward_fill(limit: nil)
  native = _rbexpr.backward_fill(limit)
  _from_rbexpr(native)
end

#bin ⇒ BinaryExpr

Create an object namespace of all binary related methods.

Returns:



5996
5997
5998
# File 'lib/polars/expr.rb', line 5996

# Binary namespace accessor: builds a BinaryExpr around this expression.
def bin
  owner = self
  BinaryExpr.new(owner)
end

#bottom_k(k: 5) ⇒ Expr

Return the k smallest elements.

This is the counterpart of #top_k, which returns the k largest elements.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘

Parameters:

  • k (Integer) (defaults to: 5)

    Number of elements to return.

Returns:



1387
1388
1389
1390
# File 'lib/polars/expr.rb', line 1387

# k smallest elements: `k` is first run through Utils.parse_as_expression,
# then passed to the native `bottom_k`.
def bottom_k(k: 5)
  k_expr = Utils.parse_as_expression(k)
  _from_rbexpr(_rbexpr.bottom_k(k_expr))
end

#cast(dtype, strict: true) ⇒ Expr

Cast between data types.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["4", "5", "6"]
  }
)
df.with_columns(
  [
    Polars.col("a").cast(:f64),
    Polars.col("b").cast(:i32)
  ]
)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i32 │
# ╞═════╪═════╡
# │ 1.0 ┆ 4   │
# │ 2.0 ┆ 5   │
# │ 3.0 ┆ 6   │
# └─────┴─────┘

Parameters:

  • dtype (Symbol)

    DataType to cast to.

  • strict (Boolean) (defaults to: true)

    Throw an error if a cast could not be done. For instance, due to an overflow.

Returns:



1234
1235
1236
1237
# File 'lib/polars/expr.rb', line 1234

# Cast to another data type: `dtype` is converted with Utils.rb_type_to_dtype
# and passed to the native `cast` together with the `strict` flag.
def cast(dtype, strict: true)
  target = Utils.rb_type_to_dtype(dtype)
  _from_rbexpr(_rbexpr.cast(target, strict))
end

#cat ⇒ CatExpr

Create an object namespace of all categorical related methods.

Returns:



6003
6004
6005
# File 'lib/polars/expr.rb', line 6003

# Categorical namespace accessor: builds a CatExpr around this expression.
def cat
  owner = self
  CatExpr.new(owner)
end

#ceil ⇒ Expr

Rounds up to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").ceil)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# │ 1.0 │
# │ 1.0 │
# │ 2.0 │
# └─────┘

Returns:



1113
1114
1115
# File 'lib/polars/expr.rb', line 1113

# Round up: calls the native `ceil` and hands the result to `_from_rbexpr`.
def ceil
  native = _rbexpr.ceil
  _from_rbexpr(native)
end

#clip(lower_bound, upper_bound) ⇒ Expr

Set values outside the given boundaries to the boundary value.

Only works for numeric and temporal columns. If you want to clip other data types, consider writing a when-then-otherwise expression.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ 1           │
# │ 5    ┆ 5           │
# │ null ┆ null        │
# │ 50   ┆ 10          │
# └──────┴─────────────┘

Parameters:

  • lower_bound (Numeric)

    Minimum value.

  • upper_bound (Numeric)

    Maximum value.

Returns:



4929
4930
4931
4932
4933
4934
4935
4936
4937
# File 'lib/polars/expr.rb', line 4929

# Clip values to the given boundaries. Each non-nil bound is parsed with
# Utils.parse_as_expression (str_as_lit: true); a nil bound is passed through
# to the native `clip` untouched.
def clip(lower_bound, upper_bound)
  lower_bound, upper_bound = [lower_bound, upper_bound].map do |bound|
    bound.nil? ? nil : Utils.parse_as_expression(bound, str_as_lit: true)
  end
  _from_rbexpr(_rbexpr.clip(lower_bound, upper_bound))
end

#clip_max(upper_bound) ⇒ Expr

Clip (limit) the values in an array to a max boundary.

Only works for numerical types.

If you want to clip other dtypes, consider writing a "when, then, otherwise" expression. See when for more information.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ -50         │
# │ 5    ┆ 0           │
# │ null ┆ null        │
# │ 50   ┆ 0           │
# └──────┴─────────────┘

Parameters:

  • upper_bound (Numeric)

    Maximum value.

Returns:



4997
4998
4999
# File 'lib/polars/expr.rb', line 4997

# Upper-bound-only clip: calls `clip` with nil as the lower bound.
def clip_max(upper_bound)
  lower_bound = nil
  clip(lower_bound, upper_bound)
end

#clip_min(lower_bound) ⇒ Expr

Clip (limit) the values in an array to a min boundary.

Only works for numerical types.

If you want to clip other dtypes, consider writing a "when, then, otherwise" expression. See when for more information.

Examples:

df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
# =>
# shape: (4, 2)
# ┌──────┬─────────────┐
# │ foo  ┆ foo_clipped │
# │ ---  ┆ ---         │
# │ i64  ┆ i64         │
# ╞══════╪═════════════╡
# │ -50  ┆ 0           │
# │ 5    ┆ 5           │
# │ null ┆ null        │
# │ 50   ┆ 50          │
# └──────┴─────────────┘

Parameters:

  • lower_bound (Numeric)

    Minimum value.

Returns:



4966
4967
4968
# File 'lib/polars/expr.rb', line 4966

# Lower-bound-only clip: calls `clip` with nil as the upper bound.
def clip_min(lower_bound)
  upper_bound = nil
  clip(lower_bound, upper_bound)
end

#cos ⇒ Expr

Compute the element-wise value for the cosine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").cos)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Returns:



5107
5108
5109
# File 'lib/polars/expr.rb', line 5107

# Cosine: calls the native `cos` and hands the result to `_from_rbexpr`.
def cos
  native = _rbexpr.cos
  _from_rbexpr(native)
end

#cosh ⇒ Expr

Compute the element-wise value for the hyperbolic cosine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").cosh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.543081 │
# └──────────┘

Returns:



5227
5228
5229
# File 'lib/polars/expr.rb', line 5227

# Hyperbolic cosine: calls the native `cosh` and hands the result to `_from_rbexpr`.
def cosh
  native = _rbexpr.cosh
  _from_rbexpr(native)
end

#count ⇒ Expr

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:



721
722
723
# File 'lib/polars/expr.rb', line 721

# Value count: calls the native `count` and hands the result to `_from_rbexpr`.
def count
  native = _rbexpr.count
  _from_rbexpr(native)
end

#cum_count(reverse: false) ⇒ Expr Also known as: cumcount

Get an array with the cumulative count computed at every element.

Counts the non-null values seen so far (starting from the end when reverse: true), as the example below shows.

Examples:

df = Polars::DataFrame.new({"a" => ["x", "k", nil, "d"]})
df.with_columns(
  [
    Polars.col("a").cum_count.alias("cum_count"),
    Polars.col("a").cum_count(reverse: true).alias("cum_count_reverse")
  ]
)
# =>
# shape: (4, 3)
# ┌──────┬───────────┬───────────────────┐
# │ a    ┆ cum_count ┆ cum_count_reverse │
# │ ---  ┆ ---       ┆ ---               │
# │ str  ┆ u32       ┆ u32               │
# ╞══════╪═══════════╪═══════════════════╡
# │ x    ┆ 1         ┆ 3                 │
# │ k    ┆ 2         ┆ 2                 │
# │ null ┆ 2         ┆ 1                 │
# │ d    ┆ 3         ┆ 1                 │
# └──────┴───────────┴───────────────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1062
1063
1064
# File 'lib/polars/expr.rb', line 1062

# Cumulative count: passes `reverse` positionally to the native `cum_count`.
def cum_count(reverse: false)
  native = _rbexpr.cum_count(reverse)
  _from_rbexpr(native)
end

#cum_max(reverse: false) ⇒ Expr Also known as: cummax

Get an array with the cumulative max computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_max,
    Polars.col("a").cum_max(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 4         │
# │ 2   ┆ 4         │
# │ 3   ┆ 4         │
# │ 4   ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



1028
1029
1030
# File 'lib/polars/expr.rb', line 1028

# Cumulative maximum: passes `reverse` positionally to the native `cum_max`.
def cum_max(reverse: false)
  native = _rbexpr.cum_max(reverse)
  _from_rbexpr(native)
end

#cum_min(reverse: false) ⇒ Expr Also known as: cummin

Get an array with the cumulative min computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_min,
    Polars.col("a").cum_min(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 1         │
# │ 1   ┆ 2         │
# │ 1   ┆ 3         │
# │ 1   ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



996
997
998
# File 'lib/polars/expr.rb', line 996

# Cumulative minimum: passes `reverse` positionally to the native `cum_min`.
def cum_min(reverse: false)
  native = _rbexpr.cum_min(reverse)
  _from_rbexpr(native)
end

#cum_prod(reverse: false) ⇒ Expr Also known as: cumprod

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before multiplying to prevent overflow issues.

Get an array with the cumulative product computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_prod,
    Polars.col("a").cum_prod(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 24        │
# │ 2   ┆ 24        │
# │ 6   ┆ 12        │
# │ 24  ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



964
965
966
# File 'lib/polars/expr.rb', line 964

# Cumulative product: passes `reverse` positionally to the native `cum_prod`.
def cum_prod(reverse: false)
  native = _rbexpr.cum_prod(reverse)
  _from_rbexpr(native)
end

#cum_sum(reverse: false) ⇒ Expr Also known as: cumsum

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before summing to prevent overflow issues.

Get an array with the cumulative sum computed at every element.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
df.select(
  [
    Polars.col("a").cum_sum,
    Polars.col("a").cum_sum(reverse: true).alias("a_reverse")
  ]
)
# =>
# shape: (4, 2)
# ┌─────┬───────────┐
# │ a   ┆ a_reverse │
# │ --- ┆ ---       │
# │ i64 ┆ i64       │
# ╞═════╪═══════════╡
# │ 1   ┆ 10        │
# │ 3   ┆ 9         │
# │ 6   ┆ 7         │
# │ 10  ┆ 4         │
# └─────┴───────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



928
929
930
# File 'lib/polars/expr.rb', line 928

# Cumulative sum: passes `reverse` positionally to the native `cum_sum`.
def cum_sum(reverse: false)
  native = _rbexpr.cum_sum(reverse)
  _from_rbexpr(native)
end

#cumulative_eval(expr, min_periods: 1, parallel: false) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

This can be really slow as it can have O(n^2) complexity. Don't use this for operations that visit all elements.

Run an expression over a sliding window that increases 1 slot every iteration.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.col("values").cumulative_eval(
      Polars.element.first - Polars.element.last ** 2
    )
  ]
)
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ 0.0    │
# │ -3.0   │
# │ -8.0   │
# │ -15.0  │
# │ -24.0  │
# └────────┘

Parameters:

  • expr (Expr)

    Expression to evaluate

  • min_periods (Integer) (defaults to: 1)

    Number of valid values there should be in the window before the expression is evaluated. valid values = length - null_count

  • parallel (Boolean) (defaults to: false)

    Run in parallel. Don't do this in a group by or another operation that already has much parallelization.

Returns:



5722
5723
5724
5725
5726
# File 'lib/polars/expr.rb', line 5722

# Evaluate `expr` over a growing window: hands the native form of `expr`,
# `min_periods`, and `parallel` to the native `cumulative_eval`.
def cumulative_eval(expr, min_periods: 1, parallel: false)
  lhs = _rbexpr
  inner = expr._rbexpr
  _from_rbexpr(lhs.cumulative_eval(inner, min_periods, parallel))
end

#cut(breaks, labels: nil, left_closed: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories.

Examples:

Divide a column into three categories.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").cut([-1, 1], labels: ["a", "b", "c"]).alias("cut")
)
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ foo ┆ cut │
# │ --- ┆ --- │
# │ i64 ┆ cat │
# ╞═════╪═════╡
# │ -2  ┆ a   │
# │ -1  ┆ a   │
# │ 0   ┆ b   │
# │ 1   ┆ b   │
# │ 2   ┆ c   │
# └─────┴─────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").cut([-1, 1], include_breaks: true).alias("cut")
).unnest("cut")
# =>
# shape: (5, 3)
# ┌─────┬──────┬────────────┐
# │ foo ┆ brk  ┆ foo_bin    │
# │ --- ┆ ---  ┆ ---        │
# │ i64 ┆ f64  ┆ cat        │
# ╞═════╪══════╪════════════╡
# │ -2  ┆ -1.0 ┆ (-inf, -1] │
# │ -1  ┆ -1.0 ┆ (-inf, -1] │
# │ 0   ┆ 1.0  ┆ (-1, 1]    │
# │ 1   ┆ 1.0  ┆ (-1, 1]    │
# │ 2   ┆ inf  ┆ (1, inf]   │
# └─────┴──────┴────────────┘

Parameters:

  • breaks (Array)

    List of unique cut points.

  • labels (Array) (defaults to: nil)

    Names of the categories. The number of labels must be equal to the number of cut points plus one.

  • left_closed (Boolean) (defaults to: false)

    Set the intervals to be left-closed instead of right-closed.

  • include_breaks (Boolean) (defaults to: false)

    Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.

Returns:



2531
2532
2533
# File 'lib/polars/expr.rb', line 2531

# Bin values into categories: forwards `breaks` and the three options,
# in positional order, to the native `cut`.
def cut(breaks, labels: nil, left_closed: false, include_breaks: false)
  native = _rbexpr.cut(breaks, labels, left_closed, include_breaks)
  _from_rbexpr(native)
end

#diff(n: 1, null_behavior: "ignore") ⇒ Expr

Calculate the n-th discrete difference.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [20, 10, 30]
  }
)
df.select(Polars.col("a").diff)
# =>
# shape: (3, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ -10  │
# │ 20   │
# └──────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Number of slots to shift.

  • null_behavior ("ignore", "drop") (defaults to: "ignore")

    How to handle null values.

Returns:



4800
4801
4802
# File 'lib/polars/expr.rb', line 4800

# n-th discrete difference: passes `n` and `null_behavior` positionally to the native `diff`.
def diff(n: 1, null_behavior: "ignore")
  native = _rbexpr.diff(n, null_behavior)
  _from_rbexpr(native)
end

#dot(other) ⇒ Expr

Compute the dot/inner product between two Expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 3, 5],
    "b" => [2, 4, 6]
  }
)
df.select(Polars.col("a").dot(Polars.col("b")))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 44  │
# └─────┘

Parameters:

  • other (Expr)

    Expression to compute dot product with.

Returns:



1167
1168
1169
1170
# File 'lib/polars/expr.rb', line 1167

# Dot product: `other` is converted via Utils.expr_to_lit_or_expr with
# str_to_lit: false, then its native form is passed to the native `dot`.
def dot(other)
  rhs = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
  lhs = _rbexpr
  _from_rbexpr(lhs.dot(rhs._rbexpr))
end

#drop_nans ⇒ Expr

Drop floating point NaN values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nans)
# =>
# shape: (3, 1)
# ┌──────┐
# │ b    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.0  │
# │ 4.0  │
# └──────┘

Returns:



893
894
895
# File 'lib/polars/expr.rb', line 893

# Drop NaN values: calls the native `drop_nans` and hands the result to `_from_rbexpr`.
def drop_nans
  native = _rbexpr.drop_nans
  _from_rbexpr(native)
end

#drop_nulls ⇒ Expr

Drop null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4.0, 4.0, Float::NAN]
  }
)
df.select(Polars.col("b").drop_nulls)
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# │ 4.0 │
# │ NaN │
# └─────┘

Returns:



866
867
868
# File 'lib/polars/expr.rb', line 866

# Drop null values: calls the native `drop_nulls` and hands the result to `_from_rbexpr`.
def drop_nulls
  native = _rbexpr.drop_nulls
  _from_rbexpr(native)
end

#dt ⇒ DateTimeExpr

Create an object namespace of all datetime related methods.

Returns:



6010
6011
6012
# File 'lib/polars/expr.rb', line 6010

# Datetime namespace accessor: builds a DateTimeExpr around this expression.
def dt
  owner = self
  DateTimeExpr.new(owner)
end

#entropy(base: 2, normalize: true) ⇒ Expr

Computes the entropy.

Uses the formula -sum(pk * log(pk)) where pk are discrete probabilities.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").entropy(base: 2))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.459148 │
# └──────────┘
df.select(Polars.col("a").entropy(base: 2, normalize: false))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -6.754888 │
# └───────────┘

Parameters:

  • base (Float) (defaults to: 2)

    Given base, defaults to 2.

  • normalize (Boolean) (defaults to: true)

    Normalize pk if it doesn't sum to 1.

Returns:



5675
5676
5677
# File 'lib/polars/expr.rb', line 5675

# Computes the entropy, passing `base` and `normalize` through to the
# native expression.
def entropy(base: 2, normalize: true)
  native = _rbexpr.entropy(base, normalize)
  _from_rbexpr(native)
end

#eq(other) ⇒ Expr

Method equivalent of equality operator expr == other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x == y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x == y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ false  │
# │ 2.0 ┆ 2.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 4.0 ┆ 4.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3047
3048
3049
# File 'lib/polars/expr.rb', line 3047

# Method form of the `==` operator: returns whatever `self == other` returns.
def eq(other)
  comparison = (self == other)
  comparison
end

#eq_missing(other) ⇒ Expr

Method equivalent of equality operator expr == other where null == null.

This differs from default eq where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").eq(Polars.col("y")).alias("x eq y"),
  Polars.col("x").eq_missing(Polars.col("y")).alias("x eq_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x eq y ┆ x eq_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ false  ┆ false          │
# │ 2.0  ┆ 2.0  ┆ true   ┆ true           │
# │ NaN  ┆ NaN  ┆ true   ┆ true           │
# │ 4.0  ┆ 4.0  ┆ true   ┆ true           │
# │ null ┆ 5.0  ┆ null   ┆ false          │
# │ null ┆ null ┆ null   ┆ true           │
# └──────┴──────┴────────┴────────────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3085
3086
3087
3088
# File 'lib/polars/expr.rb', line 3085

# Equality comparison that goes through the native `eq_missing` call.
# `other` is parsed with `str_as_lit: true`, and the parsed value is passed
# straight to the native call.
def eq_missing(other)
  rhs = Utils.parse_as_expression(other, str_as_lit: true)
  native = _rbexpr.eq_missing(rhs)
  _from_rbexpr(native)
end

#ewm_mean(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving average.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_mean(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.666667 │
# │ 2.428571 │
# └──────────┘

Returns:



5439
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449
5450
# File 'lib/polars/expr.rb', line 5439

# Exponentially-weighted moving average.
#
# The value passed to the native call as alpha is whatever `_prepare_alpha`
# returns for the given com/span/half_life/alpha combination.
def ewm_mean(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  min_periods: 1,
  ignore_nulls: true
)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  native = _rbexpr.ewm_mean(smoothing, adjust, min_periods, ignore_nulls)
  _from_rbexpr(native)
end

#ewm_std(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_std(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.707107 │
# │ 0.963624 │
# └──────────┘

Returns:



5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
# File 'lib/polars/expr.rb', line 5470

# Exponentially-weighted moving standard deviation.
#
# The value passed to the native call as alpha is whatever `_prepare_alpha`
# returns for the given com/span/half_life/alpha combination.
def ewm_std(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  native = _rbexpr.ewm_std(smoothing, adjust, bias, min_periods, ignore_nulls)
  _from_rbexpr(native)
end

#ewm_var(com: nil, span: nil, half_life: nil, alpha: nil, adjust: true, bias: false, min_periods: 1, ignore_nulls: true) ⇒ Expr

Exponentially-weighted moving variance.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").ewm_var(com: 1))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 0.5      │
# │ 0.928571 │
# └──────────┘

Returns:



5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
# File 'lib/polars/expr.rb', line 5502

# Exponentially-weighted moving variance.
#
# The value passed to the native call as alpha is whatever `_prepare_alpha`
# returns for the given com/span/half_life/alpha combination.
def ewm_var(
  com: nil,
  span: nil,
  half_life: nil,
  alpha: nil,
  adjust: true,
  bias: false,
  min_periods: 1,
  ignore_nulls: true
)
  smoothing = _prepare_alpha(com, span, half_life, alpha)
  native = _rbexpr.ewm_var(smoothing, adjust, bias, min_periods, ignore_nulls)
  _from_rbexpr(native)
end

#exclude(columns) ⇒ Expr

Exclude certain columns from a wildcard/regex selection.

You may also use regexes in the exclude list. They must start with ^ and end with $.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
df.select(Polars.all.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Parameters:

  • columns (Object)

    Column(s) to exclude from selection. This can be:

    • a column name, or multiple column names
    • a regular expression starting with ^ and ending with $
    • a dtype or multiple dtypes

Returns:



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/polars/expr.rb', line 365

# Exclude certain columns from a wildcard/regex selection.
#
# @param columns [Object]
#   A column name (or regex string), an array of column names, a dtype, or
#   an array of dtypes. An array must be homogeneous: all strings or all
#   dtypes.
#
# @return [Expr]
#
# @raise [ArgumentError] if an array is neither all strings nor all dtypes.
def exclude(columns)
  if columns.is_a?(::String)
    # A lone string is treated as a single column name.
    return _from_rbexpr(_rbexpr.exclude([columns]))
  elsif !columns.is_a?(::Array)
    # Any other scalar is passed on as a single dtype.
    return _from_rbexpr(_rbexpr.exclude_dtype([columns]))
  end

  # Reject only when the array is neither all strings nor all dtypes. The
  # previous check joined the two negated tests with `||`, which rejected
  # every non-empty array (nothing is both a String and a dtype).
  unless columns.all? { |a| a.is_a?(::String) } || columns.all? { |a| Utils.is_polars_dtype(a) }
    raise ArgumentError, "input should be all string or all DataType"
  end

  if columns[0].is_a?(::String)
    _from_rbexpr(_rbexpr.exclude(columns))
  else
    _from_rbexpr(_rbexpr.exclude_dtype(columns))
  end
end

#expExpr

Compute the exponential, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").exp)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 2.718282 │
# │ 7.389056 │
# │ 54.59815 │
# └──────────┘

Returns:



289
290
291
# File 'lib/polars/expr.rb', line 289

# Compute the exponential, element-wise, via the native expression.
def exp
  native = _rbexpr.exp
  _from_rbexpr(native)
end

#explodeExpr

Explode a list or utf8 Series.

This means that every item is expanded to a new row.

Examples:

df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
df.select(Polars.col("b").explode)
# =>
# shape: (6, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# │ 4   │
# │ 5   │
# │ 6   │
# └─────┘

Returns:



2930
2931
2932
# File 'lib/polars/expr.rb', line 2930

# Explode a list or utf8 Series so that every item becomes its own row.
def explode
  native = _rbexpr.explode
  _from_rbexpr(native)
end

#extend_constant(value, n) ⇒ Expr

Extend the Series with given number of values.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").extend_constant(99, 2))
# =>
# shape: (5, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 2      │
# │ 3      │
# │ 99     │
# │ 99     │
# └────────┘

Parameters:

  • value (Object)

    The value to extend the Series with. This value may be nil to fill with nulls.

  • n (Integer)

    The number of values to extend.

Returns:



5542
5543
5544
# File 'lib/polars/expr.rb', line 5542

# Extend the Series with `n` copies of `value`; both arguments are passed
# unchanged to the native expression.
def extend_constant(value, n)
  native = _rbexpr.extend_constant(value, n)
  _from_rbexpr(native)
end

#fill_nan(fill_value) ⇒ Expr

Fill floating point NaN value with a fill value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1.0, nil, Float::NAN],
    "b" => [4.0, Float::NAN, 6]
  }
)
df.fill_nan("zero")
# =>
# shape: (3, 2)
# ┌──────┬──────┐
# │ a    ┆ b    │
# │ ---  ┆ ---  │
# │ str  ┆ str  │
# ╞══════╪══════╡
# │ 1.0  ┆ 4.0  │
# │ null ┆ zero │
# │ zero ┆ 6.0  │
# └──────┴──────┘

Returns:



1762
1763
1764
1765
# File 'lib/polars/expr.rb', line 1762

# Fill floating point NaN values with `fill_value`.
# The fill value is converted with `str_to_lit: true` before the native call.
def fill_nan(fill_value)
  replacement = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)._rbexpr
  _from_rbexpr(_rbexpr.fill_nan(replacement))
end

#fill_null(value = nil, strategy: nil, limit: nil) ⇒ Expr

Fill null values using the specified value or strategy.

To interpolate over null values see interpolate.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.fill_null(strategy: "zero")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 0   │
# │ 0   ┆ 6   │
# └─────┴─────┘
df.fill_null(99)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 99  │
# │ 99  ┆ 6   │
# └─────┴─────┘
df.fill_null(strategy: "forward")
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘

Parameters:

  • value (Object) (defaults to: nil)

    Value used to fill null values.

  • strategy (nil, "forward", "backward", "min", "max", "mean", "zero", "one") (defaults to: nil)

    Strategy used to fill null values.

  • limit (Integer) (defaults to: nil)

    Number of consecutive null values to fill when using the 'forward' or 'backward' strategy.

Returns:



1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
# File 'lib/polars/expr.rb', line 1722

# Fill null values using the specified value or strategy.
#
# @param value [Object]
#   Value used to fill null values. Mutually exclusive with `strategy`.
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
#   Strategy used to fill null values. Mutually exclusive with `value`.
# @param limit [Integer]
#   Number of consecutive null values to fill. Only accepted together with
#   the "forward" or "backward" strategy.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If both or neither of `value`/`strategy` are given, or if `limit` is
#   given without a "forward"/"backward" strategy.
def fill_null(value = nil, strategy: nil, limit: nil)
  if !value.nil? && !strategy.nil?
    raise ArgumentError, "cannot specify both 'value' and 'strategy'."
  elsif value.nil? && strategy.nil?
    raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
  elsif !limit.nil? && !["forward", "backward"].include?(strategy)
    # The guard used to be inverted: it rejected `limit` for exactly the
    # strategies that support it and accepted it everywhere else.
    raise ArgumentError, "can only specify 'limit' when strategy is set to 'backward' or 'forward'"
  end

  # `nil?` rather than truthiness so that `false` remains a valid fill value.
  if !value.nil?
    value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
    _from_rbexpr(_rbexpr.fill_null(value._rbexpr))
  else
    _from_rbexpr(_rbexpr.fill_null_with_strategy(strategy, limit))
  end
end

#filter(predicate) ⇒ Expr

Filter a single column.

Mostly useful in an aggregation context. If you want to filter on a DataFrame level, use LazyFrame#filter.

Examples:

df = Polars::DataFrame.new(
  {
    "group_col" => ["g1", "g1", "g2"],
    "b" => [1, 2, 3]
  }
)
(
  df.group_by("group_col").agg(
    [
      Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
      Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
    ]
  )
).sort("group_col")
# =>
# shape: (2, 3)
# ┌───────────┬─────┬─────┐
# │ group_col ┆ lt  ┆ gte │
# │ ---       ┆ --- ┆ --- │
# │ str       ┆ i64 ┆ i64 │
# ╞═══════════╪═════╪═════╡
# │ g1        ┆ 1   ┆ 2   │
# │ g2        ┆ 0   ┆ 3   │
# └───────────┴─────┴─────┘

Parameters:

  • predicate (Expr)

    Boolean expression.

Returns:



2714
2715
2716
# File 'lib/polars/expr.rb', line 2714

# Filter a single column using `predicate`, whose `_rbexpr` is handed to
# the native `filter` call.
def filter(predicate)
  condition = predicate._rbexpr
  _from_rbexpr(_rbexpr.filter(condition))
end

#firstExpr

Get the first value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").first)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Returns:



2196
2197
2198
# File 'lib/polars/expr.rb', line 2196

# Get the first value via the native expression.
def first
  native = _rbexpr.first
  _from_rbexpr(native)
end

#flattenExpr

Explode a list or utf8 Series. This means that every item is expanded to a new row.

Alias for #explode.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => ["a", "b", "b"],
    "values" => [[1, 2], [2, 3], [4]]
  }
)
df.group_by("group").agg(Polars.col("values").flatten)
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ values    │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ a     ┆ [1, 2]    │
# │ b     ┆ [2, 3, 4] │
# └───────┴───────────┘

Returns:



2903
2904
2905
# File 'lib/polars/expr.rb', line 2903

# Alias for #explode: calls the native `explode` directly.
def flatten
  native = _rbexpr.explode
  _from_rbexpr(native)
end

#floorExpr

Rounds down to the nearest integer value.

Only works on floating point Series.

Examples:

df = Polars::DataFrame.new({"a" => [0.3, 0.5, 1.0, 1.1]})
df.select(Polars.col("a").floor)
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# │ 0.0 │
# │ 1.0 │
# │ 1.0 │
# └─────┘

Returns:



1088
1089
1090
# File 'lib/polars/expr.rb', line 1088

# Rounds down to the nearest integer value via the native expression.
def floor
  native = _rbexpr.floor
  _from_rbexpr(native)
end

#floordiv(other) ⇒ Expr

Method equivalent of integer division operator expr // other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 3, 4, 5]})
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").floordiv(2).alias("x//2")
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬──────┐
# │ x   ┆ x/2 ┆ x//2 │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ f64 ┆ i64  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 0.5 ┆ 0    │
# │ 2   ┆ 1.0 ┆ 1    │
# │ 3   ┆ 1.5 ┆ 1    │
# │ 4   ┆ 2.0 ┆ 2    │
# │ 5   ┆ 2.5 ┆ 2    │
# └─────┴─────┴──────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3367
3368
3369
# File 'lib/polars/expr.rb', line 3367

# Integer (floor) division by `other`, which is converted with `_to_rbexpr`
# before the native `floordiv` call.
def floordiv(other)
  divisor = _to_rbexpr(other)
  _from_rbexpr(_rbexpr.floordiv(divisor))
end

#forward_fill(limit: nil) ⇒ Expr

Fill missing values with the latest seen values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [4, nil, 6]
  }
)
df.select(Polars.all.forward_fill)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 4   │
# │ 2   ┆ 4   │
# │ 2   ┆ 6   │
# └─────┴─────┘

Parameters:

  • limit (Integer) (defaults to: nil)

    The number of consecutive null values to forward fill.

Returns:



1793
1794
1795
# File 'lib/polars/expr.rb', line 1793

# Fill missing values with the latest seen values; `limit` is passed
# unchanged to the native expression.
def forward_fill(limit: nil)
  native = _rbexpr.forward_fill(limit)
  _from_rbexpr(native)
end

#gather(indices) ⇒ Expr Also known as: take

Take values by index.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.group_by("group", maintain_order: true).agg(Polars.col("value").take([2, 1]))
# =>
# shape: (2, 2)
# ┌───────┬───────────┐
# │ group ┆ value     │
# │ ---   ┆ ---       │
# │ str   ┆ list[i64] │
# ╞═══════╪═══════════╡
# │ one   ┆ [2, 98]   │
# │ two   ┆ [4, 99]   │
# └───────┴───────────┘

Parameters:

  • indices (Expr)

    An expression that leads to a :u32 dtyped Series.

Returns:



1591
1592
1593
1594
1595
1596
1597
1598
# File 'lib/polars/expr.rb', line 1591

# Take values by index.
#
# An Array of indices is wrapped in an unnamed :u32 Series literal; any
# other input is converted with `str_to_lit: false`.
def gather(indices)
  index_expr =
    if indices.is_a?(::Array)
      Polars.lit(Series.new("", indices, dtype: :u32))
    else
      Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
    end
  _from_rbexpr(_rbexpr.gather(index_expr._rbexpr))
end

#gather_every(n, offset = 0) ⇒ Expr Also known as: take_every

Take every nth value in the Series and return as a new Series.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df.select(Polars.col("foo").gather_every(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 7   │
# └─────┘

Returns:



2952
2953
2954
# File 'lib/polars/expr.rb', line 2952

# Take every nth value; `n` and `offset` are passed unchanged to the native
# expression.
def gather_every(n, offset = 0)
  native = _rbexpr.gather_every(n, offset)
  _from_rbexpr(native)
end

#ge(other) ⇒ Expr

Method equivalent of "greater than or equal" operator expr >= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
  Polars.col("x").ge(Polars.col("y")).alias("x >= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x >= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.0 ┆ true   │
# │ NaN ┆ NaN ┆ true   │
# │ 2.0 ┆ 1.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3119
3120
3121
# File 'lib/polars/expr.rb', line 3119

# Method form of the `>=` operator: returns whatever `self >= other` returns.
def ge(other)
  comparison = (self >= other)
  comparison
end

#gt(other) ⇒ Expr

Method equivalent of "greater than" operator expr > other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 2.0],
    "y" => [5.0, 3.0, Float::NAN, 1.0]
  }
)
df.with_columns(
    Polars.col("x").gt(Polars.col("y")).alias("x > y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x > y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 5.0 ┆ 5.0 ┆ false │
# │ 4.0 ┆ 3.0 ┆ true  │
# │ NaN ┆ NaN ┆ false │
# │ 2.0 ┆ 1.0 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3152
3153
3154
# File 'lib/polars/expr.rb', line 3152

# Method form of the `>` operator: returns whatever `self > other` returns.
def gt(other)
  comparison = (self > other)
  comparison
end

#head(n = 10) ⇒ Expr

Get the first n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.head(3)
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



2978
2979
2980
# File 'lib/polars/expr.rb', line 2978

# Get the first `n` rows via the native expression.
def head(n = 10)
  native = _rbexpr.head(n)
  _from_rbexpr(native)
end

#implodeExpr

Aggregate to list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.all.implode)
# =>
# shape: (1, 2)
# ┌───────────┬───────────┐
# │ a         ┆ b         │
# │ ---       ┆ ---       │
# │ list[i64] ┆ list[i64] │
# ╞═══════════╪═══════════╡
# │ [1, 2, 3] ┆ [4, 5, 6] │
# └───────────┴───────────┘

Returns:



5778
5779
5780
# File 'lib/polars/expr.rb', line 5778

# Aggregate to list via the native expression.
def implode
  native = _rbexpr.implode
  _from_rbexpr(native)
end

#interpolate(method: "linear") ⇒ Expr

Fill nulls with linear interpolation over missing values.

Can also be used to regrid data to a new grid - see examples below.

Examples:

Fill nulls with linear interpolation

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, 3],
    "b" => [1.0, Float::NAN, 3.0]
  }
)
df.select(Polars.all.interpolate)
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 1.0 │
# │ 2.0 ┆ NaN │
# │ 3.0 ┆ 3.0 │
# └─────┴─────┘

Returns:



3855
3856
3857
# File 'lib/polars/expr.rb', line 3855

# Fill nulls by interpolation; the `method` name is passed unchanged to the
# native expression.
def interpolate(method: "linear")
  native = _rbexpr.interpolate(method)
  _from_rbexpr(native)
end

#is_between(start, _end, closed: "both") ⇒ Expr

Check if this expression is between start and end.

Examples:

df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
df.with_columns(Polars.col("num").is_between(2, 4).alias("is_between"))
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ true       │
# │ 5   ┆ false      │
# └─────┴────────────┘

Use the closed argument to include or exclude the values at the bounds:

df.with_columns(
  Polars.col("num").is_between(2, 4, closed: "left").alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ num ┆ is_between │
# │ --- ┆ ---        │
# │ i64 ┆ bool       │
# ╞═════╪════════════╡
# │ 1   ┆ false      │
# │ 2   ┆ true       │
# │ 3   ┆ true       │
# │ 4   ┆ false      │
# │ 5   ┆ false      │
# └─────┴────────────┘

You can also use strings as well as numeric/temporal values:

df = Polars::DataFrame.new({"a" => ["a", "b", "c", "d", "e"]})
df.with_columns(
  Polars.col("a")
    .is_between(Polars.lit("a"), Polars.lit("c"), closed: "both")
    .alias("is_between")
)
# =>
# shape: (5, 2)
# ┌─────┬────────────┐
# │ a   ┆ is_between │
# │ --- ┆ ---        │
# │ str ┆ bool       │
# ╞═════╪════════════╡
# │ a   ┆ true       │
# │ b   ┆ true       │
# │ c   ┆ true       │
# │ d   ┆ false      │
# │ e   ┆ false      │
# └─────┴────────────┘

Parameters:

  • start (Object)

    Lower bound as primitive type or datetime.

  • _end (Object)

    Upper bound as primitive type or datetime.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define which sides of the interval are closed (inclusive).

Returns:



3702
3703
3704
3705
3706
3707
3708
3709
3710
3711
3712
3713
3714
3715
3716
3717
3718
# File 'lib/polars/expr.rb', line 3702

# Check if this expression is between `start` and `_end`.
#
# Both bounds are converted with `str_to_lit: false`. `closed` selects which
# sides of the interval are inclusive and must be one of "left", "right",
# "both" or "none"; anything else raises ArgumentError.
def is_between(start, _end, closed: "both")
  lower = Utils.expr_to_lit_or_expr(start, str_to_lit: false)
  upper = Utils.expr_to_lit_or_expr(_end, str_to_lit: false)

  unless ["none", "both", "right", "left"].include?(closed)
    raise ArgumentError, "closed must be one of 'left', 'right', 'both', or 'none'"
  end

  lower_inclusive = ["both", "left"].include?(closed)
  upper_inclusive = ["both", "right"].include?(closed)

  above_lower = lower_inclusive ? (self >= lower) : (self > lower)
  below_upper = upper_inclusive ? (self <= upper) : (self < upper)
  above_lower & below_upper
end

#is_duplicatedExpr

Get mask of duplicated values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_duplicated)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ true  │
# │ false │
# └───────┘

Returns:



2353
2354
2355
# File 'lib/polars/expr.rb', line 2353

# Get mask of duplicated values via the native expression.
def is_duplicated
  native = _rbexpr.is_duplicated
  _from_rbexpr(native)
end

#is_finiteExpr

Returns a boolean Series indicating which values are finite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_finite)
# =>
# shape: (2, 2)
# ┌──────┬───────┐
# │ A    ┆ B     │
# │ ---  ┆ ---   │
# │ bool ┆ bool  │
# ╞══════╪═══════╡
# │ true ┆ true  │
# │ true ┆ false │
# └──────┴───────┘

Returns:



574
575
576
# File 'lib/polars/expr.rb', line 574

# Boolean mask of which values are finite, via the native expression.
def is_finite
  native = _rbexpr.is_finite
  _from_rbexpr(native)
end

#is_first_distinctExpr Also known as: is_first

Get a mask of the first unique value.

Examples:

df = Polars::DataFrame.new(
  {
    "num" => [1, 2, 3, 1, 5]
  }
)
df.with_column(Polars.col("num").is_first.alias("is_first"))
# =>
# shape: (5, 2)
# ┌─────┬──────────┐
# │ num ┆ is_first │
# │ --- ┆ ---      │
# │ i64 ┆ bool     │
# ╞═════╪══════════╡
# │ 1   ┆ true     │
# │ 2   ┆ true     │
# │ 3   ┆ true     │
# │ 1   ┆ false    │
# │ 5   ┆ true     │
# └─────┴──────────┘

Returns:



2330
2331
2332
# File 'lib/polars/expr.rb', line 2330

# Get a mask of the first unique value via the native expression.
def is_first_distinct
  native = _rbexpr.is_first_distinct
  _from_rbexpr(native)
end

#is_in(other) ⇒ Expr Also known as: in?

Check if elements of this expression are present in the other Series.

Examples:

df = Polars::DataFrame.new(
  {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
)
df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
# =>
# shape: (3, 1)
# ┌──────────┐
# │ contains │
# │ ---      │
# │ bool     │
# ╞══════════╡
# │ true     │
# │ true     │
# │ false    │
# └──────────┘

Parameters:

  • other (Object)

    Series or sequence of primitive type.

Returns:



3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
# File 'lib/polars/expr.rb', line 3587

# Check if elements of this expression are present in `other`.
#
# An empty Array becomes a nil literal, a non-empty Array becomes a Series
# literal, and anything else is converted with `str_to_lit: false`.
def is_in(other)
  candidates =
    if !other.is_a?(::Array)
      Utils.expr_to_lit_or_expr(other, str_to_lit: false)
    elsif other.empty?
      Polars.lit(nil)
    else
      Polars.lit(Series.new(other))
    end
  _from_rbexpr(_rbexpr.is_in(candidates._rbexpr))
end

#is_infiniteExpr

Returns a boolean Series indicating which values are infinite.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1.0, 2],
    "B" => [3.0, Float::INFINITY]
  }
)
df.select(Polars.all.is_infinite)
# =>
# shape: (2, 2)
# ┌───────┬───────┐
# │ A     ┆ B     │
# │ ---   ┆ ---   │
# │ bool  ┆ bool  │
# ╞═══════╪═══════╡
# │ false ┆ false │
# │ false ┆ true  │
# └───────┴───────┘

Returns:



600
601
602
# File 'lib/polars/expr.rb', line 600

# Boolean mask of which values are infinite, via the native expression.
def is_infinite
  native = _rbexpr.is_infinite
  _from_rbexpr(native)
end

#is_nanExpr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬─────────┐
# │ a    ┆ b   ┆ b_isnan │
# │ ---  ┆ --- ┆ ---     │
# │ i64  ┆ f64 ┆ bool    │
# ╞══════╪═════╪═════════╡
# │ 1    ┆ 1.0 ┆ false   │
# │ 2    ┆ 2.0 ┆ false   │
# │ null ┆ NaN ┆ true    │
# │ 1    ┆ 1.0 ┆ false   │
# │ 5    ┆ 5.0 ┆ false   │
# └──────┴─────┴─────────┘

Returns:



633
634
635
# File 'lib/polars/expr.rb', line 633

# Boolean mask of which values are NaN, via the native expression.
def is_nan
  native = _rbexpr.is_nan
  _from_rbexpr(native)
end

#is_notExpr Also known as: not_

Negate a boolean expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, false],
    "b" => ["a", "b", nil]
  }
)
# =>
# shape: (3, 2)
# ┌───────┬──────┐
# │ a     ┆ b    │
# │ ---   ┆ ---  │
# │ bool  ┆ str  │
# ╞═══════╪══════╡
# │ true  ┆ a    │
# │ false ┆ b    │
# │ false ┆ null │
# └───────┴──────┘
df.select(Polars.col("a").is_not)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ true  │
# └───────┘

Returns:



489
490
491
# File 'lib/polars/expr.rb', line 489

# Negate a boolean expression using the native `not_` call.
def is_not
  negated = _rbexpr.not_
  _from_rbexpr(negated)
end

#is_not_nanExpr

Note:

Floating point NaN (Not A Number) should not be confused with missing data represented as nil.

Returns a boolean Series indicating which values are not NaN.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
# =>
# shape: (5, 3)
# ┌──────┬─────┬──────────────┐
# │ a    ┆ b   ┆ b_is_not_nan │
# │ ---  ┆ --- ┆ ---          │
# │ i64  ┆ f64 ┆ bool         │
# ╞══════╪═════╪══════════════╡
# │ 1    ┆ 1.0 ┆ true         │
# │ 2    ┆ 2.0 ┆ true         │
# │ null ┆ NaN ┆ false        │
# │ 1    ┆ 1.0 ┆ true         │
# │ 5    ┆ 5.0 ┆ true         │
# └──────┴─────┴──────────────┘

Returns:



666
667
668
# File 'lib/polars/expr.rb', line 666

# Boolean mask of which values are not NaN, via the native expression.
def is_not_nan
  native = _rbexpr.is_not_nan
  _from_rbexpr(native)
end

#is_not_nullExpr

Returns a boolean Series indicating which values are not null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.all.is_not_null.suffix("_not_null"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬────────────┬────────────┐
# │ a    ┆ b   ┆ a_not_null ┆ b_not_null │
# │ ---  ┆ --- ┆ ---        ┆ ---        │
# │ i64  ┆ f64 ┆ bool       ┆ bool       │
# ╞══════╪═════╪════════════╪════════════╡
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 2    ┆ 2.0 ┆ true       ┆ true       │
# │ null ┆ NaN ┆ false      ┆ true       │
# │ 1    ┆ 1.0 ┆ true       ┆ true       │
# │ 5    ┆ 5.0 ┆ true       ┆ true       │
# └──────┴─────┴────────────┴────────────┘

Returns:



548
549
550
# File 'lib/polars/expr.rb', line 548

# Boolean mask of which values are not null, via the native expression.
def is_not_null
  native = _rbexpr.is_not_null
  _from_rbexpr(native)
end

#is_nullExpr

Returns a boolean Series indicating which values are null.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil, 1, 5],
    "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
  }
)
df.with_column(Polars.all.is_null.suffix("_isnull"))
# =>
# shape: (5, 4)
# ┌──────┬─────┬──────────┬──────────┐
# │ a    ┆ b   ┆ a_isnull ┆ b_isnull │
# │ ---  ┆ --- ┆ ---      ┆ ---      │
# │ i64  ┆ f64 ┆ bool     ┆ bool     │
# ╞══════╪═════╪══════════╪══════════╡
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 2    ┆ 2.0 ┆ false    ┆ false    │
# │ null ┆ NaN ┆ true     ┆ false    │
# │ 1    ┆ 1.0 ┆ false    ┆ false    │
# │ 5    ┆ 5.0 ┆ false    ┆ false    │
# └──────┴─────┴──────────┴──────────┘

Returns:



519
520
521
# File 'lib/polars/expr.rb', line 519

# Boolean mask of which values are null, via the native expression.
def is_null
  native = _rbexpr.is_null
  _from_rbexpr(native)
end

#is_uniqueExpr

Get mask of unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").is_unique)
# =>
# shape: (3, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ true  │
# └───────┘

Returns:



2302
2303
2304
# File 'lib/polars/expr.rb', line 2302

# Get mask of unique values via the native expression.
def is_unique
  native = _rbexpr.is_unique
  _from_rbexpr(native)
end

#keep_nameExpr

Keep the original root name of the expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4]
  }
)
df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 9   ┆ 3   │
# │ 18  ┆ 4   │
# └─────┴─────┘

Returns:



407
408
409
# File 'lib/polars/expr.rb', line 407

# Keep the original root name of the expression by delegating to `name.keep`.
def keep_name
  name_namespace = name
  name_namespace.keep
end

#kurtosis(fisher: true, bias: true) ⇒ Expr

Compute the kurtosis (Fisher or Pearson) of a dataset.

Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. If bias is false, then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").kurtosis)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ f64       │
# ╞═══════════╡
# │ -1.153061 │
# └───────────┘

Parameters:

  • fisher (Boolean) (defaults to: true)

    If true, Fisher's definition is used (normal ==> 0.0). If false, Pearson's definition is used (normal ==> 3.0).

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



4898
4899
4900
# File 'lib/polars/expr.rb', line 4898

# Compute the kurtosis (Fisher or Pearson); `fisher` and `bias` are passed
# unchanged to the native expression.
def kurtosis(fisher: true, bias: true)
  native = _rbexpr.kurtosis(fisher, bias)
  _from_rbexpr(native)
end

#lastExpr

Get the last value.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").last)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2216
2217
2218
# File 'lib/polars/expr.rb', line 2216

# Get the last value via the native expression.
def last
  native = _rbexpr.last
  _from_rbexpr(native)
end

#le(other) ⇒ Expr

Method equivalent of "less than or equal" operator expr <= other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [5.0, 4.0, Float::NAN, 0.5],
    "y" => [5.0, 3.5, Float::NAN, 2.0]
  }
)
df.with_columns(
  Polars.col("x").le(Polars.col("y")).alias("x <= y")
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x <= y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 5.0 ┆ 5.0 ┆ true   │
# │ 4.0 ┆ 3.5 ┆ false  │
# │ NaN ┆ NaN ┆ true   │
# │ 0.5 ┆ 2.0 ┆ true   │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3185
3186
3187
# File 'lib/polars/expr.rb', line 3185

# Method form of the `<=` operator: returns whatever `self <= other` returns.
def le(other)
  comparison = (self <= other)
  comparison
end

#lenExpr Also known as: length

Count the number of values in this expression.

Examples:

df = Polars::DataFrame.new({"a" => [8, 9, 10], "b" => [nil, 4, 4]})
df.select(Polars.all.len)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 3   │
# └─────┴─────┘

Returns:



741
742
743
# File 'lib/polars/expr.rb', line 741

# Count the number of values in this expression.
#
# @return [Expr] the native `len` expression, wrapped via `_from_rbexpr`
def len
  native = _rbexpr.len
  _from_rbexpr(native)
end

#limit(n = 10) ⇒ Expr

Get the first n rows.

Alias for #head.

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



3015
3016
3017
# File 'lib/polars/expr.rb', line 3015

# Get the first `n` rows; a thin alias that forwards to `head`.
#
# @param n [Integer] number of rows to return
# @return [Expr]
def limit(n = 10) = head(n)

#listListExpr

Create an object namespace of all list related methods.

Returns:



5982
5983
5984
# File 'lib/polars/expr.rb', line 5982

# Create an object namespace of all list related methods.
#
# @return [ListExpr] a namespace object constructed around this expression
def list = ListExpr.new(self)

#log(base = Math::E) ⇒ Expr

Compute the logarithm to a given base.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").log(2))
# =>
# shape: (3, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.0      │
# │ 1.0      │
# │ 1.584963 │
# └──────────┘

Parameters:

  • base (Float) (defaults to: Math::E)

    Given base, defaults to e.

Returns:



5636
5637
5638
# File 'lib/polars/expr.rb', line 5636

# Compute the logarithm to a given base.
#
# @param base [Float] given base, defaults to e
# @return [Expr]
def log(base = Math::E)
  native = _rbexpr.log(base)
  _from_rbexpr(native)
end

#log10Expr

Compute the base 10 logarithm of the input array, element-wise.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").log10)
# =>
# shape: (3, 1)
# ┌─────────┐
# │ values  │
# │ ---     │
# │ f64     │
# ╞═════════╡
# │ 0.0     │
# │ 0.30103 │
# │ 0.60206 │
# └─────────┘

Returns:



267
268
269
# File 'lib/polars/expr.rb', line 267

# Compute the base 10 logarithm, element-wise, by calling `log` with base 10.
#
# @return [Expr]
def log10 = log(10)

#lower_boundExpr

Calculate the lower bound.

Returns a unit Series with the lowest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").lower_bound)
# =>
# shape: (1, 1)
# ┌──────────────────────┐
# │ a                    │
# │ ---                  │
# │ i64                  │
# ╞══════════════════════╡
# │ -9223372036854775808 │
# └──────────────────────┘

Returns:



5020
5021
5022
# File 'lib/polars/expr.rb', line 5020

# Calculate the lower bound: the lowest value possible for the dtype of this expression.
#
# @return [Expr] the native `lower_bound` expression, wrapped via `_from_rbexpr`
def lower_bound
  native = _rbexpr.lower_bound
  _from_rbexpr(native)
end

#lt(other) ⇒ Expr

Method equivalent of "less than" operator expr < other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 3.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").lt(Polars.col("y")).alias("x < y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬───────┐
# │ x   ┆ y   ┆ x < y │
# │ --- ┆ --- ┆ ---   │
# │ f64 ┆ f64 ┆ bool  │
# ╞═════╪═════╪═══════╡
# │ 1.0 ┆ 2.0 ┆ true  │
# │ 2.0 ┆ 2.0 ┆ false │
# │ NaN ┆ NaN ┆ false │
# │ 3.0 ┆ 4.0 ┆ true  │
# └─────┴─────┴───────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3218
3219
3220
# File 'lib/polars/expr.rb', line 3218

# Method form of the "less than" operator; delegates to `<`.
#
# @param other [Object] a literal or expression value to compare with
# @return [Expr]
def lt(other) = self < other

#map_alias(&f) ⇒ Expr

Rename the output of an expression by mapping a function over the root name.

Examples:

df = Polars::DataFrame.new(
  {
    "A" => [1, 2],
    "B" => [3, 4]
  }
)
df.select(
  Polars.all.reverse.map_alias { |colName| colName + "_reverse" }
)
# =>
# shape: (2, 2)
# ┌───────────┬───────────┐
# │ A_reverse ┆ B_reverse │
# │ ---       ┆ ---       │
# │ i64       ┆ i64       │
# ╞═══════════╪═══════════╡
# │ 2         ┆ 4         │
# │ 1         ┆ 3         │
# └───────────┴───────────┘

Returns:



449
450
451
# File 'lib/polars/expr.rb', line 449

# Rename the output of an expression by mapping a function over the root name.
#
# Forwards the given block to `name.map`.
#
# @yield the block handed on to `name.map`
# @return [Expr]
def map_alias(&f) = name.map(&f)

#maxExpr

Get maximum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Returns:



1896
1897
1898
# File 'lib/polars/expr.rb', line 1896

# Get maximum value.
#
# @return [Expr] the native `max` expression, wrapped via `_from_rbexpr`
def max
  native = _rbexpr.max
  _from_rbexpr(native)
end

#meanExpr

Get mean value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").mean)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



2000
2001
2002
# File 'lib/polars/expr.rb', line 2000

# Get mean value.
#
# @return [Expr] the native `mean` expression, wrapped via `_from_rbexpr`
def mean
  native = _rbexpr.mean
  _from_rbexpr(native)
end

#medianExpr

Get median value using linear interpolation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").median)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



2020
2021
2022
# File 'lib/polars/expr.rb', line 2020

# Get median value using linear interpolation.
#
# @return [Expr] the native `median` expression, wrapped via `_from_rbexpr`
def median
  native = _rbexpr.median
  _from_rbexpr(native)
end

#metaMetaExpr

Create an object namespace of all meta related expression methods.

Returns:



6017
6018
6019
# File 'lib/polars/expr.rb', line 6017

# Create an object namespace of all meta related expression methods.
#
# @return [MetaExpr] a namespace object constructed around this expression
def meta = MetaExpr.new(self)

#minExpr

Get minimum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
df.select(Polars.col("a").min)
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ -1.0 │
# └──────┘

Returns:



1916
1917
1918
# File 'lib/polars/expr.rb', line 1916

# Get minimum value.
#
# @return [Expr] the native `min` expression, wrapped via `_from_rbexpr`
def min
  native = _rbexpr.min
  _from_rbexpr(native)
end

#mod(other) ⇒ Expr

Method equivalent of modulus operator expr % other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(Polars.col("x").mod(2).alias("x%2"))
# =>
# shape: (5, 2)
# ┌─────┬─────┐
# │ x   ┆ x%2 │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 0   ┆ 0   │
# │ 1   ┆ 1   │
# │ 2   ┆ 0   │
# │ 3   ┆ 1   │
# │ 4   ┆ 0   │
# └─────┴─────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3394
3395
3396
# File 'lib/polars/expr.rb', line 3394

# Method form of the modulus operator; delegates to `%`.
#
# @param other [Object] numeric literal or expression value
# @return [Expr]
def mod(other) = self % other

#modeExpr

Compute the most occurring value(s).

Can return multiple values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 1, 2, 3],
    "b" => [1, 1, 2, 2]
  }
)
df.select(Polars.all.mode)
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 1   │
# │ 1   ┆ 2   │
# └─────┴─────┘

Returns:



1196
1197
1198
# File 'lib/polars/expr.rb', line 1196

# Compute the most occurring value(s).
#
# @return [Expr] the native `mode` expression, wrapped via `_from_rbexpr`
def mode
  native = _rbexpr.mode
  _from_rbexpr(native)
end

#mul(other) ⇒ Expr

Method equivalent of multiplication operator expr * other.

Examples:

df = Polars::DataFrame.new({"x" => [1, 2, 4, 8, 16]})
df.with_columns(
  Polars.col("x").mul(2).alias("x*2"),
  Polars.col("x").mul(Polars.col("x").log(2)).alias("x * xlog2"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬───────────┐
# │ x   ┆ x*2 ┆ x * xlog2 │
# │ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ f64       │
# ╞═════╪═════╪═══════════╡
# │ 1   ┆ 2   ┆ 0.0       │
# │ 2   ┆ 4   ┆ 2.0       │
# │ 4   ┆ 8   ┆ 8.0       │
# │ 8   ┆ 16  ┆ 24.0      │
# │ 16  ┆ 32  ┆ 64.0      │
# └─────┴─────┴───────────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3424
3425
3426
# File 'lib/polars/expr.rb', line 3424

# Method form of the multiplication operator; delegates to `*`.
#
# @param other [Object] numeric literal or expression value
# @return [Expr]
def mul(other) = self * other

#n_uniqueExpr

Count unique values.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").n_unique)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Returns:



2060
2061
2062
# File 'lib/polars/expr.rb', line 2060

# Count unique values.
#
# @return [Expr] the native `n_unique` expression, wrapped via `_from_rbexpr`
def n_unique
  native = _rbexpr.n_unique
  _from_rbexpr(native)
end

#nameNameExpr

Create an object namespace of all expressions that modify expression names.

Returns:



6024
6025
6026
# File 'lib/polars/expr.rb', line 6024

# Create an object namespace of all expressions that modify expression names.
#
# @return [NameExpr] a namespace object constructed around this expression
def name = NameExpr.new(self)

#nan_maxExpr

Get maximum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_max)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘

Returns:



1936
1937
1938
# File 'lib/polars/expr.rb', line 1936

# Get maximum value, but propagate/poison encountered NaN values.
#
# @return [Expr] the native `nan_max` expression, wrapped via `_from_rbexpr`
def nan_max
  native = _rbexpr.nan_max
  _from_rbexpr(native)
end

#nan_minExpr

Get minimum value, but propagate/poison encountered NaN values.

Examples:

df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
df.select(Polars.col("a").nan_min)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ NaN │
# └─────┘

Returns:



1956
1957
1958
# File 'lib/polars/expr.rb', line 1956

# Get minimum value, but propagate/poison encountered NaN values.
#
# @return [Expr] the native `nan_min` expression, wrapped via `_from_rbexpr`
def nan_min
  native = _rbexpr.nan_min
  _from_rbexpr(native)
end

#ne(other) ⇒ Expr

Method equivalent of inequality operator expr != other.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0],
    "y" => [2.0, 2.0, Float::NAN, 4.0]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x != y"),
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ y   ┆ x != y │
# │ --- ┆ --- ┆ ---    │
# │ f64 ┆ f64 ┆ bool   │
# ╞═════╪═════╪════════╡
# │ 1.0 ┆ 2.0 ┆ true   │
# │ 2.0 ┆ 2.0 ┆ false  │
# │ NaN ┆ NaN ┆ false  │
# │ 4.0 ┆ 4.0 ┆ false  │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3251
3252
3253
# File 'lib/polars/expr.rb', line 3251

# Method form of the inequality operator; delegates to `!=`.
#
# @param other [Object] a literal or expression value to compare with
# @return [Expr]
def ne(other) = self != other

#ne_missing(other) ⇒ Expr

Method equivalent of inequality operator expr != other where nil == nil.

This differs from default ne where null values are propagated.

Examples:

df = Polars::DataFrame.new(
  {
    "x" => [1.0, 2.0, Float::NAN, 4.0, nil, nil],
    "y" => [2.0, 2.0, Float::NAN, 4.0, 5.0, nil]
  }
)
df.with_columns(
  Polars.col("x").ne(Polars.col("y")).alias("x ne y"),
  Polars.col("x").ne_missing(Polars.col("y")).alias("x ne_missing y")
)
# =>
# shape: (6, 4)
# ┌──────┬──────┬────────┬────────────────┐
# │ x    ┆ y    ┆ x ne y ┆ x ne_missing y │
# │ ---  ┆ ---  ┆ ---    ┆ ---            │
# │ f64  ┆ f64  ┆ bool   ┆ bool           │
# ╞══════╪══════╪════════╪════════════════╡
# │ 1.0  ┆ 2.0  ┆ true   ┆ true           │
# │ 2.0  ┆ 2.0  ┆ false  ┆ false          │
# │ NaN  ┆ NaN  ┆ false  ┆ false          │
# │ 4.0  ┆ 4.0  ┆ false  ┆ false          │
# │ null ┆ 5.0  ┆ null   ┆ true           │
# │ null ┆ null ┆ null   ┆ false          │
# └──────┴──────┴────────┴────────────────┘

Parameters:

  • other (Object)

    A literal or expression value to compare with.

Returns:



3289
3290
3291
3292
# File 'lib/polars/expr.rb', line 3289

# Inequality comparison that, unlike `ne`, does not propagate null values.
#
# @param other [Object] a literal or expression value to compare with;
#   parsed with `str_as_lit: true`
# @return [Expr]
def ne_missing(other)
  rhs = Utils.parse_as_expression(other, str_as_lit: true)
  native = _rbexpr.neq_missing(rhs)
  _from_rbexpr(native)
end

#negExpr

Method equivalent of unary minus operator -expr.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 2, nil]})
df.with_columns(Polars.col("a").neg)
# =>
# shape: (4, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 1    │
# │ 0    │
# │ -2   │
# │ null │
# └──────┘

Returns:



3477
3478
3479
# File 'lib/polars/expr.rb', line 3477

# Method form of the unary minus operator; delegates to `-@`.
#
# @return [Expr]
def neg = -self

#null_countExpr

Count null values.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [nil, 1, nil],
    "b" => [1, 2, 3]
  }
)
df.select(Polars.all.null_count)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Returns:



2108
2109
2110
# File 'lib/polars/expr.rb', line 2108

# Count null values.
#
# @return [Expr] the native `null_count` expression, wrapped via `_from_rbexpr`
def null_count
  native = _rbexpr.null_count
  _from_rbexpr(native)
end

#over(expr) ⇒ Expr

Apply window function over a subgroup.

This is similar to a group by + aggregation + self join, or to window functions in Postgres.

Examples:

df = Polars::DataFrame.new(
  {
    "groups" => ["g1", "g1", "g2"],
    "values" => [1, 2, 3]
  }
)
df.with_column(
  Polars.col("values").max.over("groups").alias("max_by_group")
)
# =>
# shape: (3, 3)
# ┌────────┬────────┬──────────────┐
# │ groups ┆ values ┆ max_by_group │
# │ ---    ┆ ---    ┆ ---          │
# │ str    ┆ i64    ┆ i64          │
# ╞════════╪════════╪══════════════╡
# │ g1     ┆ 1      ┆ 2            │
# │ g1     ┆ 2      ┆ 2            │
# │ g2     ┆ 3      ┆ 3            │
# └────────┴────────┴──────────────┘
df = Polars::DataFrame.new(
  {
    "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
    "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
  }
)
df.lazy
  .select([Polars.col("groups").sum.over("groups")])
  .collect
# =>
# shape: (9, 1)
# ┌────────┐
# │ groups │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 4      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 4      │
# │ 6      │
# │ 6      │
# │ 6      │
# │ 4      │
# └────────┘

Parameters:

  • expr (Object)

    Column(s) to group by.

Returns:



2279
2280
2281
2282
# File 'lib/polars/expr.rb', line 2279

# Apply window function over a subgroup.
#
# @param expr [Object] column(s) to group by; converted with
#   `Utils.selection_to_rbexpr_list` before being handed to the native `over`
# @return [Expr]
def over(expr)
  partition_by = Utils.selection_to_rbexpr_list(expr)
  native = _rbexpr.over(partition_by)
  _from_rbexpr(native)
end

#pct_change(n: 1) ⇒ Expr

Computes percentage change between values.

Percentage change (as fraction) between current element and most-recent non-null element at least n period(s) before the current element.

Computes the change from the previous row by default.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [10, 11, 12, nil, 12]
  }
)
df.with_column(Polars.col("a").pct_change.alias("pct_change"))
# =>
# shape: (5, 2)
# ┌──────┬────────────┐
# │ a    ┆ pct_change │
# │ ---  ┆ ---        │
# │ i64  ┆ f64        │
# ╞══════╪════════════╡
# │ 10   ┆ null       │
# │ 11   ┆ 0.1        │
# │ 12   ┆ 0.090909   │
# │ null ┆ 0.0        │
# │ 12   ┆ 0.0        │
# └──────┴────────────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Periods to shift for forming percent change.

Returns:



4836
4837
4838
4839
# File 'lib/polars/expr.rb', line 4836

# Computes percentage change between values.
#
# @param n [Integer] periods to shift for forming percent change; passed through
#   `Utils.parse_as_expression` before reaching the native call
# @return [Expr]
def pct_change(n: 1)
  periods = Utils.parse_as_expression(n)
  native = _rbexpr.pct_change(periods)
  _from_rbexpr(native)
end

#peak_maxExpr

Get a boolean mask of the local maximum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(Polars.col("a").peak_max)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ false │
# │ false │
# │ false │
# │ true  │
# └───────┘

Returns:



2377
2378
2379
# File 'lib/polars/expr.rb', line 2377

# Get a boolean mask of the local maximum peaks.
#
# @return [Expr] the native `peak_max` expression, wrapped via `_from_rbexpr`
def peak_max
  native = _rbexpr.peak_max
  _from_rbexpr(native)
end

#peak_minExpr

Get a boolean mask of the local minimum peaks.

Examples:

df = Polars::DataFrame.new({"a" => [4, 1, 3, 2, 5]})
df.select(Polars.col("a").peak_min)
# =>
# shape: (5, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# │ true  │
# │ false │
# │ true  │
# │ false │
# └───────┘

Returns:



2401
2402
2403
# File 'lib/polars/expr.rb', line 2401

# Get a boolean mask of the local minimum peaks.
#
# @return [Expr] the native `peak_min` expression, wrapped via `_from_rbexpr`
def peak_min
  native = _rbexpr.peak_min
  _from_rbexpr(native)
end

#pow(exponent) ⇒ Expr

Raise expression to the power of exponent.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").pow(3))
# =>
# shape: (4, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 1.0  │
# │ 8.0  │
# │ 27.0 │
# │ 64.0 │
# └──────┘

Returns:



3532
3533
3534
# File 'lib/polars/expr.rb', line 3532

# Raise expression to the power of exponent; delegates to `**`.
#
# @param exponent [Object] the exponent, handed unchanged to `**`
# @return [Expr]
def pow(exponent) = self**exponent

#prefix(prefix) ⇒ Expr

Add a prefix to the root column name of the expression.

Returns:



414
415
416
# File 'lib/polars/expr.rb', line 414

# Add a prefix to the root column name of the expression.
#
# Forwards to `name.prefix`.
#
# @param prefix [Object] the prefix, handed unchanged to `name.prefix`
# @return [Expr]
def prefix(prefix) = name.prefix(prefix)

#productExpr

Compute the product of an expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").product)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 6   │
# └─────┘

Returns:



2040
2041
2042
# File 'lib/polars/expr.rb', line 2040

# Compute the product of an expression.
#
# @return [Expr] the native `product` expression, wrapped via `_from_rbexpr`
def product
  native = _rbexpr.product
  _from_rbexpr(native)
end

#qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false) ⇒ Expr

Bin continuous values into discrete categories based on their quantiles.

Examples:

Divide a column into three categories according to pre-defined quantile probabilities.

df = Polars::DataFrame.new({"foo" => [-2, -1, 0, 1, 2]})
df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], labels: ["a", "b", "c"]).alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ a    │
# │ -1  ┆ a    │
# │ 0   ┆ b    │
# │ 1   ┆ b    │
# │ 2   ┆ c    │
# └─────┴──────┘

Divide a column into two categories using uniform quantile probabilities.

df.with_columns(
  Polars.col("foo")
    .qcut(2, labels: ["low", "high"], left_closed: true)
    .alias("qcut")
)
# =>
# shape: (5, 2)
# ┌─────┬──────┐
# │ foo ┆ qcut │
# │ --- ┆ ---  │
# │ i64 ┆ cat  │
# ╞═════╪══════╡
# │ -2  ┆ low  │
# │ -1  ┆ low  │
# │ 0   ┆ high │
# │ 1   ┆ high │
# │ 2   ┆ high │
# └─────┴──────┘

Add both the category and the breakpoint.

df.with_columns(
  Polars.col("foo").qcut([0.25, 0.75], include_breaks: true).alias("qcut")
).unnest("qcut")
# =>
# shape: (5, 3)
# ┌─────┬──────┬────────────┐
# │ foo ┆ brk  ┆ foo_bin    │
# │ --- ┆ ---  ┆ ---        │
# │ i64 ┆ f64  ┆ cat        │
# ╞═════╪══════╪════════════╡
# │ -2  ┆ -1.0 ┆ (-inf, -1] │
# │ -1  ┆ -1.0 ┆ (-inf, -1] │
# │ 0   ┆ 1.0  ┆ (-1, 1]    │
# │ 1   ┆ 1.0  ┆ (-1, 1]    │
# │ 2   ┆ inf  ┆ (1, inf]   │
# └─────┴──────┴────────────┘

Parameters:

  • quantiles (Array, Integer)

    Either a list of quantile probabilities between 0 and 1 or a positive integer determining the number of bins with uniform probability.

  • labels (Array) (defaults to: nil)

    Names of the categories. The number of labels must be equal to the number of categories.

  • left_closed (Boolean) (defaults to: false)

    Set the intervals to be left-closed instead of right-closed.

  • allow_duplicates (Boolean) (defaults to: false)

    If set to true, duplicates in the resulting quantiles are dropped, rather than raising a DuplicateError. This can happen even with unique probabilities, depending on the data.

  • include_breaks (Boolean) (defaults to: false)

    Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a Categorical to a Struct.

Returns:



2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
# File 'lib/polars/expr.rb', line 2612

# Bin continuous values into discrete categories based on their quantiles.
#
# @param quantiles [Array, Integer] quantile probabilities, or an Integer
#   number of bins (dispatched to the native `qcut_uniform`)
# @param labels [Array] names of the categories
# @param left_closed [Boolean] make the intervals left-closed instead of right-closed
# @param allow_duplicates [Boolean] drop duplicate quantiles instead of raising
# @param include_breaks [Boolean] include a column with the right endpoint of each bin
# @return [Expr]
def qcut(quantiles, labels: nil, left_closed: false, allow_duplicates: false, include_breaks: false)
  # Both native entry points take the same positional argument list.
  bin_args = [quantiles, labels, left_closed, allow_duplicates, include_breaks]
  native = quantiles.is_a?(Integer) ? _rbexpr.qcut_uniform(*bin_args) : _rbexpr.qcut(*bin_args)
  _from_rbexpr(native)
end

#quantile(quantile, interpolation: "nearest") ⇒ Expr

Get quantile value.

Examples:

df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
df.select(Polars.col("a").quantile(0.3))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 2.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘
df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.5 │
# └─────┘

Parameters:

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



2474
2475
2476
2477
# File 'lib/polars/expr.rb', line 2474

# Get quantile value.
#
# @param quantile [Float] quantile between 0.0 and 1.0; converted with
#   `Utils.expr_to_lit_or_expr(..., str_to_lit: false)`
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   interpolation method, forwarded unchanged
# @return [Expr]
def quantile(quantile, interpolation: "nearest")
  level = Utils.expr_to_lit_or_expr(quantile, str_to_lit: false)
  native = _rbexpr.quantile(level._rbexpr, interpolation)
  _from_rbexpr(native)
end

#rank(method: "average", reverse: false, seed: nil) ⇒ Expr

Assign ranks to data, dealing with ties appropriately.

Examples:

The 'average' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank)
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# │ 4.5 │
# │ 1.5 │
# │ 1.5 │
# │ 4.5 │
# └─────┘

The 'ordinal' method:

df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
df.select(Polars.col("a").rank(method: "ordinal"))
# =>
# shape: (5, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# │ 4   │
# │ 1   │
# │ 2   │
# │ 5   │
# └─────┘

Parameters:

  • method ("average", "min", "max", "dense", "ordinal", "random") (defaults to: "average")

    The method used to assign ranks to tied elements. The following methods are available:

    • 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value.
    • 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.)
    • 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value.
    • 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements.
    • 'ordinal' : All values are given a distinct rank, corresponding to the order that the values occur in the Series.
    • 'random' : Like 'ordinal', but the rank for ties is not dependent on the order that the values occur in the Series.
  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



4769
4770
4771
# File 'lib/polars/expr.rb', line 4769

# Assign ranks to data, dealing with ties appropriately.
#
# @param method ["average", "min", "max", "dense", "ordinal", "random"]
#   the method used to assign ranks to tied elements
# @param reverse [Boolean] reverse the operation
# @param seed [Object] forwarded unchanged to the native `rank`
#   (presumably only relevant for the "random" method; confirm)
# @return [Expr]
def rank(method: "average", reverse: false, seed: nil)
  native = _rbexpr.rank(method, reverse, seed)
  _from_rbexpr(native)
end

#rechunkExpr

Create a single chunk of memory for this Series.

Examples:

Create a Series with 3 nulls, append column a then rechunk

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
# =>
# shape: (6, 1)
# ┌────────┐
# │ repeat │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ null   │
# │ null   │
# │ null   │
# │ 1      │
# │ 1      │
# │ 2      │
# └────────┘

Returns:



839
840
841
# File 'lib/polars/expr.rb', line 839

# Create a single chunk of memory for this Series.
#
# @return [Expr] the native `rechunk` expression, wrapped via `_from_rbexpr`
def rechunk
  native = _rbexpr.rechunk
  _from_rbexpr(native)
end

#reinterpret(signed: false) ⇒ Expr

Reinterpret the underlying bits as a signed/unsigned integer.

This operation is only allowed for 64-bit integers. For integers with fewer bits, you can safely use the cast operation.

Examples:

s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
df = Polars::DataFrame.new([s])
df.select(
  [
    Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
    Polars.col("a").alias("original")
  ]
)
# =>
# shape: (3, 2)
# ┌───────────────┬──────────┐
# │ reinterpreted ┆ original │
# │ ---           ┆ ---      │
# │ i64           ┆ u64      │
# ╞═══════════════╪══════════╡
# │ 1             ┆ 1        │
# │ 1             ┆ 1        │
# │ 2             ┆ 2        │
# └───────────────┴──────────┘

Parameters:

  • signed (Boolean) (defaults to: false)

    If true, reinterpret as :i64. Otherwise, reinterpret as :u64.

Returns:



3792
3793
3794
# File 'lib/polars/expr.rb', line 3792

# Reinterpret the underlying bits as a signed/unsigned integer.
#
# @param signed [Boolean] if true, reinterpret as :i64; otherwise as :u64
# @return [Expr]
def reinterpret(signed: false)
  native = _rbexpr.reinterpret(signed)
  _from_rbexpr(native)
end

#repeat_by(by) ⇒ Expr

Repeat the elements in this Series as specified in the given expression.

The repeated elements are expanded into a List.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => ["x", "y", "z"],
    "n" => [1, 2, 3]
  }
)
df.select(Polars.col("a").repeat_by("n"))
# =>
# shape: (3, 1)
# ┌─────────────────┐
# │ a               │
# │ ---             │
# │ list[str]       │
# ╞═════════════════╡
# │ ["x"]           │
# │ ["y", "y"]      │
# │ ["z", "z", "z"] │
# └─────────────────┘

Parameters:

  • by (Object)

    Numeric column that determines how often the values will be repeated. The column will be coerced to UInt32. Give this dtype to make the coercion a no-op.

Returns:



3631
3632
3633
3634
# File 'lib/polars/expr.rb', line 3631

# Repeat the elements in this Series as specified in the given expression.
#
# @param by [Object] numeric column that determines how often the values are
#   repeated; converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: false)`
# @return [Expr]
def repeat_by(by)
  counts = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
  native = _rbexpr.repeat_by(counts._rbexpr)
  _from_rbexpr(native)
end

#replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil) ⇒ Expr

Replace values by different values.

Examples:

Replace a single value by another value. Values that were not replaced remain unchanged.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3]})
df.with_columns(replaced: Polars.col("a").replace(2, 100))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 3        │
# └─────┴──────────┘

Replace multiple values by passing sequences to the old and new parameters.

df.with_columns(replaced: Polars.col("a").replace([2, 3], [100, 200]))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ 1        │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Passing a mapping with replacements is also supported as syntactic sugar. Specify a default to set all values that were not matched.

mapping = {2 => 100, 3 => 200}
df.with_columns(replaced: Polars.col("a").replace(mapping, default: -1))
# =>
# shape: (4, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ i64 ┆ i64      │
# ╞═════╪══════════╡
# │ 1   ┆ -1       │
# │ 2   ┆ 100      │
# │ 2   ┆ 100      │
# │ 3   ┆ 200      │
# └─────┴──────────┘

Replacing by values of a different data type sets the return type based on a combination of the new data type and either the original data type or the default data type if it was set.

df = Polars::DataFrame.new({"a" => ["x", "y", "z"]})
mapping = {"x" => 1, "y" => 2, "z" => 3}
df.with_columns(replaced: Polars.col("a").replace(mapping))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ str      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘
df.with_columns(replaced: Polars.col("a").replace(mapping, default: nil))
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ i64      │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Set the return_dtype parameter to control the resulting data type directly.

df.with_columns(
  replaced: Polars.col("a").replace(mapping, return_dtype: Polars::UInt8)
)
# =>
# shape: (3, 2)
# ┌─────┬──────────┐
# │ a   ┆ replaced │
# │ --- ┆ ---      │
# │ str ┆ u8       │
# ╞═════╪══════════╡
# │ x   ┆ 1        │
# │ y   ┆ 2        │
# │ z   ┆ 3        │
# └─────┴──────────┘

Expression input is supported for all parameters.

df = Polars::DataFrame.new({"a" => [1, 2, 2, 3], "b" => [1.5, 2.5, 5.0, 1.0]})
df.with_columns(
  replaced: Polars.col("a").replace(
    Polars.col("a").max,
    Polars.col("b").sum,
    default: Polars.col("b")
  )
)
# =>
# shape: (4, 3)
# ┌─────┬─────┬──────────┐
# │ a   ┆ b   ┆ replaced │
# │ --- ┆ --- ┆ ---      │
# │ i64 ┆ f64 ┆ f64      │
# ╞═════╪═════╪══════════╡
# │ 1   ┆ 1.5 ┆ 1.5      │
# │ 2   ┆ 2.5 ┆ 2.5      │
# │ 2   ┆ 5.0 ┆ 5.0      │
# │ 3   ┆ 1.0 ┆ 10.0     │
# └─────┴─────┴──────────┘

Parameters:

  • old (Object)

    Value or sequence of values to replace. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Also accepts a mapping of values to their replacement.

  • new (Object) (defaults to: NO_DEFAULT)

    Value or sequence of values to replace by. Accepts expression input. Sequences are parsed as Series, other non-expression inputs are parsed as literals. Length must match the length of old or have length 1.

  • default (Object) (defaults to: NO_DEFAULT)

    Set values that were not replaced to this value. Defaults to keeping the original value. Accepts expression input. Non-expression inputs are parsed as literals.

  • return_dtype (Object) (defaults to: nil)

    The data type of the resulting expression. If set to nil (default), the data type is determined automatically based on the other inputs.

Returns:



5953
5954
5955
5956
5957
5958
5959
5960
5961
5962
5963
5964
5965
5966
5967
5968
5969
5970
5971
5972
5973
5974
5975
5976
5977
# File 'lib/polars/expr.rb', line 5953

# Replace values by different values.
#
# Two calling conventions are supported: a single Hash mapping each old value
# to its replacement, or separate `old` and `new` arguments.
#
# @param old [Object] value, Array of values, or Hash of `old => new` pairs
# @param new [Object] value or Array of values to replace by; may only be
#   omitted when `old` is a Hash
# @param default [Object] value for entries that were not replaced; when
#   omitted, `nil` is passed to the native call
# @param return_dtype [Object] data type of the resulting expression,
#   forwarded unchanged
# @return [Expr]
# @raise [ArgumentError] if `new` is omitted and `old` is not a Hash
def replace(old, new = NO_DEFAULT, default: NO_DEFAULT, return_dtype: nil)
  if new.eql?(NO_DEFAULT)
    # Without this guard the NO_DEFAULT sentinel itself would be parsed as
    # the replacement value.
    unless old.is_a?(Hash)
      raise ArgumentError, "`new` argument is required if `old` argument is not a Hash"
    end

    # Hash form: keys are the values to look for, values are their replacements.
    new = Series.new(old.values)
    old = Series.new(old.keys)
  else
    old = Series.new(old) if old.is_a?(::Array)
    new = Series.new(new) if new.is_a?(::Array)
  end

  old = Utils.parse_as_expression(old, str_as_lit: true)
  new = Utils.parse_as_expression(new, str_as_lit: true)

  # An omitted default is passed to the native call as nil.
  default = default.eql?(NO_DEFAULT) ? nil : Utils.parse_as_expression(default, str_as_lit: true)

  _from_rbexpr(_rbexpr.replace(old, new, default, return_dtype))
end

#reshape(dims) ⇒ Expr

Reshape this Expr to a flat Series or a Series of Lists.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
df.select(Polars.col("foo").reshape([3, 3]))
# =>
# shape: (3, 1)
# ┌───────────┐
# │ foo       │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# │ [4, 5, 6] │
# │ [7, 8, 9] │
# └───────────┘

Parameters:

  • dims (Array)

    Array of the dimension sizes. If a -1 is used in any of the dimensions, that dimension is inferred.

Returns:



5333
5334
5335
# File 'lib/polars/expr.rb', line 5333

# Reshape this Expr to a flat Series or a Series of Lists.
#
# @param dims [Array] the dimension sizes, forwarded unchanged to the native `reshape`
# @return [Expr]
def reshape(dims)
  native = _rbexpr.reshape(dims)
  _from_rbexpr(native)
end

#reverseExpr

Reverse the selection.

Returns:



1830
1831
1832
# File 'lib/polars/expr.rb', line 1830

# Reverse the selection.
#
# @return [Expr] the native `reverse` expression, wrapped via `_from_rbexpr`
def reverse
  native = _rbexpr.reverse
  _from_rbexpr(native)
end

#rleExpr

Get the lengths of runs of identical values.

Examples:

df = Polars::DataFrame.new(Polars::Series.new("s", [1, 1, 2, 1, nil, 1, 3, 3]))
df.select(Polars.col("s").rle).unnest("s")
# =>
# shape: (6, 2)
# ┌─────────┬────────┐
# │ lengths ┆ values │
# │ ---     ┆ ---    │
# │ i32     ┆ i64    │
# ╞═════════╪════════╡
# │ 2       ┆ 1      │
# │ 1       ┆ 2      │
# │ 1       ┆ 1      │
# │ 1       ┆ null   │
# │ 1       ┆ 1      │
# │ 2       ┆ 3      │
# └─────────┴────────┘

Returns:



2647
2648
2649
# File 'lib/polars/expr.rb', line 2647

# Get the lengths of runs of identical values.
#
# @return [Expr] the native `rle` expression, wrapped via `_from_rbexpr`
def rle
  native = _rbexpr.rle
  _from_rbexpr(native)
end

#rle_idExpr

Map values to run IDs.

Similar to RLE, but it maps each value to an ID corresponding to the run into which it falls. This is especially useful when you want to define groups by runs of identical values rather than the values themselves.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 1, 1, 1], "b" => ["x", "x", nil, "y", "y"]})
df.with_columns([Polars.col("a").rle_id.alias("a_r"), Polars.struct(["a", "b"]).rle_id.alias("ab_r")])
# =>
# shape: (5, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ a_r ┆ ab_r │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ str  ┆ u32 ┆ u32  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ x    ┆ 0   ┆ 0    │
# │ 2   ┆ x    ┆ 1   ┆ 1    │
# │ 1   ┆ null ┆ 2   ┆ 2    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# │ 1   ┆ y    ┆ 2   ┆ 3    │
# └─────┴──────┴─────┴──────┘

Returns:



2675
2676
2677
# File 'lib/polars/expr.rb', line 2675

# Map values to run IDs.
#
# @return [Expr] the native `rle_id` expression, wrapped via `_from_rbexpr`
def rle_id
  native = _rbexpr.rle_id
  _from_rbexpr(native)
end

#rolling_max(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left") ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Apply a rolling max (moving max) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their max.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_max(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# │ 6.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
# File 'lib/polars/expr.rb', line 4019

# Rolling max: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options positionally
# to the native expression and wraps the result.
def rolling_max(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_max(resolved_window, weights, resolved_min, center, by, closed)
  _from_rbexpr(native)
end

#rolling_mean(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left") ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Apply a rolling mean (moving mean) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their mean.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
df.select(
  [
    Polars.col("A").rolling_mean(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 4.5  │
# │ 7.0  │
# │ 4.0  │
# │ 9.0  │
# │ 13.0 │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
# File 'lib/polars/expr.rb', line 4108

# Rolling mean: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options positionally
# to the native expression and wraps the result.
def rolling_mean(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_mean(resolved_window, weights, resolved_min, center, by, closed)
  _from_rbexpr(native)
end

#rolling_median(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left", warn_if_unsorted: true) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Compute a rolling median.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_median(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 6.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4464
4465
4466
4467
4468
4469
4470
4471
4472
4473
4474
4475
4476
4477
4478
4479
4480
4481
# File 'lib/polars/expr.rb', line 4464

# Rolling median: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options (including
# warn_if_unsorted) positionally to the native expression and wraps the result.
def rolling_median(
  window_size, weights: nil, min_periods: nil, center: false,
  by: nil, closed: "left", warn_if_unsorted: true
)
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_median(
    resolved_window, weights, resolved_min, center, by, closed, warn_if_unsorted
  )
  _from_rbexpr(native)
end

#rolling_min(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left") ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Apply a rolling min (moving min) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their min.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_min(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 1.0  │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# │ 5.0  │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
# File 'lib/polars/expr.rb', line 3930

# Rolling min: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options positionally
# to the native expression and wraps the result.
def rolling_min(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_min(resolved_window, weights, resolved_min, center, by, closed)
  _from_rbexpr(native)
end

#rolling_quantile(quantile, interpolation: "nearest", window_size: 2, weights: nil, min_periods: nil, center: false, by: nil, closed: "left", warn_if_unsorted: true) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Compute a rolling quantile.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_quantile(0.33, window_size: 3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ null │
# │ 1.0  │
# │ 2.0  │
# │ 3.0  │
# │ 4.0  │
# └──────┘

Parameters:

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

  • window_size (Integer) (defaults to: 2)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4554
4555
4556
4557
4558
4559
4560
4561
4562
4563
4564
4565
4566
4567
4568
4569
4570
4571
4572
4573
# File 'lib/polars/expr.rb', line 4554

# Rolling quantile: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards quantile, interpolation and the
# remaining options positionally to the native expression and wraps the result.
def rolling_quantile(
  quantile, interpolation: "nearest", window_size: 2, weights: nil, min_periods: nil,
  center: false, by: nil, closed: "left", warn_if_unsorted: true
)
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_quantile(
    quantile, interpolation, resolved_window, weights, resolved_min,
    center, by, closed, warn_if_unsorted
  )
  _from_rbexpr(native)
end

#rolling_skew(window_size, bias: true) ⇒ Expr

Compute a rolling skew.

Parameters:

  • window_size (Integer)

    Integer size of the rolling window.

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



4647
4648
4649
# File 'lib/polars/expr.rb', line 4647

# Forwards window_size and bias to the native rolling_skew and wraps the result.
def rolling_skew(window_size, bias: true)
  skewed = _rbexpr.rolling_skew(window_size, bias)
  _from_rbexpr(skewed)
end

#rolling_std(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left", ddof: 1, warn_if_unsorted: true) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Compute a rolling standard deviation.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their standard deviation.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_std(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 1.527525 │
# │ 2.0      │
# └──────────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4286
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
4303
4304
# File 'lib/polars/expr.rb', line 4286

# Rolling standard deviation: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options (including ddof and
# warn_if_unsorted) positionally to the native expression and wraps the result.
def rolling_std(
  window_size, weights: nil, min_periods: nil, center: false,
  by: nil, closed: "left", ddof: 1, warn_if_unsorted: true
)
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_std(
    resolved_window, weights, resolved_min, center, by, closed, ddof, warn_if_unsorted
  )
  _from_rbexpr(native)
end

#rolling_sum(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left") ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Apply a rolling sum (moving sum) over the values in this array.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their sum.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
df.select(
  [
    Polars.col("A").rolling_sum(2)
  ]
)
# =>
# shape: (6, 1)
# ┌──────┐
# │ A    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 3.0  │
# │ 5.0  │
# │ 7.0  │
# │ 9.0  │
# │ 11.0 │
# └──────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
# File 'lib/polars/expr.rb', line 4197

# Rolling sum: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options positionally
# to the native expression and wraps the result.
def rolling_sum(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left")
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_sum(resolved_window, weights, resolved_min, center, by, closed)
  _from_rbexpr(native)
end

#rolling_var(window_size, weights: nil, min_periods: nil, center: false, by: nil, closed: "left", ddof: 1, warn_if_unsorted: true) ⇒ Expr

Note:

This functionality is experimental and may change without it being considered a breaking change.

Note:

If you want to compute multiple aggregation statistics over the same dynamic window, consider using group_by_rolling; this method can cache the window size computation.

Compute a rolling variance.

A window of length window_size will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the weight vector. The resulting values will be aggregated to their variance.

Examples:

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
df.select(
  [
    Polars.col("A").rolling_var(3)
  ]
)
# =>
# shape: (6, 1)
# ┌──────────┐
# │ A        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ null     │
# │ null     │
# │ 1.0      │
# │ 1.0      │
# │ 2.333333 │
# │ 4.0      │
# └──────────┘

Parameters:

  • window_size (Integer)

    The length of the window. Can be a fixed integer size, or a dynamic temporal size indicated by a timedelta or the following string language:

    • 1ns (1 nanosecond)
    • 1us (1 microsecond)
    • 1ms (1 millisecond)
    • 1s (1 second)
    • 1m (1 minute)
    • 1h (1 hour)
    • 1d (1 day)
    • 1w (1 week)
    • 1mo (1 calendar month)
    • 1y (1 calendar year)
    • 1i (1 index count)

    If a timedelta or the dynamic string language is used, the by and closed arguments must also be set.

  • weights (Array) (defaults to: nil)

    An optional slice with the same length as the window that will be multiplied elementwise with the values in the window.

  • min_periods (Integer) (defaults to: nil)

    The number of values in the window that should be non-null before computing a result. If nil, it will be set equal to window size.

  • center (Boolean) (defaults to: false)

    Set the labels at the center of the window

  • by (String) (defaults to: nil)

    If the window_size is temporal, for instance "5h" or "3s", you must set the column that will be used to determine the windows. This column must be of dtype {Date, Datetime}.

  • closed ("left", "right", "both", "none") (defaults to: "left")

    Define whether the temporal window interval is closed or not.

Returns:



4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
# File 'lib/polars/expr.rb', line 4377

# Rolling variance: normalizes window_size/min_periods via
# _prepare_rolling_window_args, then forwards all options (including ddof and
# warn_if_unsorted) positionally to the native expression and wraps the result.
def rolling_var(
  window_size, weights: nil, min_periods: nil, center: false,
  by: nil, closed: "left", ddof: 1, warn_if_unsorted: true
)
  resolved_window, resolved_min = _prepare_rolling_window_args(window_size, min_periods)
  native = _rbexpr.rolling_var(
    resolved_window, weights, resolved_min, center, by, closed, ddof, warn_if_unsorted
  )
  _from_rbexpr(native)
end

#round(decimals = 0) ⇒ Expr

Round underlying floating point data by decimals digits.

Examples:

df = Polars::DataFrame.new({"a" => [0.33, 0.52, 1.02, 1.17]})
df.select(Polars.col("a").round(1))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.3 │
# │ 0.5 │
# │ 1.0 │
# │ 1.2 │
# └─────┘

Parameters:

  • decimals (Integer) (defaults to: 0)

    Number of decimals to round by.

Returns:



1139
1140
1141
# File 'lib/polars/expr.rb', line 1139

# Forwards +decimals+ to the native expression's round and wraps the result.
def round(decimals = 0)
  rounded = _rbexpr.round(decimals)
  _from_rbexpr(rounded)
end

#sample(frac: nil, with_replacement: true, shuffle: false, seed: nil, n: nil) ⇒ Expr

Sample from this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# │ 1   │
# │ 1   │
# └─────┘

Parameters:

  • frac (Float) (defaults to: nil)

    Fraction of items to return. Cannot be used with n.

  • with_replacement (Boolean) (defaults to: true)

    Allow values to be sampled more than once.

  • shuffle (Boolean) (defaults to: false)

    Shuffle the order of sampled data points.

  • seed (Integer) (defaults to: nil)

    Seed for the random number generator. If set to nil (default), a random seed is used.

  • n (Integer) (defaults to: nil)

    Number of items to return. Cannot be used with frac.

Returns:



5396
5397
5398
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
# File 'lib/polars/expr.rb', line 5396

# Samples from this expression, either by count (+n+) or by fraction (+frac+).
#
# Raises ArgumentError when both +n+ and +frac+ are given. When only +n+ is
# given, dispatches to the native sample_n; otherwise dispatches to the native
# sample_frac, using 1.0 when +frac+ is nil. The count/fraction is first
# converted with Utils.parse_as_expression.
def sample(frac: nil, with_replacement: true, shuffle: false, seed: nil, n: nil)
  count_given = !n.nil?
  frac_given = !frac.nil?
  raise ArgumentError, "cannot specify both `n` and `frac`" if count_given && frac_given

  if count_given
    count_expr = Utils.parse_as_expression(n)
    return _from_rbexpr(_rbexpr.sample_n(count_expr, with_replacement, shuffle, seed))
  end

  frac_expr = Utils.parse_as_expression(frac_given ? frac : 1.0)
  _from_rbexpr(_rbexpr.sample_frac(frac_expr, with_replacement, shuffle, seed))
end

#search_sorted(element, side: "any") ⇒ Expr

Find indices where elements should be inserted to maintain order.

Examples:

df = Polars::DataFrame.new(
  {
    "values" => [1, 2, 3, 5]
  }
)
df.select(
  [
    Polars.col("values").search_sorted(0).alias("zero"),
    Polars.col("values").search_sorted(3).alias("three"),
    Polars.col("values").search_sorted(6).alias("six")
  ]
)
# =>
# shape: (1, 3)
# ┌──────┬───────┬─────┐
# │ zero ┆ three ┆ six │
# │ ---  ┆ ---   ┆ --- │
# │ u32  ┆ u32   ┆ u32 │
# ╞══════╪═══════╪═════╡
# │ 0    ┆ 2     ┆ 4   │
# └──────┴───────┴─────┘

Parameters:

  • element (Object)

    Expression or scalar value.

Returns:



1500
1501
1502
1503
# File 'lib/polars/expr.rb', line 1500

# Converts +element+ with Utils.expr_to_lit_or_expr (str_to_lit: false), then
# forwards its native expression and +side+ to the native search_sorted and
# wraps the result.
def search_sorted(element, side: "any")
  needle = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
  positions = _rbexpr.search_sorted(needle._rbexpr, side)
  _from_rbexpr(positions)
end

#set_sorted(descending: false) ⇒ Expr

Note:

This can lead to incorrect results if this Series is not sorted!! Use with care!

Flags the expression as 'sorted'.

Enables downstream code to use fast paths for sorted arrays.

Examples:

df = Polars::DataFrame.new({"values" => [1, 2, 3]})
df.select(Polars.col("values").set_sorted.max)
# =>
# shape: (1, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 3      │
# └────────┘

Parameters:

  • descending (Boolean) (defaults to: false)

    Whether the Series order is descending.

Returns:



5753
5754
5755
# File 'lib/polars/expr.rb', line 5753

# Forwards +descending+ to the native set_sorted_flag and wraps the result.
def set_sorted(descending: false)
  flagged = _rbexpr.set_sorted_flag(descending)
  _from_rbexpr(flagged)
end

#shift(n = 1, fill_value: nil) ⇒ Expr

Shift the values by a given period.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").shift(1))
# =>
# shape: (4, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ null │
# │ 1    │
# │ 2    │
# │ 3    │
# └──────┘

Parameters:

  • n (Integer) (defaults to: 1)

    Number of places to shift (may be negative).

  • fill_value (Object) (defaults to: nil)

    Fill the resulting null values with this value.

Returns:



1625
1626
1627
1628
1629
1630
1631
# File 'lib/polars/expr.rb', line 1625

# Shifts by +n+ places. A non-nil +fill_value+ is converted with
# Utils.parse_as_expression (str_as_lit: true); nil is passed through as-is.
# +n+ is converted as well before both are handed to the native shift.
def shift(n = 1, fill_value: nil)
  fill_expr = fill_value.nil? ? nil : Utils.parse_as_expression(fill_value, str_as_lit: true)
  offset_expr = Utils.parse_as_expression(n)
  _from_rbexpr(_rbexpr.shift(offset_expr, fill_expr))
end

#shift_and_fill(periods, fill_value) ⇒ Expr

Shift the values by a given period and fill the resulting null values.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
df.select(Polars.col("foo").shift_and_fill(1, "a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ a   │
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • periods (Integer)

    Number of places to shift (may be negative).

  • fill_value (Object)

    Fill nil values with the result of this expression.

Returns:



1657
1658
1659
# File 'lib/polars/expr.rb', line 1657

# Thin wrapper: delegates to #shift, passing +fill_value+ as the keyword.
def shift_and_fill(periods, fill_value)
  shifted = shift(periods, fill_value: fill_value)
  shifted
end

#shrink_dtypeExpr

Shrink numeric columns to the minimal required datatype.

Shrink to the dtype needed to fit the extrema of this Series. This can be used to reduce memory pressure.

Examples:

Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [1, 2, 2 << 32],
    "c" => [-1, 2, 1 << 30],
    "d" => [-112, 2, 112],
    "e" => [-112, 2, 129],
    "f" => ["a", "b", "c"],
    "g" => [0.1, 1.32, 0.12],
    "h" => [true, nil, false]
  }
).select(Polars.all.shrink_dtype)
# =>
# shape: (3, 8)
# ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
# │ a   ┆ b          ┆ c          ┆ d    ┆ e    ┆ f   ┆ g    ┆ h     │
# │ --- ┆ ---        ┆ ---        ┆ ---  ┆ ---  ┆ --- ┆ ---  ┆ ---   │
# │ i8  ┆ i64        ┆ i32        ┆ i8   ┆ i16  ┆ str ┆ f32  ┆ bool  │
# ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
# │ 1   ┆ 1          ┆ -1         ┆ -112 ┆ -112 ┆ a   ┆ 0.1  ┆ true  │
# │ 2   ┆ 2          ┆ 2          ┆ 2    ┆ 2    ┆ b   ┆ 1.32 ┆ null  │
# │ 3   ┆ 8589934592 ┆ 1073741824 ┆ 112  ┆ 129  ┆ c   ┆ 0.12 ┆ false │
# └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘

Returns:



5813
5814
5815
# File 'lib/polars/expr.rb', line 5813

# Calls shrink_dtype on the native expression and wraps the result.
def shrink_dtype
  shrunk = _rbexpr.shrink_dtype
  _from_rbexpr(shrunk)
end

#shuffle(seed: nil) ⇒ Expr

Shuffle the contents of this expr.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3]})
df.select(Polars.col("a").shuffle(seed: 1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 3   │
# └─────┘

Parameters:

  • seed (Integer) (defaults to: nil)

    Seed for the random number generator. If set to nil (default), a random seed is generated.

Returns:



5359
5360
5361
5362
5363
5364
# File 'lib/polars/expr.rb', line 5359

# Shuffles via the native expression. When +seed+ is nil, a seed is drawn
# with rand(10000); otherwise the given seed is forwarded unchanged.
def shuffle(seed: nil)
  effective_seed = seed.nil? ? rand(10000) : seed
  _from_rbexpr(_rbexpr.shuffle(effective_seed))
end

#signExpr

Compute the element-wise indication of the sign.

Examples:

df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
df.select(Polars.col("a").sign)
# =>
# shape: (5, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ -1   │
# │ 0    │
# │ 0    │
# │ 1    │
# │ null │
# └──────┘

Returns:



5067
5068
5069
# File 'lib/polars/expr.rb', line 5067

# Calls sign on the native expression and wraps the result.
def sign
  signs = _rbexpr.sign
  _from_rbexpr(signs)
end

#sinExpr

Compute the element-wise value for the sine.

Examples:

df = Polars::DataFrame.new({"a" => [0.0]})
df.select(Polars.col("a").sin)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.0 │
# └─────┘

Returns:



5087
5088
5089
# File 'lib/polars/expr.rb', line 5087

# Calls sin on the native expression and wraps the result.
def sin
  sine = _rbexpr.sin
  _from_rbexpr(sine)
end

#sinhExpr

Compute the element-wise value for the hyperbolic sine.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").sinh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.175201 │
# └──────────┘

Returns:



5207
5208
5209
# File 'lib/polars/expr.rb', line 5207

# Calls sinh on the native expression and wraps the result.
def sinh
  hyperbolic_sine = _rbexpr.sinh
  _from_rbexpr(hyperbolic_sine)
end

#skew(bias: true) ⇒ Expr

Compute the sample skewness of a data set.

For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. The function skewtest can be used to determine if the skewness value is close enough to zero, statistically speaking.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").skew)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.343622 │
# └──────────┘

Parameters:

  • bias (Boolean) (defaults to: true)

    If false, the calculations are corrected for statistical bias.

Returns:



4866
4867
4868
# File 'lib/polars/expr.rb', line 4866

# Forwards +bias+ to the native expression's skew and wraps the result.
def skew(bias: true)
  skewness = _rbexpr.skew(bias)
  _from_rbexpr(skewness)
end

#slice(offset, length = nil) ⇒ Expr

Get a slice of this expression.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [8, 9, 10, 11],
    "b" => [nil, 4, 4, 4]
  }
)
df.select(Polars.all.slice(1, 2))
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 9   ┆ 4   │
# │ 10  ┆ 4   │
# └─────┴─────┘

Parameters:

  • offset (Integer)

    Start index. Negative indexing is supported.

  • length (Integer) (defaults to: nil)

    Length of the slice. If set to nil, all rows starting at the offset will be selected.

Returns:



774
775
776
777
778
779
780
781
782
# File 'lib/polars/expr.rb', line 774

# Get a slice of this expression.
#
# Arguments that are not already an Expr (including a nil length) are
# turned into literal expressions via `Polars.lit` before delegating.
#
# @param offset [Integer, Expr]
#   Start index. Negative indexing is supported.
# @param length [Integer, Expr, nil]
#   Length of the slice. If set to nil, all rows starting at the offset
#   will be selected.
#
# @return [Expr]
def slice(offset, length = nil)
  offset = Polars.lit(offset) unless offset.is_a?(Expr)
  length = Polars.lit(length) unless length.is_a?(Expr)

  sliced = _rbexpr.slice(offset._rbexpr, length._rbexpr)
  _from_rbexpr(sliced)
end

#sort(reverse: false, nulls_last: false) ⇒ Expr

Sort this column. In projection/selection context the whole column is sorted.

If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
        "one",
        "one",
        "one",
        "two",
        "two",
        "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("value").sort)
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 1     │
# │ 2     │
# │ 3     │
# │ 4     │
# │ 98    │
# │ 99    │
# └───────┘
df.select(Polars.col("value").sort(reverse: true))
# =>
# shape: (6, 1)
# ┌───────┐
# │ value │
# │ ---   │
# │ i64   │
# ╞═══════╡
# │ 99    │
# │ 98    │
# │ 4     │
# │ 3     │
# │ 2     │
# │ 1     │
# └───────┘
df.group_by("group").agg(Polars.col("value").sort)
# =>
# shape: (2, 2)
# ┌───────┬────────────┐
# │ group ┆ value      │
# │ ---   ┆ ---        │
# │ str   ┆ list[i64]  │
# ╞═══════╪════════════╡
# │ two   ┆ [3, 4, 99] │
# │ one   ┆ [1, 2, 98] │
# └───────┴────────────┘

Parameters:

  • reverse (Boolean) (defaults to: false)

    false -> order from small to large. true -> order from large to small.

  • nulls_last (Boolean) (defaults to: false)

    If true, nulls are considered to be larger than any valid value.

Returns:



1310
1311
1312
# File 'lib/polars/expr.rb', line 1310

# Sort this column.
#
# Both options are forwarded positionally to the underlying expression's
# `sort_with`.
#
# @param reverse [Boolean]
#   false -> order from small to large. true -> order from large to small.
# @param nulls_last [Boolean]
#   If true, nulls are considered to be larger than any valid value.
#
# @return [Expr]
def sort(reverse: false, nulls_last: false)
  sorted = _rbexpr.sort_with(reverse, nulls_last)
  _from_rbexpr(sorted)
end

#sort_by(by, reverse: false) ⇒ Expr

Sort this column by the ordering of another column, or multiple other columns.

In projection/selection context the whole column is sorted. If used in a group by context, the groups are sorted.

Examples:

df = Polars::DataFrame.new(
  {
    "group" => [
      "one",
      "one",
      "one",
      "two",
      "two",
      "two"
    ],
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(Polars.col("group").sort_by("value"))
# =>
# shape: (6, 1)
# ┌───────┐
# │ group │
# │ ---   │
# │ str   │
# ╞═══════╡
# │ one   │
# │ one   │
# │ two   │
# │ two   │
# │ one   │
# │ two   │
# └───────┘

Parameters:

  • by (Object)

    The column(s) used for sorting.

  • reverse (Boolean) (defaults to: false)

    false -> order from small to large. true -> order from large to small.

Returns:



1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
# File 'lib/polars/expr.rb', line 1547

# Sort this column by the ordering of another column, or multiple other
# columns.
#
# A single `by` or `reverse` value is wrapped in a one-element array, so
# the underlying call always receives arrays.
#
# @param by [Object]
#   The column(s) used for sorting.
# @param reverse [Boolean, Array<Boolean>]
#   false -> order from small to large. true -> order from large to small.
#
# @return [Expr]
def sort_by(by, reverse: false)
  keys = by.is_a?(::Array) ? by : [by]
  directions = reverse.is_a?(::Array) ? reverse : [reverse]
  key_exprs = Utils.selection_to_rbexpr_list(keys)

  _from_rbexpr(_rbexpr.sort_by(key_exprs, directions))
end

#sqrt ⇒ Expr

Compute the square root of the elements.

Examples:

df = Polars::DataFrame.new({"values" => [1.0, 2.0, 4.0]})
df.select(Polars.col("values").sqrt)
# =>
# shape: (3, 1)
# ┌──────────┐
# │ values   │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.0      │
# │ 1.414214 │
# │ 2.0      │
# └──────────┘

Returns:



245
246
247
# File 'lib/polars/expr.rb', line 245

# Compute the square root of the elements.
#
# Implemented as raising this expression to the power 0.5.
#
# @return [Expr]
def sqrt
  exponent = 0.5
  self**exponent
end

#std(ddof: 1) ⇒ Expr

Get standard deviation.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").std)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Parameters:

  • ddof (Integer) (defaults to: 1)

    Degrees of freedom.

Returns:



1853
1854
1855
# File 'lib/polars/expr.rb', line 1853

# Get standard deviation.
#
# @param ddof [Integer]
#   Degrees of freedom. Passed positionally to the underlying `std`.
#
# @return [Expr]
def std(ddof: 1)
  deviation = _rbexpr.std(ddof)
  _from_rbexpr(deviation)
end

#str ⇒ StringExpr

Create an object namespace of all string related methods.

Returns:



6031
6032
6033
# File 'lib/polars/expr.rb', line 6031

# Create an object namespace of all string related methods.
#
# @return [StringExpr] a new namespace object built around this expression
def str
  string_namespace = StringExpr.new(self)
  string_namespace
end

#struct ⇒ StructExpr

Create an object namespace of all struct related methods.

Returns:



6038
6039
6040
# File 'lib/polars/expr.rb', line 6038

# Create an object namespace of all struct related methods.
#
# @return [StructExpr] a new namespace object built around this expression
def struct
  struct_namespace = StructExpr.new(self)
  struct_namespace
end

#sub(other) ⇒ Expr

Method equivalent of subtraction operator expr - other.

Examples:

df = Polars::DataFrame.new({"x" => [0, 1, 2, 3, 4]})
df.with_columns(
  Polars.col("x").sub(2).alias("x-2"),
  Polars.col("x").sub(Polars.col("x").cum_sum).alias("x-expr"),
)
# =>
# shape: (5, 3)
# ┌─────┬─────┬────────┐
# │ x   ┆ x-2 ┆ x-expr │
# │ --- ┆ --- ┆ ---    │
# │ i64 ┆ i64 ┆ i64    │
# ╞═════╪═════╪════════╡
# │ 0   ┆ -2  ┆ 0      │
# │ 1   ┆ -1  ┆ 0      │
# │ 2   ┆ 0   ┆ -1     │
# │ 3   ┆ 1   ┆ -3     │
# │ 4   ┆ 2   ┆ -6     │
# └─────┴─────┴────────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3454
3455
3456
# File 'lib/polars/expr.rb', line 3454

# Method equivalent of subtraction operator `expr - other`.
#
# @param other [Object]
#   Numeric literal or expression value.
#
# @return [Expr] whatever this expression's `-` operator returns
def sub(other)
  difference = self - other
  difference
end

#suffix(suffix) ⇒ Expr

Add a suffix to the root column name of the expression.

Returns:



421
422
423
# File 'lib/polars/expr.rb', line 421

# Add a suffix to the root column name of the expression.
#
# Delegates to the `suffix` method of the object returned by `name`.
#
# @param suffix [Object] the suffix handed on to `name.suffix`
#
# @return [Expr]
def suffix(suffix)
  name_namespace = name
  name_namespace.suffix(suffix)
end

#sum ⇒ Expr

Note:

Dtypes in :i8, :u8, :i16, and :u16 are cast to :i64 before summing to prevent overflow issues.

Get sum value.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").sum)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 0   │
# └─────┘

Returns:



1980
1981
1982
# File 'lib/polars/expr.rb', line 1980

# Get sum value.
#
# Delegates to the underlying expression's `sum` and wraps the result.
#
# @return [Expr]
def sum
  total = _rbexpr.sum
  _from_rbexpr(total)
end

#tail(n = 10) ⇒ Expr

Get the last n rows.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
df.select(Polars.col("foo").tail(3))
# =>
# shape: (3, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 5   │
# │ 6   │
# │ 7   │
# └─────┘

Parameters:

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



3003
3004
3005
# File 'lib/polars/expr.rb', line 3003

# Get the last n rows.
#
# @param n [Integer]
#   Number of rows to return.
#
# @return [Expr]
def tail(n = 10)
  last_rows = _rbexpr.tail(n)
  _from_rbexpr(last_rows)
end

#tan ⇒ Expr

Compute the element-wise value for the tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tan)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 1.557408 │
# └──────────┘

Returns:



5127
5128
5129
# File 'lib/polars/expr.rb', line 5127

# Compute the element-wise value for the tangent.
#
# Delegates to the underlying expression's `tan` and wraps the result.
#
# @return [Expr]
def tan
  tangent = _rbexpr.tan
  _from_rbexpr(tangent)
end

#tanh ⇒ Expr

Compute the element-wise value for the hyperbolic tangent.

Examples:

df = Polars::DataFrame.new({"a" => [1.0]})
df.select(Polars.col("a").tanh)
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.761594 │
# └──────────┘

Returns:



5247
5248
5249
# File 'lib/polars/expr.rb', line 5247

# Compute the element-wise value for the hyperbolic tangent.
#
# Delegates to the underlying expression's `tanh` and wraps the result.
#
# @return [Expr]
def tanh
  hyperbolic_tangent = _rbexpr.tanh
  _from_rbexpr(hyperbolic_tangent)
end

#to_physical ⇒ Expr

Cast to physical representation of the logical dtype.

  • :date -> :i32
  • :datetime -> :i64
  • :time -> :i64
  • :duration -> :i64
  • :cat -> :u32
  • Other data types will be left unchanged.

Examples:

Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
  [
    Polars.col("vals").cast(:cat),
    Polars.col("vals")
      .cast(:cat)
      .to_physical
      .alias("vals_physical")
  ]
)
# =>
# shape: (4, 2)
# ┌──────┬───────────────┐
# │ vals ┆ vals_physical │
# │ ---  ┆ ---           │
# │ cat  ┆ u32           │
# ╞══════╪═══════════════╡
# │ a    ┆ 0             │
# │ x    ┆ 1             │
# │ null ┆ null          │
# │ a    ┆ 0             │
# └──────┴───────────────┘

Returns:



178
179
180
# File 'lib/polars/expr.rb', line 178

# Cast to physical representation of the logical dtype.
#
# Delegates to the underlying expression's `to_physical` and wraps the
# result.
#
# @return [Expr]
def to_physical
  physical = _rbexpr.to_physical
  _from_rbexpr(physical)
end

#to_s ⇒ String Also known as: inspect

Returns a string representing the Expr.

Returns:



20
21
22
# File 'lib/polars/expr.rb', line 20

# Returns a string representing the Expr.
#
# The text is whatever the underlying expression's `to_str` produces.
#
# @return [String]
def to_s
  rendered = _rbexpr.to_str
  rendered
end

#top_k(k: 5) ⇒ Expr

Return the k largest elements.

To get the k smallest elements instead, use #bottom_k.

Examples:

df = Polars::DataFrame.new(
  {
    "value" => [1, 98, 2, 3, 99, 4]
  }
)
df.select(
  [
    Polars.col("value").top_k.alias("top_k"),
    Polars.col("value").bottom_k.alias("bottom_k")
  ]
)
# =>
# shape: (5, 2)
# ┌───────┬──────────┐
# │ top_k ┆ bottom_k │
# │ ---   ┆ ---      │
# │ i64   ┆ i64      │
# ╞═══════╪══════════╡
# │ 99    ┆ 1        │
# │ 98    ┆ 2        │
# │ 4     ┆ 3        │
# │ 3     ┆ 4        │
# │ 2     ┆ 98       │
# └───────┴──────────┘

Parameters:

  • k (Integer) (defaults to: 5)

    Number of elements to return.

Returns:



1348
1349
1350
1351
# File 'lib/polars/expr.rb', line 1348

# Return the k largest elements.
#
# `k` is first converted with `Utils.parse_as_expression`, then handed to
# the underlying expression's `top_k`.
#
# @param k [Integer]
#   Number of elements to return.
#
# @return [Expr]
def top_k(k: 5)
  k_expr = Utils.parse_as_expression(k)
  largest = _rbexpr.top_k(k_expr)
  _from_rbexpr(largest)
end

#truediv(other) ⇒ Expr

Method equivalent of float division operator expr / other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [-2, -1, 0, 1, 2], "y" => [0.5, 0.0, 0.0, -4.0, -0.5]}
)
df.with_columns(
  Polars.col("x").truediv(2).alias("x/2"),
  Polars.col("x").truediv(Polars.col("y")).alias("x/y")
)
# =>
# shape: (5, 4)
# ┌─────┬──────┬──────┬───────┐
# │ x   ┆ y    ┆ x/2  ┆ x/y   │
# │ --- ┆ ---  ┆ ---  ┆ ---   │
# │ i64 ┆ f64  ┆ f64  ┆ f64   │
# ╞═════╪══════╪══════╪═══════╡
# │ -2  ┆ 0.5  ┆ -1.0 ┆ -4.0  │
# │ -1  ┆ 0.0  ┆ -0.5 ┆ -inf  │
# │ 0   ┆ 0.0  ┆ 0.0  ┆ NaN   │
# │ 1   ┆ -4.0 ┆ 0.5  ┆ -0.25 │
# │ 2   ┆ -0.5 ┆ 1.0  ┆ -4.0  │
# └─────┴──────┴──────┴───────┘

Parameters:

  • other (Object)

    Numeric literal or expression value.

Returns:



3509
3510
3511
# File 'lib/polars/expr.rb', line 3509

# Method equivalent of float division operator `expr / other`.
#
# @param other [Object]
#   Numeric literal or expression value.
#
# @return [Expr] whatever this expression's `/` operator returns
def truediv(other)
  quotient = self / other
  quotient
end

#unique(maintain_order: false) ⇒ Expr

Get unique values of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 1, 2]})
df.select(Polars.col("a").unique(maintain_order: true))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# └─────┘

Parameters:

  • maintain_order (Boolean) (defaults to: false)

    Maintain order of data. This requires more work.

Returns:



2172
2173
2174
2175
2176
2177
2178
# File 'lib/polars/expr.rb', line 2172

# Get unique values of this expression.
#
# @param maintain_order [Boolean]
#   Maintain order of data. This requires more work. When truthy the
#   underlying `unique_stable` is used, otherwise `unique`.
#
# @return [Expr]
def unique(maintain_order: false)
  deduplicated = maintain_order ? _rbexpr.unique_stable : _rbexpr.unique
  _from_rbexpr(deduplicated)
end

#unique_counts ⇒ Expr

Return a count of the unique values in the order of appearance.

This method differs from value_counts in that it does not return the values, only the counts, and it might be faster.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").unique_counts
  ]
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ id  │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 3   │
# └─────┘

Returns:



5611
5612
5613
# File 'lib/polars/expr.rb', line 5611

# Return a count of the unique values in the order of appearance.
#
# Delegates to the underlying expression's `unique_counts` and wraps the
# result.
#
# @return [Expr]
def unique_counts
  counts = _rbexpr.unique_counts
  _from_rbexpr(counts)
end

#upper_bound ⇒ Expr

Calculate the upper bound.

Returns a unit Series with the highest value possible for the dtype of this expression.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
df.select(Polars.col("a").upper_bound)
# =>
# shape: (1, 1)
# ┌─────────────────────┐
# │ a                   │
# │ ---                 │
# │ i64                 │
# ╞═════════════════════╡
# │ 9223372036854775807 │
# └─────────────────────┘

Returns:



5043
5044
5045
# File 'lib/polars/expr.rb', line 5043

# Calculate the upper bound.
#
# Delegates to the underlying expression's `upper_bound` and wraps the
# result.
#
# @return [Expr]
def upper_bound
  bound = _rbexpr.upper_bound
  _from_rbexpr(bound)
end

#value_counts(multithreaded: false, sort: false) ⇒ Expr

Count all unique values and create a struct mapping value to count.

Examples:

df = Polars::DataFrame.new(
  {
    "id" => ["a", "b", "b", "c", "c", "c"]
  }
)
df.select(
  [
    Polars.col("id").value_counts(sort: true),
  ]
)
# =>
# shape: (3, 1)
# ┌───────────┐
# │ id        │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {"c",3}   │
# │ {"b",2}   │
# │ {"a",1}   │
# └───────────┘

Parameters:

  • multithreaded (Boolean) (defaults to: false)

    Better to turn this off in the aggregation context, as it can lead to contention.

  • sort (Boolean) (defaults to: false)

    Ensure the output is sorted from most values to least.

Returns:



5578
5579
5580
# File 'lib/polars/expr.rb', line 5578

# Count all unique values and create a struct mapping value to count.
#
# Both options are forwarded positionally, in the order
# (multithreaded, sort), to the underlying `value_counts`.
#
# @param multithreaded [Boolean]
#   Better to turn this off in the aggregation context, as it can lead to
#   contention.
# @param sort [Boolean]
#   Ensure the output is sorted from most values to least.
#
# @return [Expr]
def value_counts(multithreaded: false, sort: false)
  counted = _rbexpr.value_counts(multithreaded, sort)
  _from_rbexpr(counted)
end

#var(ddof: 1) ⇒ Expr

Get variance.

Examples:

df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
df.select(Polars.col("a").var)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 1.0 │
# └─────┘

Parameters:

  • ddof (Integer) (defaults to: 1)

    Degrees of freedom.

Returns:



1876
1877
1878
# File 'lib/polars/expr.rb', line 1876

# Get variance.
#
# @param ddof [Integer]
#   Degrees of freedom. Passed positionally to the underlying `var`.
#
# @return [Expr]
def var(ddof: 1)
  variance = _rbexpr.var(ddof)
  _from_rbexpr(variance)
end

#where(predicate) ⇒ Expr

Filter a single column.

Alias for #filter.

Examples:

df = Polars::DataFrame.new(
  {
    "group_col" => ["g1", "g1", "g2"],
    "b" => [1, 2, 3]
  }
)
(
  df.group_by("group_col").agg(
    [
      Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
      Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
    ]
  )
).sort("group_col")
# =>
# shape: (2, 3)
# ┌───────────┬─────┬─────┐
# │ group_col ┆ lt  ┆ gte │
# │ ---       ┆ --- ┆ --- │
# │ str       ┆ i64 ┆ i64 │
# ╞═══════════╪═════╪═════╡
# │ g1        ┆ 1   ┆ 2   │
# │ g2        ┆ 0   ┆ 3   │
# └───────────┴─────┴─────┘

Parameters:

  • predicate (Expr)

    Boolean expression.

Returns:



2752
2753
2754
# File 'lib/polars/expr.rb', line 2752

# Filter a single column.
#
# Alias for {#filter}: the predicate is passed straight through.
#
# @param predicate [Expr]
#   Boolean expression.
#
# @return [Expr]
def where(predicate)
  filtered = filter(predicate)
  filtered
end

#xor(other) ⇒ Expr

Method equivalent of bitwise exclusive-or operator expr ^ other.

Examples:

df = Polars::DataFrame.new(
  {"x" => [true, false, true, false], "y" => [true, true, false, false]}
)
df.with_columns(Polars.col("x").xor(Polars.col("y")).alias("x ^ y"))
# =>
# shape: (4, 3)
# ┌───────┬───────┬───────┐
# │ x     ┆ y     ┆ x ^ y │
# │ ---   ┆ ---   ┆ ---   │
# │ bool  ┆ bool  ┆ bool  │
# ╞═══════╪═══════╪═══════╡
# │ true  ┆ true  ┆ false │
# │ false ┆ true  ┆ true  │
# │ true  ┆ false ┆ true  │
# │ false ┆ false ┆ false │
# └───────┴───────┴───────┘

Parameters:

  • other (Object)

    Integer or boolean value; accepts expression input.

Returns:



3560
3561
3562
# File 'lib/polars/expr.rb', line 3560

# Method equivalent of bitwise exclusive-or operator `expr ^ other`.
#
# @param other [Object]
#   Integer or boolean value; accepts expression input.
#
# @return [Expr] whatever this expression's `^` operator returns
def xor(other)
  exclusive_or = self ^ other
  exclusive_or
end

#|(other) ⇒ Expr

Bitwise OR.

Returns:



42
43
44
# File 'lib/polars/expr.rb', line 42

# Bitwise OR.
#
# The right-hand side is converted with `_to_rbexpr` and combined with
# this expression through the underlying `_or`.
#
# @param other [Object] right-hand operand
#
# @return [Expr]
def |(other)
  lhs = _rbexpr
  rhs = _to_rbexpr(other)
  _from_rbexpr(lhs._or(rhs))
end