Module: Polars::Functions

Included in:
Polars
Defined in:
lib/polars/string_cache.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/functions/lazy.rb,
lib/polars/functions/eager.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb

Instance Method Summary collapse

Instance Method Details

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object

Align a sequence of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

Parameters:

  • frames (Array)

    Sequence of DataFrames or LazyFrames.

  • on (Object)

    One or more columns whose unique values will be used to align the frames.

  • select (Object) (defaults to: nil)

    Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.

  • reverse (Object) (defaults to: false)

    Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:



271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'lib/polars/functions/eager.rb', line 271

# Align a sequence of frames on the unique values of the `on` column(s).
#
# A key frame holding every distinct `on` value is built from all inputs,
# sorted, and then left-joined against each input, so every returned frame
# follows the same key ordering.
#
# @param frames [Array]
#   Frames to align; all must be of the same class.
# @param on [Object]
#   Column(s) whose unique values form the alignment key.
# @param select [Object]
#   Optional column selection applied to each aligned frame.
# @param reverse [Object]
#   Forwarded to `sort` when ordering the key values.
#
# @return [Array] one aligned frame per input (empty when no frames are given)
# @raise [TypeError] if the frames are not all of the same class
def align_frames(*frames, on:, select: nil, reverse: false)
  return [] if frames.empty?

  unless frames.map(&:class).uniq.length == 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  # the first frame decides whether results are collected before returning
  eager = frames[0].is_a?(DataFrame)

  # superset of all "on" values, de-duplicated and sorted
  key_columns = frames.map { |frame| frame.lazy.select(on) }
  key_frame = concat(key_columns).unique(maintain_order: false).sort(on, reverse: reverse)

  # materialize once for eager input, otherwise cache the lazy plan
  key_frame = eager ? key_frame.collect.lazy : key_frame.cache

  # left-join every input onto the key frame, restoring its own column order
  aligned = frames.map do |frame|
    joined = key_frame.join(frame.lazy, on: key_frame.columns, how: "left")
    joined.select(frame.columns)
  end

  aligned = aligned.map { |frame| frame.select(select) } unless select.nil?

  eager ? aligned.map(&:collect) : aligned
end

#all(*names, ignore_nulls: true) ⇒ Expr

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



44
45
46
47
48
49
50
# File 'lib/polars/functions/aggregation/vertical.rb', line 44

# Select every column when called without names; otherwise build the
# `all` aggregation over the named columns.
#
# @param names [Array]
#   Name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean]
#   Passed to the expression's `all` as `drop_nulls`.
#
# @return [Expr]
def all(*names, ignore_nulls: true)
  names.empty? ? col("*") : col(*names).all(drop_nulls: ignore_nulls)
end

#all_horizontal(*exprs) ⇒ Expr

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



34
35
36
37
# File 'lib/polars/functions/aggregation/horizontal.rb', line 34

# Build the horizontal (row-wise) AND across the given columns.
#
# @param exprs [Array]
#   Column(s) to use in the aggregation; parsed into expressions first.
#
# @return [Expr]
def all_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  combined = Plr.all_horizontal(parsed)
  Utils.wrap_expr(combined)
end

#any(*names, ignore_nulls: true) ⇒ Expr

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



80
81
82
# File 'lib/polars/functions/aggregation/vertical.rb', line 80

# Build the `any` aggregation over the named columns.
#
# @param names [Array]
#   Name(s) of the columns to use in the aggregation.
# @param ignore_nulls [Boolean]
#   Passed to the expression's `any` as `drop_nulls`.
#
# @return [Expr]
def any(*names, ignore_nulls: true)
  selected = col(*names)
  selected.any(drop_nulls: ignore_nulls)
end

#any_horizontal(*exprs) ⇒ Expr

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



70
71
72
73
# File 'lib/polars/functions/aggregation/horizontal.rb', line 70

# Build the horizontal (row-wise) OR across the given columns.
#
# @param exprs [Array]
#   Column(s) to use in the aggregation; parsed into expressions first.
#
# @return [Expr]
def any_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  combined = Plr.any_horizontal(parsed)
  Utils.wrap_expr(combined)
end

#approx_n_unique(*columns) ⇒ Expr

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



416
417
418
# File 'lib/polars/functions/lazy.rb', line 416

# Approximate count of unique values for the given columns.
#
# Shorthand for calling `approx_n_unique` on `col(*columns)`.
#
# @param columns [Array]
#   One or more column names.
#
# @return [Expr]
def approx_n_unique(*columns)
  selected = col(*columns)
  selected.approx_n_unique
end

#arctan2(y, x) ⇒ Expr

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



1007
1008
1009
1010
1011
1012
1013
1014
1015
# File 'lib/polars/functions/lazy.rb', line 1007

# Compute the two-argument arctangent in radians.
#
# String-like arguments are turned into column expressions; anything else
# is used as given and must respond to `_rbexpr`.
#
# @param y [Object]
#   Column name or Expression.
# @param x [Object]
#   Column name or Expression.
#
# @return [Expr]
def arctan2(y, x)
  y_expr = Utils.strlike?(y) ? col(y) : y
  x_expr = Utils.strlike?(x) ? col(x) : x
  Utils.wrap_expr(Plr.arctan2(y_expr._rbexpr, x_expr._rbexpr))
end

#arctan2d(y, x) ⇒ Expr

Compute two argument arctan in degrees.

Returns the angle (in degrees) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



1052
1053
1054
1055
1056
1057
1058
1059
1060
# File 'lib/polars/functions/lazy.rb', line 1052

# Compute the two-argument arctangent in degrees.
#
# String-like arguments are turned into column expressions; anything else
# is used as given and must respond to `_rbexpr`.
#
# @param y [Object]
#   Column name or Expression.
# @param x [Object]
#   Column name or Expression.
#
# @return [Expr]
def arctan2d(y, x)
  y_expr = Utils.strlike?(y) ? col(y) : y
  x_expr = Utils.strlike?(x) ? col(x) : x
  Utils.wrap_expr(Plr.arctan2d(y_expr._rbexpr, x_expr._rbexpr))
end

#arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr Also known as: argsort_by

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

  • exprs (Object)

    Columns used to determine the ordering.

  • more_exprs (Array)

    Additional columns to arg sort by, specified as positional arguments.

  • reverse (Boolean) (defaults to: false)

    Default is ascending.

  • nulls_last (Boolean) (defaults to: false)

    Place null values last.

  • multithreaded (Boolean) (defaults to: true)

    Sort using multiple threads.

  • maintain_order (Boolean) (defaults to: false)

    Whether the order should be maintained if elements are equal.

Returns:



1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
# File 'lib/polars/functions/lazy.rb', line 1218

# Find the indexes that would sort the given columns.
#
# @param exprs [Object]
#   Columns used to determine the ordering.
# @param more_exprs [Array]
#   Additional columns to arg sort by, specified as positional arguments.
# @param reverse [Boolean]
#   Sort direction flag; expanded to one value per expression.
# @param nulls_last [Boolean]
#   Place null values last; expanded to one value per expression.
# @param multithreaded [Boolean]
#   Sort using multiple threads.
# @param maintain_order [Boolean]
#   Whether the order should be maintained if elements are equal.
#
# @return [Expr]
def arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false)
  sort_keys = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  key_count = sort_keys.length

  # broadcast the flags so each sort key has its own setting
  reverse_flags = Utils.extend_bool(reverse, key_count, "reverse", "exprs")
  nulls_last_flags = Utils.extend_bool(nulls_last, key_count, "nulls_last", "exprs")

  Utils.wrap_expr(
    Plr.arg_sort_by(sort_keys, reverse_flags, nulls_last_flags, multithreaded, maintain_order)
  )
end

#arg_where(condition, eager: false) ⇒ Expr, Series

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

  • condition (Expr)

    Boolean expression to evaluate

  • eager (Boolean) (defaults to: false)

    Whether to apply this function eagerly (as opposed to lazily).

Returns:



1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
# File 'lib/polars/functions/lazy.rb', line 1358

# Return indices where `condition` evaluates true.
#
# @param condition [Expr, Series]
#   Boolean expression to evaluate; must be a Series when `eager` is true.
# @param eager [Boolean]
#   Whether to apply this function eagerly (as opposed to lazily).
#
# @return [Expr, Series]
# @raise [ArgumentError] if `eager` is true and `condition` is not a Series
def arg_where(condition, eager: false)
  unless eager
    parsed = Utils.parse_into_expression(condition, str_as_lit: true)
    return Utils.wrap_expr(Plr.arg_where(parsed))
  end

  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end

  # run the lazy variant against a one-column frame built from the Series
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end

#coalesce(exprs, *more_exprs) ⇒ Expr

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘
df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘

Parameters:

  • exprs (Array)

    Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Array)

    Additional columns to coalesce, specified as positional arguments.

Returns:



1416
1417
1418
1419
# File 'lib/polars/functions/lazy.rb', line 1416

# Fold the columns from left to right, keeping the first non-null value.
#
# @param exprs [Object]
#   Columns to coalesce; parsed into expressions first.
# @param more_exprs [Array]
#   Additional columns to coalesce, specified as positional arguments.
#
# @return [Expr]
def coalesce(exprs, *more_exprs)
  candidates = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.coalesce(candidates))
end

#col(name, *more_names) ⇒ Expr

Return an expression representing a column in a DataFrame.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/polars/functions/col.rb', line 6

# Return an expression representing one or more columns in a DataFrame.
#
# Accepted inputs, as handled below:
# - a single string-like name (per Utils.strlike?)
# - a single Polars data type (per Utils.is_polars_dtype)
# - several names, or several data types, as positional arguments
# - an Array or Set of names or of data types (the first element decides
#   which; an empty collection selects no columns)
#
# @param name [Object]
#   Column name, data type, or an Array/Set of either.
# @param more_names [Array]
#   Additional names or data types, specified as positional arguments.
#
# @return [Expr]
# @raise [TypeError] if the input is neither string-like nor a Polars data type
def col(name, *more_names)
  if more_names.any?
    if Utils.strlike?(name)
      names_str = [name, *more_names]
      return Selector._by_name(names_str.map(&:to_s), strict: true).as_expr
    elsif Utils.is_polars_dtype(name)
      # use the same dtype selector as the single-dtype and Array/Set forms
      # (this branch previously called `_by_type`, unlike every other path)
      dtypes = [name, *more_names]
      return Selector._by_dtype(dtypes).as_expr
    else
      msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
      raise TypeError, msg
    end
  end

  if Utils.strlike?(name)
    Utils.wrap_expr(Plr.col(name.to_s))
  elsif Utils.is_polars_dtype(name)
    dtypes = [name]
    Selector._by_dtype(dtypes).as_expr
  elsif name.is_a?(::Array) || name.is_a?(::Set)
    names = Array(name)
    if names.empty?
      return Utils.wrap_expr(Plr.cols(names))
    end

    # the first element decides whether the collection holds names or dtypes
    item = names[0]
    if Utils.strlike?(item)
      Selector._by_name(names.map(&:to_s), strict: true).as_expr
    elsif Utils.is_polars_dtype(item)
      Selector._by_dtype(names).as_expr
    else
      msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
      raise TypeError, msg
    end
  else
    msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
    raise TypeError, msg
  end
end

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on the Polars thread pool.

Parameters:

  • lazy_frames (Array)

    A list of LazyFrames to collect.

  • type_coercion (Boolean) (defaults to: true)

    Do type coercion optimization.

  • predicate_pushdown (Boolean) (defaults to: true)

    Do predicate pushdown optimization.

  • projection_pushdown (Boolean) (defaults to: true)

    Do projection pushdown optimization.

  • simplify_expression (Boolean) (defaults to: true)

    Run simplify expressions optimization.

  • string_cache (Boolean) (defaults to: false)

    This argument is deprecated and will be ignored

  • no_optimization (Boolean) (defaults to: false)

    Turn off optimizations.

  • slice_pushdown (Boolean) (defaults to: true)

    Slice pushdown optimization.

  • common_subplan_elimination (Boolean) (defaults to: true)

    Will try to cache branching subplans that occur on self-joins or unions.

  • allow_streaming (Boolean) (defaults to: false)

    Run parts of the query in a streaming fashion (this is in an alpha state)

Returns:



1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
# File 'lib/polars/functions/lazy.rb', line 1259

# Collect multiple LazyFrames at the same time.
#
# Each frame's optimization toggles are applied first, then all plans are
# handed to `Plr.collect_all` together and the results wrapped.
#
# @param lazy_frames [Array]
#   A list of LazyFrames to collect.
# @param type_coercion [Boolean]
#   Do type coercion optimization.
# @param predicate_pushdown [Boolean]
#   Do predicate pushdown optimization.
# @param projection_pushdown [Boolean]
#   Do projection pushdown optimization.
# @param simplify_expression [Boolean]
#   Run simplify expressions optimization.
# @param string_cache [Boolean]
#   Not read by this method.
# @param no_optimization [Boolean]
#   When true, forces predicate, projection and slice pushdown and common
#   subplan elimination off.
# @param slice_pushdown [Boolean]
#   Slice pushdown optimization.
# @param common_subplan_elimination [Boolean]
#   Will try to cache branching subplans that occur on self-joins or unions.
# @param allow_streaming [Boolean]
#   Run parts of the query in a streaming fashion.
#
# @return [Array]
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  if no_optimization
    predicate_pushdown = projection_pushdown = slice_pushdown = common_subplan_elimination = false
  end

  plans = lazy_frames.map do |frame|
    frame._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming,
      false
    )
  end

  # wrap the native dataframes returned by the batch collect
  Plr.collect_all(plans).map { |rbdf| Utils.wrap_df(rbdf) }
end

#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object

Aggregate multiple Dataframes/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘
df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘
df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘
df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘
df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

Parameters:

  • items (Object)

    DataFrames/Series/LazyFrames to concatenate.

  • rechunk (Boolean) (defaults to: true)

    Make sure that all data is in contiguous memory.

  • how ("vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align") (defaults to: "vertical")
    • Vertical: applies multiple vstack operations.
    • Diagonal: finds a union between the column schemas and fills missing column values with null.
    • Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
    • Align: joins the frames on their common columns with a full outer join and sorts the result by those columns (DataFrames and LazyFrames only).
  • parallel (Boolean) (defaults to: true)

    Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

Returns:



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/polars/functions/eager.rb', line 96

# Combine multiple DataFrames, LazyFrames, Series, or Exprs into one object.
#
# The class of the first element selects the code path. The "align"
# strategy is handled up front and ignores `rechunk` and `parallel`.
#
# @param items [Object]
#   Collection (anything responding to `to_a`) of DataFrames, LazyFrames,
#   Series, or Exprs to concatenate.
# @param rechunk [Boolean]
#   For DataFrame, Series and Expr input, call `rechunk` on the result.
#   For LazyFrame input the flag is forwarded to the native concat call
#   (except for "horizontal", which does not take it).
# @param how [String]
#   One of "vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed",
#   "horizontal", or "align". Series only accept "vertical"; "align"
#   requires DataFrames or LazyFrames; the value is not consulted for Exprs.
# @param parallel [Boolean]
#   Forwarded to the lazy concat calls.
#
# @return [Object] a LazyFrame for LazyFrame input, otherwise the combined
#   DataFrame, Series, or Expr
# @raise [ArgumentError] if `items` is empty, `how` is not supported for the
#   input type, or the input type itself is not supported
# @raise [TypeError] if "align" is used with something other than frames
# @raise [InvalidOperationError] if "align" finds no common column
def concat(items, rechunk: true, how: "vertical", parallel: true)
  elems = items.to_a

  if elems.empty?
    raise ArgumentError, "cannot concat empty list"
  end

  if how == "align"
    if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
      msg = "'align' strategy is not supported for #{elems[0].class.name}"
      raise TypeError, msg
    end

    # resolve every schema once and reuse it for both steps below
    schemas = elems.map { |e| e.collect_schema.names }

    # establish common columns, maintaining the order in which they appear
    key = schemas.flatten(1).uniq.map.with_index.to_h
    common_cols = schemas
      .reduce { |x, y| Set.new(x) & Set.new(y) }
      .sort_by { |k| key[k] }
    # we require at least one key column for 'align'
    if common_cols.empty?
      msg = "'align' strategy requires at least one common column"
      raise InvalidOperationError, msg
    end

    # align the frame data using a full outer join with no suffix-resolution
    # (so we raise an error in case of column collision, like "horizontal")
    lf = elems.map { |df| df.lazy }.reduce do |x, y|
      x.join(
        y,
        how: "full",
        on: common_cols,
        suffix: "_PL_CONCAT_RIGHT",
        maintain_order: "right_left"
      )
      # Coalesce full outer join columns
      .with_columns(
        common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
      )
      .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
    end.sort(common_cols)

    eager = elems[0].is_a?(DataFrame)
    return eager ? lf.collect : lf
  end

  first = elems[0]

  if first.is_a?(DataFrame)
    if how == "vertical"
      out = Utils.wrap_df(Plr.concat_df(elems))
    elsif how == "vertical_relaxed"
      # relaxed variants go through the lazy engine and are collected right away
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(no_optimization: true)
    elsif how == "diagonal"
      out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
    elsif how == "diagonal_relaxed"
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(no_optimization: true)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
    else
      raise ArgumentError, "how must be one of {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align'}, got #{how}"
    end
  elsif first.is_a?(LazyFrame)
    # lazy results are returned as-is; the final `rechunk` call below is skipped
    if how == "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
    elsif how == "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
    elsif how == "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
    elsif how == "diagonal_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
    elsif how == "horizontal"
      return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel))
    else
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', and 'align' concat strategy."
    end
  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end
  elsif first.is_a?(Expr)
    # expressions are chained with `append`, in input order
    out = first
    elems[1..-1].each do |e|
      out = out.append(e)
    end
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  if rechunk
    out.rechunk
  else
    out
  end
end

#concat_list(exprs, *more_exprs) ⇒ Expr

Concat the arrays in a Series dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

Returns:



138
139
140
141
# File 'lib/polars/functions/as_datatype.rb', line 138

# Concatenate the given columns into a single list column.
#
# @param exprs [Object]
#   Columns to concatenate; parsed into expressions first.
# @param more_exprs [Array]
#   Additional columns, specified as positional arguments.
#
# @return [Expr]
def concat_list(exprs, *more_exprs)
  list_inputs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.concat_list(list_inputs))
end

#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      sep: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

  • exprs (Object)

    Columns to concat into a Utf8 Series.

  • sep (String) (defaults to: "")

    String value that will be used to separate the values.

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values (default is false). With the default, a null in any input makes the result null, as in the example above.

Returns:



262
263
264
265
# File 'lib/polars/functions/as_datatype.rb', line 262

# Horizontally concatenate columns into a single string column.
#
# @param exprs [Object]
#   Columns to concatenate; parsed into expressions first.
# @param sep [String]
#   String value that will be used to separate the values.
# @param ignore_nulls [Boolean]
#   Forwarded to the native `concat_str` call.
#
# @return [Expr]
def concat_str(exprs, sep: "", ignore_nulls: false)
  pieces = Utils.parse_into_list_of_expressions(exprs)
  Utils.wrap_expr(Plr.concat_str(pieces, sep, ignore_nulls))
end

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ Expr

Compute the Pearson or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • method ("pearson", "spearman") (defaults to: "pearson")

    Correlation method.

  • ddof (Integer) (defaults to: nil)

    "Delta Degrees of Freedom". This parameter currently has no effect: passing any non-nil value only emits a warning.

  • propagate_nans (Boolean) (defaults to: false)

    If true any NaN encountered will lead to NaN in the output. Defaults to False where NaN are regarded as larger than any finite number and thus lead to the highest rank.

Returns:



737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
# File 'lib/polars/functions/lazy.rb', line 737

# Compute the Pearson or Spearman rank correlation between two columns.
#
# @param a [Object] Column name or expression.
# @param b [Object] Column name or expression.
# @param method ["pearson", "spearman"] Correlation method.
# @param ddof [Integer, nil] Has no effect; a non-nil value only triggers a warning.
# @param propagate_nans [Boolean] Forwarded to the Spearman implementation only.
# @return [Expr]
# @raise [ArgumentError] if `method` is neither "pearson" nor "spearman"
def corr(
  a,
  b,
  method: "pearson",
  ddof: nil,
  propagate_nans: false
)
  warn "The `ddof` parameter has no effect. Do not use it." unless ddof.nil?

  a = Utils.parse_into_expression(a)
  b = Utils.parse_into_expression(b)

  case method
  when "pearson"
    Utils.wrap_expr(Plr.pearson_corr(a, b))
  when "spearman"
    Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
  else
    # Single braces: the doubled `{{ }}` was a Python f-string escape that
    # Ruby would print literally.
    raise ArgumentError, "method must be one of {'pearson', 'spearman'}, got #{method}"
  end
end

#count(*columns) ⇒ Expr

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



83
84
85
86
87
88
89
90
# File 'lib/polars/functions/lazy.rb', line 83

# Count the non-null values of the given columns.
#
# Called without columns, this warns about the deprecation and returns the
# native `len` expression aliased to "count".
#
# @param columns [Array] One or more column names.
# @return [Expr]
def count(*columns)
  return col(*columns).count unless columns.empty?

  warn "`Polars.count` is deprecated. Use `Polars.length` instead."
  row_count = Plr.len._alias("count")
  Utils.wrap_expr(row_count)
end

#cov(a, b, ddof: 1) ⇒ Expr

Compute the covariance between two columns/ expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



792
793
794
795
796
# File 'lib/polars/functions/lazy.rb', line 792

# Compute the covariance between two columns/expressions.
#
# @param a [Object] Column name or expression.
# @param b [Object] Column name or expression.
# @param ddof [Integer] "Delta Degrees of Freedom", passed through to the native `cov`.
# @return [Expr]
def cov(a, b, ddof: 1)
  lhs, rhs = [a, b].map { |input| Utils.parse_into_expression(input) }
  Utils.wrap_expr(Plr.cov(lhs, rhs, ddof))
end

#cum_count(*columns, reverse: false) ⇒ Expr

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

  • columns (Array)

    Name(s) of the columns to use.

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



120
121
122
# File 'lib/polars/functions/lazy.rb', line 120

# Cumulative count of the selected columns; shorthand for
# `col(*columns).cum_count(reverse: reverse)`.
#
# @param columns [Array] Name(s) of the columns to use.
# @param reverse [Boolean] Reverse the operation.
# @return [Expr]
def cum_count(*columns, reverse: false)
  selection = col(*columns)
  selection.cum_count(reverse: reverse)
end

#cum_fold(acc, function, exprs, returns_scalar: false, return_dtype: nil, include_init: false) ⇒ Object Also known as: cumfold

Note:

If you simply want the first encountered expression as accumulator, consider using cumreduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), ->(acc, x) { acc + x }, Polars.all)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

Parameters:

  • acc (Object)

    Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).

  • function (Object)

    Function to apply over the accumulator and the value. Fn(acc, value) -> new_value

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • returns_scalar (Boolean) (defaults to: false)

    Whether or not function applied returns a scalar. This must be set correctly by the user.

  • return_dtype (Object) (defaults to: nil)

    Output datatype. If not set, the dtype will be inferred based on the dtype of the accumulator.

  • include_init (Boolean) (defaults to: false)

    Include the initial accumulator state as struct field.

Returns:



937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
# File 'lib/polars/functions/lazy.rb', line 937

# Cumulatively fold `function` over `exprs` from the left, starting at `acc`.
# The result is aliased to "cum_fold".
#
# @param acc [Object] Accumulator; strings are parsed as literals.
# @param function [Object] Callable applied to the accumulator and each value.
# @param exprs [Object] Expression(s) to fold over; a lone Expr is wrapped in an array.
# @param returns_scalar [Boolean] Passed through to the native fold.
# @param return_dtype [Object, nil] Optional output datatype.
# @param include_init [Boolean] Passed through to the native fold.
# @return [Expr]
def cum_fold(
  acc,
  function,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  include_init: false
)
  rbacc = Utils.parse_into_expression(acc, str_as_lit: true)
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs

  # Only build a datatype expression when the caller asked for one.
  rbdtype = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(inputs)
  folded = Plr.cum_fold(rbacc, function, rbexprs, returns_scalar, rbdtype, include_init)
  Utils.wrap_expr(folded._alias("cum_fold"))
end

#cum_sum(*names) ⇒ Expr Also known as: cumsum

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

  • names (Object)

    Name(s) of the columns to use in the aggregation.

Returns:



277
278
279
# File 'lib/polars/functions/aggregation/vertical.rb', line 277

# Cumulatively sum the named columns; shorthand for `col(*names).cum_sum`.
#
# @param names [Object] Name(s) of the columns to use in the aggregation.
# @return [Expr]
def cum_sum(*names)
  selection = col(*names)
  selection.cum_sum
end

#cum_sum_horizontal(*exprs) ⇒ Expr Also known as: cumsum_horizontal

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



241
242
243
244
245
246
247
248
249
# File 'lib/polars/functions/aggregation/horizontal.rb', line 241

# Cumulatively sum values horizontally across columns, via `Polars.cum_fold`.
# The result is aliased to "cum_sum".
#
# @param exprs [Array] Column(s) to use in the aggregation.
# @return [Expr]
def cum_sum_horizontal(*exprs)
  wrapped = Utils.parse_into_list_of_expressions(*exprs).map { |rbexpr| Utils.wrap_expr(rbexpr) }

  # Seed with a UInt32 zero: u32 will not cast to float as eagerly.
  seed = Polars.lit(0).cast(UInt32)
  adder = ->(a, b) { a + b }
  Polars.cum_fold(seed, adder, wrapped).alias("cum_sum")
end

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define whether the temporal window interval is closed or not.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/polars/functions/range/date_range.rb', line 37

# Create a date range between `start` and `stop`.
#
# @param start [Object] Lower bound of the date range.
# @param stop [Object] Upper bound of the date range.
# @param interval [Object] Interval between periods; normalised by `Utils.parse_interval_argument`.
# @param closed ["both", "left", "right", "none"] Which sides of the range are closed.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Object] an expression, or a Series when `eager` is true
def date_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  every = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  range_expr = Utils.wrap_expr(Plr.date_range(lower, upper, every, closed))

  eager ? F.select(range_expr).to_series : range_expr
end

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Note:

interval is created according to the following string language:

  • 1ns (1 nanosecond)
  • 1us (1 microsecond)
  • 1ms (1 millisecond)
  • 1s (1 second)
  • 1m (1 minute)
  • 1h (1 hour)
  • 1d (1 calendar day)
  • 1w (1 calendar week)
  • 1mo (1 calendar month)
  • 1q (1 calendar quarter)
  • 1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language (see the note above).

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/polars/functions/range/date_range.rb', line 116

# Create a column of date ranges.
#
# @param start [Object] Lower bound of the date range.
# @param stop [Object] Upper bound of the date range.
# @param interval [Object] Interval between periods; normalised by `Utils.parse_interval_argument`.
# @param closed ["both", "left", "right", "none"] Which sides of the range are closed.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Object] an expression, or a Series when `eager` is true
def date_ranges(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  every = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  ranges_expr = Utils.wrap_expr(Plr.date_ranges(lower, upper, every, closed))
  return ranges_expr unless eager

  F.select(ranges_expr).to_series
end

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/polars/functions/range/datetime_range.rb', line 52

# Generate a datetime range.
#
# @param start [Object] Lower bound of the datetime range.
# @param stop [Object] Upper bound of the datetime range.
# @param interval [String] Interval between periods.
# @param closed ['both', 'left', 'right', 'none'] Which sides of the range are closed.
# @param time_unit [nil, 'ns', 'us', 'ms'] Time unit of the result; when nil and the
#   parsed interval mentions "ns", it is set to "ns".
# @param time_zone [String, nil] Time zone of the result.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Object] an expression, or a Series when `eager` is true
def datetime_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  every = Utils.parse_interval_argument(interval)
  unit = time_unit
  unit = "ns" if unit.nil? && every.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  rbrange = Plr.datetime_range(lower, upper, every, closed, unit, time_zone)
  range_expr = Utils.wrap_expr(rbrange)

  eager ? Polars.select(range_expr).to_series : range_expr
end

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

Parameters:

  • start (Object)

    Lower bound of the datetime range.

  • stop (Object)

    Upper bound of the datetime range.

  • interval (String) (defaults to: "1d")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil)

    Time unit of the resulting Datetime data type.

  • time_zone (String) (defaults to: nil)

    Time zone of the resulting Datetime data type.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/polars/functions/range/datetime_range.rb', line 119

# Create a column of datetime ranges.
#
# @param start [Object] Lower bound of the datetime range.
# @param stop [Object] Upper bound of the datetime range.
# @param interval [String] Interval between periods (keyword argument here).
# @param closed ['both', 'left', 'right', 'none'] Which sides of the range are closed.
# @param time_unit [nil, 'ns', 'us', 'ms'] Time unit of the result; when nil and the
#   parsed interval mentions "ns", it is set to "ns".
# @param time_zone [String, nil] Time zone of the result.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Object] an expression, or a Series when `eager` is true
def datetime_ranges(
  start,
  stop,
  interval: "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  every = Utils.parse_interval_argument(interval)
  unit = time_unit
  unit = "ns" if unit.nil? && every.include?("ns")

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  rbranges = Plr.datetime_ranges(lower, upper, every, closed, unit, time_zone)
  ranges_expr = Utils.wrap_expr(rbranges)
  return ranges_expr unless eager

  Polars.select(ranges_expr).to_series
end

#disable_string_cachenil

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


90
91
92
# File 'lib/polars/string_cache.rb', line 90

# Disable and clear the global string cache by delegating to the native layer.
#
# @return [Object] whatever `Plr.disable_string_cache` returns (documented as nil)
def disable_string_cache
  Plr.disable_string_cache()
end

#dtype_of(col_or_expr) ⇒ DataTypeExpr

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get a lazily evaluated DataType of a column or expression.

Returns:



10
11
12
13
14
15
16
17
18
19
# File 'lib/polars/functions/datatype.rb', line 10

# Get a lazily evaluated datatype expression for a column or expression.
#
# @param col_or_expr [Object] A column name (String) or an expression.
# @return [DataTypeExpr]
def dtype_of(col_or_expr)
  # A plain string is treated as a column name; anything else is used as-is.
  expr = col_or_expr.is_a?(::String) ? F.col(col_or_expr) : col_or_expr
  rbdtype = RbDataTypeExpr.of_expr(expr._rbexpr)
  DataTypeExpr._from_rbdatatype_expr(rbdtype)
end

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/polars/functions/as_datatype.rb', line 35

# Create a Duration expression from distinct time components.
#
# Every non-nil component is parsed into an expression (strings are treated
# as column names, not literals); nil components are passed through as nil.
#
# @param weeks [Object, nil]
# @param days [Object, nil]
# @param hours [Object, nil]
# @param minutes [Object, nil]
# @param seconds [Object, nil]
# @param milliseconds [Object, nil]
# @param microseconds [Object, nil]
# @param nanoseconds [Object, nil]
# @param time_unit [String] Passed through to the native `duration`.
# @return [Expr]
def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: "us"
)
  # Order matters: it is the positional order expected by Plr.duration.
  components = [
    weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds
  ].map do |component|
    component.nil? ? nil : Utils.parse_into_expression(component, str_as_lit: false)
  end

  Utils.wrap_expr(Plr.duration(*components, time_unit))
end

#elementExpr

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:



36
37
38
# File 'lib/polars/functions/lazy.rb', line 36

# Alias for the element being evaluated in an eval expression.
# Implemented as the column with the empty name.
#
# @return [Expr]
def element
  placeholder_name = ""
  col(placeholder_name)
end

#enable_string_cachenil

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


63
64
65
# File 'lib/polars/string_cache.rb', line 63

# Enable the global string cache by delegating to the native layer.
#
# @return [Object] whatever `Plr.enable_string_cache` returns (documented as nil)
def enable_string_cache
  Plr.enable_string_cache()
end

#exclude(columns) ⇒ Object

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    Column(s) to exclude from selection. This can be:

    • a column name, or multiple column names
    • a regular expression starting with ^ and ending with $
    • a dtype or multiple dtypes

Returns:



1121
1122
1123
# File 'lib/polars/functions/lazy.rb', line 1121

# Exclude certain columns from a wildcard selection; shorthand for
# `col("*").exclude(columns)`.
#
# @param columns [Object] Column(s) to exclude from the selection.
# @return [Object] the result of `Expr#exclude`
def exclude(columns)
  everything = col("*")
  everything.exclude(columns)
end

#field(name) ⇒ Expr

Select a field in the current struct.with_fields scope.

Parameters:

  • name (Object)

    Name of the field(s) to select.

Returns:



9
10
11
12
13
14
# File 'lib/polars/functions/lazy.rb', line 9

# Select a field in the current `struct.with_fields` scope.
#
# @param name [Object] Name of the field(s) to select; a single String is wrapped in an array.
# @return [Expr]
def field(name)
  names = name.is_a?(::String) ? [name] : name
  Utils.wrap_expr(Plr.field(names))
end

#first(*columns) ⇒ Expr

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘
df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:



472
473
474
475
476
477
478
# File 'lib/polars/functions/lazy.rb', line 472

# Get the first value.
#
# Without columns, returns the selector for the first column as an
# expression; otherwise returns `col(*columns).first`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def first(*columns)
  columns.empty? ? cs.first.as_expr : col(*columns).first
end

#fold(acc, function, exprs, returns_scalar: false, return_dtype: nil) ⇒ Expr

Accumulate over multiple columns horizontally/row wise with a left fold.

Examples:

Horizontally sum over all columns and add 1.

df = Polars::DataFrame.new(
 {
   "a" => [1, 2, 3],
   "b" => [3, 4, 5],
   "c" => [5, 6, 7]
 }
)
df.select(
  Polars.fold(
    Polars.lit(1), ->(acc, x) { acc + x }, Polars.col("*")
  ).alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 10  │
# │ 13  │
# │ 16  │
# └─────┘

You can also apply a condition/predicate on all columns:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.filter(
  Polars.fold(
    Polars.lit(true),
    ->(acc, x) { acc & x },
    Polars.col("*") > 1
  )
)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:



856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
# File 'lib/polars/functions/lazy.rb', line 856

# Fold `function` over `exprs` horizontally from the left, starting at `acc`.
#
# @param acc [Object] Accumulator; strings are parsed as literals.
# @param function [Object] Callable applied to the accumulator and each value.
# @param exprs [Object] Expression(s) to fold over; a lone Expr is wrapped in an array.
# @param returns_scalar [Boolean] Passed through to the native fold.
# @param return_dtype [Object, nil] Optional output datatype.
# @return [Expr]
def fold(
  acc,
  function,
  exprs,
  returns_scalar: false,
  return_dtype: nil
)
  rbacc = Utils.parse_into_expression(acc, str_as_lit: true)
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs

  # Only build a datatype expression when the caller asked for one.
  rbdtype = return_dtype.nil? ? nil : Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr

  rbexprs = Utils.parse_into_list_of_expressions(inputs)
  folded = Plr.fold(rbacc, function, rbexprs, returns_scalar, rbdtype)
  Utils.wrap_expr(folded)
end

#format(f_string, *args) ⇒ Expr

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

  • f_string (String)

    A string with placeholders. For example: "hello_{}" or "{}_world".

  • args (Object)

    Expression(s) that fill the placeholders

Returns:



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/polars/functions/as_datatype.rb', line 300

# Format expressions as a string by filling `{}` placeholders in order.
#
# @param f_string [String] Template containing `{}` placeholders.
# @param args [Object] One expression (or column name) per placeholder.
# @return [Expr]
# @raise [ArgumentError] if the placeholder count differs from the argument count
def format(f_string, *args)
  unless f_string.scan("{}").length == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Splitting on a capture group keeps the "{}" markers as their own pieces;
  # empty pieces (adjacent or leading placeholders) contribute nothing.
  next_arg = 0
  pieces = f_string.split(/(\{\})/).reject(&:empty?)
  exprs = pieces.map do |piece|
    if piece == "{}"
      rbexpr = Utils.parse_into_expression(args[next_arg])
      next_arg += 1
      Utils.wrap_expr(rbexpr)
    else
      lit(piece)
    end
  end

  concat_str(exprs, sep: "")
end

#from_epoch(column, unit: "s", eager: false) ⇒ Object

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • unit: "d" returns pl.Date
  • unit: "s" returns pl.Datetime["us"]
  • unit: "ms" returns pl.Datetime["ms"]
  • unit: "us" returns pl.Datetime["us"]
  • unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

  • column (Object)

    Series or expression to parse integers to pl.Datetime.

  • unit (String) (defaults to: "s")

    The unit of the timesteps since epoch time.

  • eager (Boolean) (defaults to: false)

    If eager evaluation is true, a Series is returned instead of an Expr.

Returns:



1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
# File 'lib/polars/functions/lazy.rb', line 1452

# Parse an epoch timestamp (Unix time) into a Date or Datetime.
#
# @param column [Object] A string-like column name, a Series, an Expr, or raw
#   values (which are wrapped in a Series).
# @param unit [String] "d", "s", or one of `Utils::DTYPE_TEMPORAL_UNITS`.
# @param eager [Boolean] When true, evaluate immediately; the input must then
#   be a Series (or raw values).
# @return [Object] the cast result when `eager` is false, otherwise the
#   evaluated Series
# @raise [ArgumentError] on an unsupported unit, or when `eager` is true and
#   the input is not a Series
def from_epoch(column, unit: "s", eager: false)
  if Utils.strlike?(column)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(Date)
  elsif unit == "s"
    # Seconds are scaled to microseconds before casting to Datetime("us").
    expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    expr = column.cast(Datetime.new(unit))
  else
    # Single braces: the doubled `{{ }}` was a Python f-string escape that
    # Ruby would print literally.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  return expr unless eager

  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end

#groups(column) ⇒ Object

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:



1128
1129
1130
# File 'lib/polars/functions/lazy.rb', line 1128

# Shorthand for `col(column).agg_groups`.
#
# @param column [Object] Column name.
# @return [Object] the result of `Expr#agg_groups`
def groups(column)
  selected = col(column)
  selected.agg_groups
end

#head(column, n = 10) ⇒ Expr

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



629
630
631
# File 'lib/polars/functions/lazy.rb', line 629

# Get the first `n` rows of a column; shorthand for `col(column).head(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return.
# @return [Expr]
def head(column, n = 10)
  selected = col(column)
  selected.head(n)
end

#implode(*columns) ⇒ Expr

Aggregate all column values into a list.

This function is syntactic sugar for col(name).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘
df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



163
164
165
# File 'lib/polars/functions/lazy.rb', line 163

# Aggregate all values of the selected columns into a list; shorthand for
# `col(*columns).implode`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def implode(*columns)
  selection = col(*columns)
  selection.implode
end

#int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ Expr, Series Also known as: arange

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

  • start (Integer, Expr, Series)

    Lower bound of range.

  • stop (Integer, Expr, Series) (defaults to: nil)

    Upper bound of range.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • eager (Boolean) (defaults to: false)

    If eager is true, a Series is returned instead of an Expr.

  • dtype (Symbol) (defaults to: nil)

    Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/polars/functions/range/int_range.rb', line 31

# Create a range expression (or Series).
#
# When only one bound is given it is used as the upper bound and the range
# starts at 0. The resulting expression is aliased to "arange".
#
# @param start [Integer, Expr, Series] Lower bound of range.
# @param stop [Integer, Expr, Series] Upper bound of range.
# @param step [Integer] Step size of the range.
# @param eager [Boolean] If true, evaluate via `select` and return a Series.
# @param dtype [Symbol] Integer dtype for the result; `Int64` when omitted.
# @return [Expr, Series]
def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
  # Single-bound form: the lone argument is the upper bound.
  start, stop = 0, start if stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  int_dtype = dtype || Int64
  int_dtype = int_dtype.to_s if int_dtype.is_a?(Symbol)

  range_expr = Utils.wrap_expr(Plr.int_range(lower, upper, step, int_dtype)).alias("arange")
  eager ? select(range_expr).to_series : range_expr
end

#last(*columns) ⇒ Expr

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘
df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘
df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If no column names are given (default), returns an expression to take the last column of the context instead.

Returns:



532
533
534
535
536
537
538
# File 'lib/polars/functions/lazy.rb', line 532

# Get the last value.
#
# With column names this is `col(*columns).last`; with no arguments it
# selects the last column of the context instead.
#
# @param columns [Array] Zero or more column names.
# @return [Expr]
def last(*columns)
  return col(*columns).last unless columns.empty?

  # No names given: fall back to the selector for the last column.
  cs.last.as_expr
end

#len ⇒ Expr Also known as: length

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using len in conjunction with int_range.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:



44
45
46
# File 'lib/polars/functions/len.rb', line 44

# Return the number of rows in the context.
#
# @return [Expr] The native `Plr.len` expression wrapped by `Utils.wrap_expr`.
def len
  row_count = Plr.len
  Utils.wrap_expr(row_count)
end

#lit(value, dtype: nil, allow_object: nil) ⇒ Expr

Return an expression representing a literal value.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/polars/functions/lit.rb', line 6

# Return an expression representing a literal value.
#
# Dispatch depends on the Ruby type of `value`:
# - `Time` / `DateTime`: converted with `Utils.datetime_to_int` and cast to
#   `Datetime` using the time unit of `dtype` ("ns" when `dtype` is nil or has
#   no unit); when `dtype` carries a time zone it is applied afterwards with
#   `dt.replace_time_zone`.
# - `Date`: built from midnight UTC of that day, then cast to `Date`.
# - `Polars::Series`: its underlying `_s` is handed to `Plr.lit`.
# - `Array` (or `Numo::NArray` when that constant is defined): wrapped in a
#   one-element Series named "literal" created with `dtype`.
# - anything else: handed to `Plr.lit`, then cast to `dtype` when one is given.
#
# @param value [Object] Value to turn into a literal expression.
# @param dtype [Object] Optional data type for the literal.
# @param allow_object [Boolean, nil] Forwarded unchanged to `Plr.lit`.
# @return [Expr]
def lit(value, dtype: nil, allow_object: nil)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    time_unit = dtype&.time_unit || "ns"
    # Safe navigation (`&.`): the previous `dtype.&time_zone` invoked the `&`
    # operator method on dtype, which raises for a real dtype object.
    time_zone = dtype&.time_zone
    e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
    if time_zone
      return e.dt.replace_time_zone(time_zone.to_s)
    else
      return e
    end
  elsif value.is_a?(::Date)
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    value = value._s
    return Utils.wrap_expr(Plr.lit(value, allow_object, false))
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object, true))
end

#max(*names) ⇒ Expr

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘
df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



135
136
137
# File 'lib/polars/functions/aggregation/vertical.rb', line 135

# Get the maximum value.
#
# Shorthand for `col(*names).max`.
#
# @param names [Array] Name(s) of the columns to use in the aggregation.
# @return [Expr]
def max(*names)
  selected = col(*names)
  selected.max
end

#max_horizontal(*exprs) ⇒ Expr

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



103
104
105
106
# File 'lib/polars/functions/aggregation/horizontal.rb', line 103

# Get the maximum value horizontally across columns.
#
# @param exprs [Array] Column(s) to use in the aggregation; parsed with
#   `Utils.parse_into_list_of_expressions`.
# @return [Expr]
def max_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.max_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#mean(*columns) ⇒ Expr Also known as: avg

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘
df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



285
286
287
# File 'lib/polars/functions/lazy.rb', line 285

# Get the mean value.
#
# Shorthand for `col(*columns).mean`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def mean(*columns)
  selected = col(*columns)
  selected.mean
end

#mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



208
209
210
211
# File 'lib/polars/functions/aggregation/horizontal.rb', line 208

# Compute the mean of all values horizontally across columns.
#
# @param exprs [Array] Column(s) to use in the aggregation; parsed with
#   `Utils.parse_into_list_of_expressions`.
# @param ignore_nulls [Boolean] Forwarded to `Plr.mean_horizontal`.
# @return [Expr]
def mean_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.mean_horizontal(Utils.parse_into_list_of_expressions(*exprs), ignore_nulls)
  )
end

#median(*columns) ⇒ Expr

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘
df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



329
330
331
# File 'lib/polars/functions/lazy.rb', line 329

# Get the median value.
#
# Shorthand for `col(*columns).median`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def median(*columns)
  selected = col(*columns)
  selected.median
end

#min(*names) ⇒ Expr

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘
df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



190
191
192
# File 'lib/polars/functions/aggregation/vertical.rb', line 190

# Get the minimum value.
#
# Shorthand for `col(*names).min`.
#
# @param names [Array] Name(s) of the columns to use in the aggregation.
# @return [Expr]
def min(*names)
  selected = col(*names)
  selected.min
end

#min_horizontal(*exprs) ⇒ Expr

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



136
137
138
139
# File 'lib/polars/functions/aggregation/horizontal.rb', line 136

# Get the minimum value horizontally across columns.
#
# @param exprs [Array] Column(s) to use in the aggregation; parsed with
#   `Utils.parse_into_list_of_expressions`.
# @return [Expr]
def min_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.min_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#n_unique(*columns) ⇒ Expr

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



372
373
374
# File 'lib/polars/functions/lazy.rb', line 372

# Count unique values.
#
# Shorthand for `col(*columns).n_unique`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def n_unique(*columns)
  selected = col(*columns)
  selected.n_unique
end

#nth(*indices, strict: true) ⇒ Expr

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘
df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

Parameters:

  • indices (Array)

    One or more indices representing the columns to retrieve.

Returns:



581
582
583
# File 'lib/polars/functions/lazy.rb', line 581

# Get the nth column(s) of the context.
#
# @param indices [Array] One or more indices representing the columns to retrieve.
# @param strict [Boolean] Passed to the `by_index` selector as `require_all`.
# @return [Expr]
def nth(*indices, strict: true)
  selector = cs.by_index(*indices, require_all: strict)
  selector.as_expr
end

#ones(n, dtype: nil, eager: true) ⇒ Object

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



85
86
87
88
89
90
91
92
# File 'lib/polars/functions/repeat.rb', line 85

# Construct a column of length `n` filled with ones.
#
# Built on `repeat`; the result is aliased to "ones".
#
# @param n [Integer] Length of the resulting column.
# @param dtype [Object] Data type of the resulting column.
# @param eager [Boolean] Forwarded to `repeat`.
# @return [Object]
# @raise [TypeError] If `_one_or_zero_by_dtype` yields nil for `dtype`.
def ones(n, dtype: nil, eager: true)
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeat(one, n, dtype: dtype, eager: eager).alias("ones")
end

#quantile(column, quantile, interpolation: "nearest") ⇒ Expr

Syntactic sugar for Polars.col("foo").quantile(...).

Parameters:

  • column (String)

    Column name.

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



1142
1143
1144
# File 'lib/polars/functions/lazy.rb', line 1142

# Syntactic sugar for `Polars.col("foo").quantile(...)`.
#
# @param column [String] Column name.
# @param quantile [Float] Quantile between 0.0 and 1.0.
# @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
#   Interpolation method.
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  selected = col(column)
  selected.quantile(quantile, interpolation: interpolation)
end

#repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ Object

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting eager: true.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

  • value (Object)

    Value to repeat.

  • n (Integer)

    Repeat n times.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. If set to nil (default), data type is inferred from the given value. Defaults to Int32 for integer values, unless Int64 is required to fit the given value. Defaults to Float64 for float values.

  • eager (Boolean) (defaults to: false)

    Run eagerly and collect into a Series.

  • name (String) (defaults to: nil)

    Only used in eager mode. As expression, use alias.

Returns:



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/polars/functions/repeat.rb', line 41

# Repeat a single value `n` times.
#
# @param value [Object] Value to repeat; strings are parsed as literals.
# @param n [Integer, Object] Repeat count. Integers are wrapped with `lit`;
#   anything else must respond to `_rbexpr`.
# @param dtype [Object] Data type of the resulting column.
# @param eager [Boolean] Run eagerly via `select` and return a Series.
# @param name [String] Deprecated; when given, a warning is emitted and the
#   expression is aliased to this name.
# @return [Object]
def repeat(value, n, dtype: nil, eager: false, name: nil)
  named = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if named

  n = lit(n) if n.is_a?(Integer)

  value_rbexpr = Utils.parse_into_expression(value, str_as_lit: true)
  repeated = Utils.wrap_expr(Plr.repeat(value_rbexpr, n._rbexpr, dtype))
  repeated = repeated.alias(name) if named
  return select(repeated).to_series if eager

  repeated
end

#select(*exprs, **named_exprs) ⇒ DataFrame

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

  • exprs (Array)

    Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • named_exprs (Hash)

    Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



1331
1332
1333
# File 'lib/polars/functions/lazy.rb', line 1331

# Run polars expressions without a context.
#
# Equivalent to calling `select` on a DataFrame built from an empty array.
#
# @param exprs [Array] Column(s) to select, specified as positional arguments.
# @param named_exprs [Hash] Additional columns to select, specified as keyword arguments.
# @return [DataFrame]
def select(*exprs, **named_exprs)
  empty_frame = DataFrame.new([])
  empty_frame.select(*exprs, **named_exprs)
end

#set_random_seed(seed) ⇒ nil

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

  • seed (Integer)

    A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

  • (nil)


12
13
14
# File 'lib/polars/functions/random.rb', line 12

# Set the global random seed for Polars.
#
# Delegates directly to the native `Plr.set_random_seed`.
#
# @param seed [Integer] Seed value forwarded unchanged to the native layer.
# @return [nil]
def set_random_seed(seed)
  Plr.set_random_seed(seed)
end

#sql_expr(sql) ⇒ Expr

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

  • sql (Object)

    One or more SQL expressions.

Returns:



1515
1516
1517
1518
1519
1520
1521
# File 'lib/polars/functions/lazy.rb', line 1515

# Parse one or more SQL expressions to polars expression(s).
#
# @param sql [String, Object] A single SQL string, or a collection of SQL
#   strings that responds to `map`.
# @return [Expr, Array<Expr>] One expression for a String, otherwise the
#   mapped collection of expressions.
def sql_expr(sql)
  return Utils.wrap_expr(Plr.sql_expr(sql)) if sql.is_a?(::String)

  sql.map { |statement| Utils.wrap_expr(Plr.sql_expr(statement)) }
end

#std(column, ddof: 1) ⇒ Expr

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘
df["a"].std
# => 3.605551275463989

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



202
203
204
# File 'lib/polars/functions/lazy.rb', line 202

# Get the standard deviation.
#
# Shorthand for `col(column).std(ddof: ddof)`.
#
# @param column [Object] Column name.
# @param ddof [Integer] "Delta Degrees of Freedom", forwarded to `Expr#std`.
# @return [Expr]
def std(column, ddof: 1)
  selected = col(column)
  selected.std(ddof: ddof)
end

#struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}

Parameters:

  • exprs (Array)

    Column(s) to collect into a struct column, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • schema (Hash) (defaults to: nil)

    Optional schema that explicitly defines the struct field dtypes. If no columns or expressions are provided, schema keys are used to define columns.

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

  • named_exprs (Hash)

    Additional columns to collect into the struct column, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/polars/functions/as_datatype.rb', line 198

# Collect several columns into a Series of dtype Struct.
#
# When a non-empty `schema` is given, the resulting struct is always cast to
# `Struct.new(schema)` (non-strict), so the schema defines the field dtypes
# whether or not columns were passed. If no positional columns are given, the
# struct is built from the schema keys instead.
#
# @param exprs [Array] Column(s) to collect into a struct column.
# @param schema [Hash] Optional schema that explicitly defines the struct field dtypes.
# @param eager [Boolean] Evaluate immediately via `Polars.select` and return a Series.
# @param named_exprs [Hash] Additional columns, renamed to the keyword used.
# @return [Object] A Series when `eager` is truthy, otherwise an expression.
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
  expr = Utils.wrap_expr(Plr.as_struct(rbexprs))

  if !schema.nil? && !schema.empty?
    # `empty?` rather than `!any?`: a lone `false`/`nil` literal is still an input.
    if exprs.empty?
      # no columns or expressions provided; create one from schema keys
      expr =
        Utils.wrap_expr(
          Plr.as_struct(Utils.parse_into_list_of_expressions(schema.keys))
        )
    end
    # Apply the schema in both cases; previously it was dropped when columns were passed.
    expr = expr.cast(Struct.new(schema), strict: false)
  end

  if eager
    Polars.select(expr).to_series
  else
    expr
  end
end

#sum(*names) ⇒ Expr

Sum all values.

Syntactic sugar for col(name).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘
df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



245
246
247
# File 'lib/polars/functions/aggregation/vertical.rb', line 245

# Sum all values.
#
# Shorthand for `col(*names).sum`.
#
# @param names [Array] Name(s) of the columns to use in the aggregation.
# @return [Expr]
def sum(*names)
  selected = col(*names)
  selected.sum
end

#sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:



172
173
174
175
# File 'lib/polars/functions/aggregation/horizontal.rb', line 172

# Sum all values horizontally across columns.
#
# @param exprs [Array] Column(s) to use in the aggregation; parsed with
#   `Utils.parse_into_list_of_expressions`.
# @param ignore_nulls [Boolean] Forwarded to `Plr.sum_horizontal`.
# @return [Expr]
def sum_horizontal(*exprs, ignore_nulls: true)
  Utils.wrap_expr(
    Plr.sum_horizontal(Utils.parse_into_list_of_expressions(*exprs), ignore_nulls)
  )
end

#tail(column, n = 10) ⇒ Expr

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



677
678
679
# File 'lib/polars/functions/lazy.rb', line 677

# Get the last `n` rows of a column.
#
# Shorthand for `col(column).tail(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return.
# @return [Expr]
def tail(column, n = 10)
  selected = col(column)
  selected.tail(n)
end

#time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (String) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/polars/functions/range/time_range.rb', line 35

# Generate a time range.
#
# Missing bounds default to the start (00:00:00) and end (23:59:59.999999) of
# a placeholder day; only the time-of-day part is used.
#
# @param start [Object] Lower bound of the time range.
# @param stop [Object] Upper bound of the time range.
# @param interval [String] Interval of the range periods.
# @param closed ['both', 'left', 'right', 'none'] Which sides of the range are closed.
# @param eager [Boolean] Evaluate immediately via `Polars.select` and return a Series.
# @return [Object]
# @raise [ArgumentError] If the parsed interval contains a "y", "mo", "w" or "d" unit.
def time_range(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # Calendar-sized units make no sense for a time-of-day range; report the first one found.
  calendar_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  unless calendar_unit.nil?
    raise ArgumentError, "invalid interval unit for time_range: found #{calendar_unit.inspect}"
  end

  # date part is ignored
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  range_expr = Utils.wrap_expr(Plr.time_range(lower, upper, interval, closed))

  eager ? Polars.select(range_expr).to_series : range_expr
end

#time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

Parameters:

  • start (Object) (defaults to: nil)

    Lower bound of the time range.

  • stop (Object) (defaults to: nil)

    Upper bound of the time range.

  • interval (String) (defaults to: "1h")

    Interval of the range periods, specified using the Polars duration string language.

  • closed ('both', 'left', 'right', 'none') (defaults to: "both")

    Define which sides of the range are closed (inclusive).

  • eager (Boolean) (defaults to: false)

    Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/polars/functions/range/time_range.rb', line 105

# Create a column of time ranges.
#
# Missing bounds default to the start (00:00:00) and end (23:59:59.999999) of
# a placeholder day; only the time-of-day part is used.
#
# @param start [Object] Lower bound of the time range.
# @param stop [Object] Upper bound of the time range.
# @param interval [String] Interval of the range periods.
# @param closed ['both', 'left', 'right', 'none'] Which sides of the range are closed.
# @param eager [Boolean] Evaluate immediately via `Polars.select` and return a Series.
# @return [Object]
# @raise [ArgumentError] If the parsed interval contains a "y", "mo", "w" or "d" unit.
def time_ranges(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  # Calendar-sized units make no sense for a time-of-day range; report the first one found.
  calendar_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  unless calendar_unit.nil?
    raise ArgumentError, "invalid interval unit for time_range: found #{calendar_unit.inspect}"
  end

  # date part is ignored
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  ranges_expr = Utils.wrap_expr(Plr.time_ranges(lower, upper, interval, closed))

  eager ? Polars.select(ranges_expr).to_series : ranges_expr
end

#using_string_cache ⇒ Boolean

Check whether the global string cache is enabled.

Returns:



97
98
99
# File 'lib/polars/string_cache.rb', line 97

# Check whether the global string cache is enabled.
#
# Delegates directly to the native `Plr.using_string_cache`.
#
# @return [Boolean]
def using_string_cache
  Plr.using_string_cache
end

#var(column, ddof: 1) ⇒ Expr

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘
df["a"].var
# => 13.0

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



241
242
243
# File 'lib/polars/functions/lazy.rb', line 241

# Get the variance.
#
# Shorthand for `col(column).var(ddof: ddof)`.
#
# @param column [Object] Column name.
# @param ddof [Integer] "Delta Degrees of Freedom", forwarded to `Expr#var`.
# @return [Expr]
def var(column, ddof: 1)
  selected = col(column)
  selected.var(ddof: ddof)
end

#when(*predicates, **constraints) ⇒ When

Start a "when, then, otherwise" expression.

Examples:

Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The otherwise at the end is optional. If left out, any rows where none of the when expressions evaluate to true are set to null:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

Returns:

  • (When)


91
92
93
94
# File 'lib/polars/functions/whenthen.rb', line 91

# Start a "when, then, otherwise" expression.
#
# @param predicates [Array] Condition(s), given as positional arguments.
# @param constraints [Hash] Conditions given as keyword arguments.
# @return [When]
def when(*predicates, **constraints)
  rbwhen = Plr.when(
    Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
  )
  When.new(rbwhen)
end

#zeros(n, dtype: nil, eager: true) ⇒ Object

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



118
119
120
121
122
123
124
125
# File 'lib/polars/functions/repeat.rb', line 118

# Construct a column of length `n` filled with zeros.
#
# Built on `repeat`; the result is aliased to "zeros".
#
# @param n [Integer] Length of the resulting column.
# @param dtype [Object] Data type of the resulting column.
# @param eager [Boolean] Forwarded to `repeat`.
# @return [Object]
# @raise [TypeError] If `_one_or_zero_by_dtype` yields nil for `dtype`.
def zeros(n, dtype: nil, eager: true)
  fill_value = _one_or_zero_by_dtype(0, dtype)
  raise TypeError, "invalid dtype for `zeros`; found #{dtype}" if fill_value.nil?

  repeat(fill_value, n, dtype: dtype, eager: eager).alias("zeros")
end