Module: Polars::Functions

Included in:: Polars

Defined in:: lib/polars/string_cache.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/functions/lazy.rb,
lib/polars/functions/eager.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/functions/business.rb,
lib/polars/functions/datatype.rb,
lib/polars/functions/whenthen.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/range/time_range.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/range/datetime_range.rb,
lib/polars/functions/aggregation/horizontal.rb

Instance Method Summary collapse

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object
Align a sequence of frames using the unique values from one or more columns as a key.
#all(*names, ignore_nulls: true) ⇒ Expr
Either return an expression representing all columns, or evaluate a bitwise AND operation.
#all_horizontal(*exprs) ⇒ Expr
Compute the bitwise AND horizontally across columns.
#any(*names, ignore_nulls: true) ⇒ Expr
Evaluate a bitwise OR operation.
#any_horizontal(*exprs) ⇒ Expr
Compute the bitwise OR horizontally across columns.
#approx_n_unique(*columns) ⇒ Expr
Approximate count of unique values.
#arctan2(y, x) ⇒ Expr
Compute two argument arctan in radians.
#arctan2d(y, x) ⇒ Expr
Compute two argument arctan in degrees.
#arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr (also: #argsort_by)
Find the indexes that would sort the columns.
#arg_where(condition, eager: false) ⇒ Expr, Series
Return indices where condition evaluates true.
#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr
Count the number of business days between start and end (not including end).
#coalesce(exprs, *more_exprs) ⇒ Expr
Folds the columns from left to right, keeping the first non-null value.
#col(name, *more_names) ⇒ Expr
Return an expression representing a column in a DataFrame.
#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array
Collect multiple LazyFrames at the same time.
#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object
Aggregate multiple Dataframes/Series to a single DataFrame/Series.
#concat_list(exprs, *more_exprs) ⇒ Expr
Concat the arrays in a Series dtype List in linear time.
#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr
Horizontally concat Utf8 Series in linear time.
#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ Expr
Compute the Pearson or Spearman rank correlation between two columns.
#count(*columns) ⇒ Expr
Return the number of non-null values in the column.
#cov(a, b, ddof: 1) ⇒ Expr
Compute the covariance between two columns/expressions.
#cum_count(*columns, reverse: false) ⇒ Expr
Return the cumulative count of the non-null values in the column.
#cum_fold(acc, function, exprs, returns_scalar: false, return_dtype: nil, include_init: false) ⇒ Object (also: #cumfold)
Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.
#cum_sum(*names) ⇒ Expr (also: #cumsum)
Cumulatively sum all values.
#cum_sum_horizontal(*exprs) ⇒ Expr (also: #cumsum_horizontal)
Cumulatively sum all values horizontally across columns.
#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object
Create a range of type Datetime (or Date).
#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object
Create a column of date ranges.
#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object
Generate a datetime range.
#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object
Create a column of datetime ranges.
#disable_string_cache ⇒ nil
Disable and clear the global string cache.
#dtype_of(col_or_expr) ⇒ DataTypeExpr
Get a lazily evaluated DataType of a column or expression.
#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr
Create polars Duration from distinct time components.
#element ⇒ Expr
Alias for an element being evaluated in an eval expression.
#enable_string_cache ⇒ nil
Enable the global string cache.
#exclude(columns) ⇒ Object
Exclude certain columns from a wildcard/regex selection.
#field(name) ⇒ Expr
Select a field in the current struct.with_fields scope.
#first(*columns) ⇒ Expr
Get the first value.
#fold(acc, function, exprs, returns_scalar: false, return_dtype: nil) ⇒ Expr
Accumulate over multiple columns horizontally/row wise with a left fold.
#format(f_string, *args) ⇒ Expr
Format expressions as a string.
#from_epoch(column, unit: "s", eager: false) ⇒ Object
Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).
#groups(column) ⇒ Object
Syntactic sugar for Polars.col("foo").agg_groups.
#head(column, n = 10) ⇒ Expr
Get the first n rows.
#implode(*columns) ⇒ Expr
Aggregate all column values into a list.
#int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ Expr, Series (also: #arange)
Create a range expression (or Series).
#last(*columns) ⇒ Expr
Get the last value.
#len ⇒ Expr (also: #length)
Return the number of rows in the context.
#lit(value, dtype: nil, allow_object: nil) ⇒ Expr
Return an expression representing a literal value.
#max(*names) ⇒ Expr
Get the maximum value.
#max_horizontal(*exprs) ⇒ Expr
Get the maximum value horizontally across columns.
#mean(*columns) ⇒ Expr (also: #avg)
Get the mean value.
#mean_horizontal(*exprs, ignore_nulls: true) ⇒ Expr
Compute the mean of all values horizontally across columns.
#median(*columns) ⇒ Expr
Get the median value.
#min(*names) ⇒ Expr
Get the minimum value.
#min_horizontal(*exprs) ⇒ Expr
Get the minimum value horizontally across columns.
#n_unique(*columns) ⇒ Expr
Count unique values.
#nth(*indices, strict: true) ⇒ Expr
Get the nth column(s) of the context.
#ones(n, dtype: nil, eager: true) ⇒ Object
Construct a column of length n filled with ones.
#quantile(column, quantile, interpolation: "nearest") ⇒ Expr
Syntactic sugar for Polars.col("foo").quantile(...).
#repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ Object
Repeat a single value n times.
#select(*exprs, **named_exprs) ⇒ DataFrame
Run polars expressions without a context.
#set_random_seed(seed) ⇒ nil
Set the global random seed for Polars.
#sql_expr(sql) ⇒ Expr
Parse one or more SQL expressions to polars expression(s).
#std(column, ddof: 1) ⇒ Expr
Get the standard deviation.
#struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ Object
Collect several columns into a Series of dtype Struct.
#sum(*names) ⇒ Expr
Sum all values.
#sum_horizontal(*exprs, ignore_nulls: true) ⇒ Expr
Sum all values horizontally across columns.
#tail(column, n = 10) ⇒ Expr
Get the last n rows.
#time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object
Generate a time range.
#time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ Object
Create a column of time ranges.
#using_string_cache ⇒ Boolean
Check whether the global string cache is enabled.
#var(column, ddof: 1) ⇒ Expr
Get the variance.
#when(*predicates, **constraints) ⇒ When
Start a "when, then, otherwise" expression.
#zeros(n, dtype: nil, eager: true) ⇒ Object
Construct a column of length n filled with zeros.

Instance Method Details

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ `Object`

Align a sequence of frames using the unique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum_horizontal("*").alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# │ 167.5 │
# │ 47.0  │
# └───────┘

Parameters:

frames (Array) —
Sequence of DataFrames or LazyFrames.
on (Object) —
One or more columns whose unique values will be used to align the frames.
select (Object) (defaults to: nil) —
Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.
reverse (Object) (defaults to: false) —
Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:

(Object)

# File 'lib/polars/functions/eager.rb', line 271

# Align frames on the unique values of the `on` column(s).
#
# @param frames [Array] DataFrames or LazyFrames; all must share one class.
# @param on [Object] Column(s) whose unique values form the alignment key.
# @param select [Object] Optional selection applied to every aligned frame.
# @param reverse [Object] Forwarded to the sort of the alignment key.
# @return [Object] An empty array when no frames are given; otherwise one
#   aligned frame per input, collected when the inputs are DataFrames.
# @raise [TypeError] If the frames are not all instances of the same class.
def align_frames(*frames, on:, select: nil, reverse: false)
  return [] if frames.empty?

  if frames.map(&:class).uniq.size > 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  eager = frames.first.is_a?(DataFrame)

  # Sorted superset of every key value seen in any of the frames.
  key_frame = concat(frames.map { |frame| frame.lazy.select(on) })
  key_frame = key_frame.unique(maintain_order: false).sort(on, reverse: reverse)
  # Eager input: collect once and go back to lazy; lazy input: cache the plan.
  key_frame = eager ? key_frame.collect.lazy : key_frame.cache

  # Left-join each frame onto the key frame, then restore its own column order.
  aligned = frames.map do |frame|
    key_frame
      .join(frame.lazy, on: key_frame.columns, how: "left")
      .select(frame.columns)
  end
  aligned = aligned.map { |frame| frame.select(select) } unless select.nil?

  eager ? aligned.map(&:collect) : aligned
end

#all(*names, ignore_nulls: true) ⇒ `Expr`

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default).

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/vertical.rb', line 44

# Select every column, or AND-aggregate the named columns.
#
# @param names [Array] Column names; when empty the wildcard column is returned.
# @param ignore_nulls [Boolean] Forwarded to the expression as `drop_nulls`.
# @return [Expr]
def all(*names, ignore_nulls: true)
  # No names means "all columns" rather than an aggregation.
  names.empty? ? col("*") : col(*names).all(drop_nulls: ignore_nulls)
end

#all_horizontal(*exprs) ⇒ `Expr`

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 34

# AND the given columns together row by row.
#
# @param exprs [Array] Inputs handed to Utils.parse_into_list_of_expressions.
# @return [Expr]
def all_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.all_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#any(*names, ignore_nulls: true) ⇒ `Expr`

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default).

Returns:

(Expr)



80
81
82

# File 'lib/polars/functions/aggregation/vertical.rb', line 80

# OR-aggregate the named columns.
#
# @param names [Array] Column names forwarded to `col`.
# @param ignore_nulls [Boolean] Forwarded to the expression as `drop_nulls`.
# @return [Expr]
def any(*names, ignore_nulls: true)
  selected = col(*names)
  selected.any(drop_nulls: ignore_nulls)
end

#any_horizontal(*exprs) ⇒ `Expr`

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 70

# OR the given columns together row by row.
#
# @param exprs [Array] Inputs handed to Utils.parse_into_list_of_expressions.
# @return [Expr]
def any_horizontal(*exprs)
  Utils.wrap_expr(
    Plr.any_horizontal(Utils.parse_into_list_of_expressions(*exprs))
  )
end

#approx_n_unique(*columns) ⇒ `Expr`

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)



416
417
418

# File 'lib/polars/functions/lazy.rb', line 416

# Approximate the number of unique values in the named columns.
#
# @param columns [Array] Column names forwarded to `col`.
# @return [Expr]
def approx_n_unique(*columns)
  selected = col(*columns)
  selected.approx_n_unique
end

#arctan2(y, x) ⇒ `Expr`

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

y (Object) —
Column name or Expression.
x (Object) —
Column name or Expression.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 1007

# Two-argument arctangent in radians.
#
# @param y [Object] Column name or expression.
# @param x [Object] Column name or expression.
# @return [Expr]
def arctan2(y, x)
  # String-like arguments are column names; anything else is used as given.
  y_expr, x_expr = [y, x].map { |arg| Utils.strlike?(arg) ? col(arg) : arg }
  Utils.wrap_expr(Plr.arctan2(y_expr._rbexpr, x_expr._rbexpr))
end

#arctan2d(y, x) ⇒ `Expr`

Compute two argument arctan in degrees.

Returns the angle (in degrees) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

y (Object) —
Column name or Expression.
x (Object) —
Column name or Expression.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 1052

# Two-argument arctangent in degrees.
#
# @param y [Object] Column name or expression.
# @param x [Object] Column name or expression.
# @return [Expr]
def arctan2d(y, x)
  # String-like arguments are column names; anything else is used as given.
  y_expr, x_expr = [y, x].map { |arg| Utils.strlike?(arg) ? col(arg) : arg }
  Utils.wrap_expr(Plr.arctan2d(y_expr._rbexpr, x_expr._rbexpr))
end

#arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ `Expr` Also known as: argsort_by

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Examples:

Pass a single column name to compute the arg sort by that column.

df = Polars::DataFrame.new(
  {
    "a" => [0, 1, 1, 0],
    "b" => [3, 2, 3, 2],
    "c" => [1, 2, 3, 4]
  }
)
df.select(Polars.arg_sort_by("a"))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 3   │
# │ 1   │
# │ 2   │
# └─────┘

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.arg_sort_by(["a", "b"], reverse: true))
# =>
# shape: (4, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# │ 0   │
# │ 3   │
# └─────┘

Use gather to apply the arg sort to other columns.

df.select(Polars.col("c").gather(Polars.arg_sort_by("a")))
# =>
# shape: (4, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 4   │
# │ 2   │
# │ 3   │
# └─────┘

Parameters:

exprs (Object) —
Columns to use to determine the ordering.
more_exprs (Array) —
Additional columns to arg sort by, specified as positional arguments.
reverse (Boolean) (defaults to: false) —
Default is ascending.
nulls_last (Boolean) (defaults to: false) —
Place null values last.
multithreaded (Boolean) (defaults to: true) —
Sort using multiple threads.
maintain_order (Boolean) (defaults to: false) —
Whether the order should be maintained if elements are equal.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 1218

# Indexes that would sort the given columns.
#
# @param exprs [Object] Column(s) that determine the ordering.
# @param more_exprs [Array] Further sort columns given positionally.
# @param reverse [Boolean] Extended to one flag per sort column.
# @param nulls_last [Boolean] Extended to one flag per sort column.
# @param multithreaded [Boolean] Forwarded unchanged to the native call.
# @param maintain_order [Boolean] Forwarded unchanged to the native call.
# @return [Expr]
def arg_sort_by(
  exprs,
  *more_exprs,
  reverse: false,
  nulls_last: false,
  multithreaded: true,
  maintain_order: false
)
  sort_keys = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  key_count = sort_keys.length
  reverse_flags = Utils.extend_bool(reverse, key_count, "reverse", "exprs")
  nulls_last_flags = Utils.extend_bool(nulls_last, key_count, "nulls_last", "exprs")

  rbexpr = Plr.arg_sort_by(sort_keys, reverse_flags, nulls_last_flags, multithreaded, maintain_order)
  Utils.wrap_expr(rbexpr)
end

#arg_where(condition, eager: false) ⇒ `Expr`, `Series`

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

condition (Expr) —
Boolean expression to evaluate
eager (Boolean) (defaults to: false) —
Whether to apply this function eagerly (as opposed to lazily).

Returns:

(Expr, Series)

# File 'lib/polars/functions/lazy.rb', line 1358

# Indices at which `condition` is true.
#
# @param condition [Expr, Series] Boolean expression; must be a Series when
#   `eager` is true.
# @param eager [Boolean] Evaluate immediately and return a Series.
# @return [Expr, Series]
# @raise [ArgumentError] If `eager` is true and `condition` is not a Series.
def arg_where(condition, eager: false)
  unless eager
    rbexpr = Utils.parse_into_expression(condition, str_as_lit: true)
    return Utils.wrap_expr(Plr.arg_where(rbexpr))
  end

  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
  end

  # Run the lazy form against a one-column frame built from the Series.
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end

#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ `Expr`

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Count the number of business days between start and end (not including end).

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2020, 1, 1), Date.new(2020, 1, 2)],
    "end" => [Date.new(2020, 1, 2), Date.new(2020, 1, 10)]
  }
)
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end")
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 6                  │
# └────────────┴────────────┴────────────────────┘

You can pass a custom weekend - for example, if you only take Sunday off:

week_mask = [true, true, true, true, true, true, false]
df.with_columns(
  business_day_count: Polars.business_day_count(
    "start", "end", week_mask: week_mask
  )
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 1                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 7                  │
# └────────────┴────────────┴────────────────────┘

You can also pass a list of holidays to exclude from the count:

holidays = [Date.new(2020, 1, 1), Date.new(2020, 1, 2)]
df.with_columns(
  business_day_count: Polars.business_day_count("start", "end", holidays: holidays)
)
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬────────────────────┐
# │ start      ┆ end        ┆ business_day_count │
# │ ---        ┆ ---        ┆ ---                │
# │ date       ┆ date       ┆ i32                │
# ╞════════════╪════════════╪════════════════════╡
# │ 2020-01-01 ┆ 2020-01-02 ┆ 0                  │
# │ 2020-01-02 ┆ 2020-01-10 ┆ 5                  │
# └────────────┴────────────┴────────────────────┘

Parameters:

start (Object) —
Start dates.
stop (Object) —
End dates.
week_mask (Array) (defaults to: [true, true, true, true, true, false, false]) —
Which days of the week to count. The default is Monday to Friday. If you wanted to count only Monday to Thursday, you would pass [true, true, true, true, false, false, false].
holidays (Array) (defaults to: []) —
Holidays to exclude from the count.

Returns:

(Expr)

# File 'lib/polars/functions/business.rb', line 76

# Count business days between `start` and `stop`.
#
# @param start [Object] Start dates.
# @param stop [Object] End dates.
# @param week_mask [Array] Seven booleans marking which weekdays count.
# @param holidays [Array] Dates to exclude from the count.
# @return [Expr]
def business_day_count(
  start,
  stop,
  week_mask: [true, true, true, true, true, false, false],
  holidays: []
)
  start_expr = Utils.parse_into_expression(start)
  stop_expr = Utils.parse_into_expression(stop)

  # Holidays are handed to the native call as day offsets from 1970-01-01.
  epoch = ::Date.new(1970, 1, 1)
  holiday_offsets = holidays.map { |day| day - epoch }

  rbexpr = Plr.business_day_count(start_expr, stop_expr, week_mask, holiday_offsets)
  Utils.wrap_expr(rbexpr)
end

#coalesce(exprs, *more_exprs) ⇒ `Expr`

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘

df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘

Parameters:

exprs (Array) —
Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
more_exprs (Array) —
Additional columns to coalesce, specified as positional arguments.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 1416

# Keep the first non-null value across the given columns, left to right.
#
# @param exprs [Array] Columns to coalesce.
# @param more_exprs [Array] Further columns given positionally.
# @return [Expr]
def coalesce(exprs, *more_exprs)
  candidates = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  rbexpr = Plr.coalesce(candidates)
  Utils.wrap_expr(rbexpr)
end

#col(name, *more_names) ⇒ `Expr`

Return an expression representing a column in a DataFrame.

Returns:

(Expr)

# File 'lib/polars/functions/col.rb', line 6

# Return an expression representing one or more columns.
#
# Accepted inputs, as handled below:
# - a single string-like name;
# - a single data type;
# - several string-like names, or several data types, given positionally;
# - an Array or Set of names or of data types (possibly empty).
#
# @param name [Object] Column name, data type, or an Array/Set of either.
# @param more_names [Array] Additional names or data types.
# @return [Expr]
# @raise [TypeError] If the input is none of the accepted kinds.
def col(name, *more_names)
  if more_names.any?
    # Only the first argument decides whether this is a name or dtype selection.
    if Utils.strlike?(name)
      names_str = [name, *more_names]
      return Selector._by_name(names_str.map(&:to_s), strict: true).as_expr
    elsif Utils.is_polars_dtype(name)
      dtypes = [name, *more_names]
      # Same selector constructor as the single-dtype and Array-of-dtype
      # branches below (this branch previously called `_by_type`).
      return Selector._by_dtype(dtypes).as_expr
    else
      msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
      raise TypeError, msg
    end
  end

  if Utils.strlike?(name)
    Utils.wrap_expr(Plr.col(name.to_s))
  elsif Utils.is_polars_dtype(name)
    Selector._by_dtype([name]).as_expr
  elsif name.is_a?(::Array) || name.is_a?(::Set)
    names = Array(name)
    # An empty collection is passed straight to the native multi-column call.
    return Utils.wrap_expr(Plr.cols(names)) if names.empty?

    # The first element decides how the whole collection is interpreted.
    item = names[0]
    if Utils.strlike?(item)
      Selector._by_name(names.map(&:to_s), strict: true).as_expr
    elsif Utils.is_polars_dtype(item)
      Selector._by_dtype(names).as_expr
    else
      msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
      raise TypeError, msg
    end
  else
    msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
    raise TypeError, msg
  end
end

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ `Array`

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on Polars threadpool.

Parameters:

lazy_frames (Array) —
A list of LazyFrames to collect.
type_coercion (Boolean) (defaults to: true) —
Do type coercion optimization.
predicate_pushdown (Boolean) (defaults to: true) —
Do predicate pushdown optimization.
projection_pushdown (Boolean) (defaults to: true) —
Do projection pushdown optimization.
simplify_expression (Boolean) (defaults to: true) —
Run simplify expressions optimization.
string_cache (Boolean) (defaults to: false) —
This argument is deprecated and will be ignored
no_optimization (Boolean) (defaults to: false) —
Turn off optimizations.
slice_pushdown (Boolean) (defaults to: true) —
Slice pushdown optimization.
common_subplan_elimination (Boolean) (defaults to: true) —
Will try to cache branching subplans that occur on self-joins or unions.
allow_streaming (Boolean) (defaults to: false) —
Run parts of the query in a streaming fashion (this is in an alpha state)

Returns:

(Array)

# File 'lib/polars/functions/lazy.rb', line 1259

# Collect several LazyFrames in a single native call.
#
# @param lazy_frames [Array] LazyFrames to collect.
# @param type_coercion [Boolean] Optimization toggle.
# @param predicate_pushdown [Boolean] Optimization toggle.
# @param projection_pushdown [Boolean] Optimization toggle.
# @param simplify_expression [Boolean] Optimization toggle.
# @param string_cache [Boolean] Accepted but never read by this method.
# @param no_optimization [Boolean] Forces the predicate, projection, slice and
#   common-subplan toggles off.
# @param slice_pushdown [Boolean] Optimization toggle.
# @param common_subplan_elimination [Boolean] Optimization toggle.
# @param allow_streaming [Boolean] Forwarded to the optimization toggle call.
# @return [Array] One wrapped DataFrame per collected result.
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  if no_optimization
    # type_coercion and simplify_expression are deliberately left untouched.
    predicate_pushdown = projection_pushdown = false
    slice_pushdown = common_subplan_elimination = false
  end

  toggled = lazy_frames.map do |lf|
    # NOTE(review): the final positional flag is hard-coded to false; its
    # meaning is not visible from here.
    lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming,
      false
    )
  end

  # Wrap each native frame returned by the collect call.
  Plr.collect_all(toggled).map { |rbdf| Utils.wrap_df(rbdf) }
end

#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ `Object`

Aggregate multiple Dataframes/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])  # default is 'vertical' strategy
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2.5], "b" => [4]})
Polars.concat([df1, df2], how: "vertical_relaxed")  # 'a' coerced into f64
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ i64 │
# ╞═════╪═════╡
# │ 1.0 ┆ 3   │
# │ 2.5 ┆ 4   │
# └─────┴─────┘

df_h1 = Polars::DataFrame.new({"l1" => [1, 2], "l2" => [3, 4]})
df_h2 = Polars::DataFrame.new({"r1" => [5, 6], "r2" => [7, 8], "r3" => [9, 10]})
Polars.concat([df_h1, df_h2], how: "horizontal")
# =>
# shape: (2, 5)
# ┌─────┬─────┬─────┬─────┬─────┐
# │ l1  ┆ l2  ┆ r1  ┆ r2  ┆ r3  │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   ┆ 7   ┆ 9   │
# │ 2   ┆ 4   ┆ 6   ┆ 8   ┆ 10  │
# └─────┴─────┴─────┴─────┴─────┘

df_d1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df_d2 = Polars::DataFrame.new({"a" => [2], "c" => [4]})
Polars.concat([df_d1, df_d2], how: "diagonal")
# =>
# shape: (2, 3)
# ┌─────┬──────┬──────┐
# │ a   ┆ b    ┆ c    │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null │
# │ 2   ┆ null ┆ 4    │
# └─────┴──────┴──────┘

df_a1 = Polars::DataFrame.new({"id" => [1, 2], "x" => [3, 4]})
df_a2 = Polars::DataFrame.new({"id" => [2, 3], "y" => [5, 6]})
df_a3 = Polars::DataFrame.new({"id" => [1, 3], "z" => [7, 8]})
Polars.concat([df_a1, df_a2, df_a3], how: "align")
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬──────┐
# │ id  ┆ x    ┆ y    ┆ z    │
# │ --- ┆ ---  ┆ ---  ┆ ---  │
# │ i64 ┆ i64  ┆ i64  ┆ i64  │
# ╞═════╪══════╪══════╪══════╡
# │ 1   ┆ 3    ┆ null ┆ 7    │
# │ 2   ┆ 4    ┆ 5    ┆ null │
# │ 3   ┆ null ┆ 6    ┆ 8    │
# └─────┴──────┴──────┴──────┘

Parameters:

items (Object) —
DataFrames/Series/LazyFrames to concatenate.
rechunk (Boolean) (defaults to: true) —
Make sure that all data is in contiguous memory.
how ("vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align") (defaults to: "vertical") —
- Vertical: applies multiple vstack operations.
- Diagonal: finds a union between the column schemas and fills missing column values with null.
- Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
- Align: joins the frames on their common columns with a full outer join, coalesces the key columns, and sorts the result by them.
parallel (Boolean) (defaults to: true) —
Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

Returns:

(Object)

# File 'lib/polars/functions/eager.rb', line 96

# Combine multiple DataFrames, LazyFrames, Series, or Exprs into a single object.
#
# The type of the first element decides which code path is taken.
#
# @param items [Object]
#   Collection to concatenate; it is converted with `to_a`.
# @param rechunk [Boolean]
#   Forwarded to the lazy concat routines. On the eager DataFrame, Series and
#   Expr paths, `rechunk` is additionally called on the result when true.
# @param how ["vertical", "vertical_relaxed", "diagonal", "diagonal_relaxed", "horizontal", "align"]
#   Concatenation strategy. Series only accept "vertical"; "align" is only
#   accepted when the first element is a DataFrame or LazyFrame.
# @param parallel [Boolean]
#   Forwarded to the lazy concat routines.
#
# @return [Object]
#
# @raise [ArgumentError]
#   If `items` is empty, if `how` is not valid for the input type, or if the
#   first element has an unsupported type.
# @raise [TypeError]
#   If `how` is "align" and the first element is neither a DataFrame nor a LazyFrame.
# @raise [InvalidOperationError]
#   If `how` is "align" and the frames have no column in common.
def concat(items, rechunk: true, how: "vertical", parallel: true)
  elems = items.to_a

  if elems.empty?
    raise ArgumentError, "cannot concat empty list"
  end

  if how == "align"
    if !elems[0].is_a?(DataFrame) && !elems[0].is_a?(LazyFrame)
      msg = "'align' strategy is not supported for #{elems[0].class.name}"
      raise TypeError, msg
    end

    # establish common columns, maintaining the order in which they appear
    all_columns = elems.flat_map { |e| e.collect_schema.names }
    key = all_columns.uniq.map.with_index.to_h
    common_cols = elems.map { |e| e.collect_schema.names }
      .reduce { |x, y| Set.new(x) & Set.new(y) }
      .sort_by { |k| key[k] }
    # we require at least one key column for 'align'
    if common_cols.empty?
      msg = "'align' strategy requires at least one common column"
      raise InvalidOperationError, msg
    end

    # align the frame data using a full outer join with no suffix-resolution
    # (so we raise an error in case of column collision, like "horizontal")
    lf = elems.map { |df| df.lazy }.reduce do |x, y|
      x.join(
        y,
        how: "full",
        on: common_cols,
        suffix: "_PL_CONCAT_RIGHT",
        maintain_order: "right_left"
      )
      # Coalesce full outer join columns
      .with_columns(
        common_cols.map { |name| F.coalesce([name, "#{name}_PL_CONCAT_RIGHT"]) }
      )
      .drop(common_cols.map { |name| "#{name}_PL_CONCAT_RIGHT" })
    end.sort(common_cols)

    # This path returns early, so `rechunk` is not applied here.
    eager = elems[0].is_a?(DataFrame)
    return eager ? lf.collect : lf
  end

  first = elems[0]

  if first.is_a?(DataFrame)
    if how == "vertical"
      out = Utils.wrap_df(Plr.concat_df(elems))
    elsif how == "vertical_relaxed"
      # the relaxed variants go through the lazy engine and collect immediately
      out = Utils.wrap_ldf(
        Plr.concat_lf(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(no_optimization: true)
    elsif how == "diagonal"
      out = Utils.wrap_df(Plr.concat_df_diagonal(elems))
    elsif how == "diagonal_relaxed"
      out = Utils.wrap_ldf(
        Plr.concat_lf_diagonal(
          elems.map { |df| df.lazy },
          rechunk,
          parallel,
          true
        )
      ).collect(no_optimization: true)
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(elems))
    else
      raise ArgumentError, "how must be one of {'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', 'align'}, got #{how}"
    end
  elsif first.is_a?(LazyFrame)
    # LazyFrame results are returned directly; `rechunk` is handed to the
    # lazy concat call instead of being applied afterwards.
    if how == "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, false))
    elsif how == "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(elems, rechunk, parallel, true))
    elsif how == "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, false))
    elsif how == "diagonal_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(elems, rechunk, parallel, true))
    elsif how == "horizontal"
      return Utils.wrap_ldf(Plr.concat_lf_horizontal(elems, parallel))
    else
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', 'diagonal', 'diagonal_relaxed', 'horizontal', and 'align' concat strategy."
    end
  elsif first.is_a?(Series)
    if how == "vertical"
      out = Utils.wrap_s(Plr.concat_series(elems))
    else
      msg = "Series only supports 'vertical' concat strategy"
      raise ArgumentError, msg
    end
  elsif first.is_a?(Expr)
    # expressions are chained together with `append`, starting from the first
    out = first
    elems[1..-1].each do |e|
      out = out.append(e)
    end
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  if rechunk
    out.rechunk
  else
    out
  end
end

#concat_list(exprs, *more_exprs) ⇒ `Expr`

Concat the arrays in a Series dtype List in linear time.

Examples:

Concatenate two existing list columns. Null values are propagated.

df = Polars::DataFrame.new({"a" => [[1, 2], [3], [4, 5]], "b" => [[4], [], nil]})
df.with_columns(concat_list: Polars.concat_list("a", "b"))
# =>
# shape: (3, 3)
# ┌───────────┬───────────┬─────────────┐
# │ a         ┆ b         ┆ concat_list │
# │ ---       ┆ ---       ┆ ---         │
# │ list[i64] ┆ list[i64] ┆ list[i64]   │
# ╞═══════════╪═══════════╪═════════════╡
# │ [1, 2]    ┆ [4]       ┆ [1, 2, 4]   │
# │ [3]       ┆ []        ┆ [3]         │
# │ [4, 5]    ┆ null      ┆ null        │
# └───────────┴───────────┴─────────────┘

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

df.select("a", concat_list: Polars.concat_list("a", Polars.lit("x")))
# =>
# shape: (3, 2)
# ┌───────────┬─────────────────┐
# │ a         ┆ concat_list     │
# │ ---       ┆ ---             │
# │ list[i64] ┆ list[str]       │
# ╞═══════════╪═════════════════╡
# │ [1, 2]    ┆ ["1", "2", "x"] │
# │ [3]       ┆ ["3", "x"]      │
# │ [4, 5]    ┆ ["4", "5", "x"] │
# └───────────┴─────────────────┘

Create lagged columns and collect them into a list. This mimics a rolling window.

df = Polars::DataFrame.new({"A" => [1.0, 2.0, 9.0, 2.0, 13.0]})
df = df.select(3.times.map { |i| Polars.col("A").shift(i).alias("A_lag_#{i}") })
df.select(
  Polars.concat_list(3.times.map { |i| "A_lag_#{i}" }.reverse).alias("A_rolling")
)
# =>
# shape: (5, 1)
# ┌───────────────────┐
# │ A_rolling         │
# │ ---               │
# │ list[f64]         │
# ╞═══════════════════╡
# │ [null, null, 1.0] │
# │ [null, 1.0, 2.0]  │
# │ [1.0, 2.0, 9.0]   │
# │ [2.0, 9.0, 2.0]   │
# │ [9.0, 2.0, 13.0]  │
# └───────────────────┘

Returns:

(Expr)

# File 'lib/polars/functions/as_datatype.rb', line 138

# Build an expression that concatenates the given inputs into a list.
#
# @param exprs [Object]
#   First input (or collection of inputs), parsed into expressions.
# @param more_exprs [Array]
#   Additional inputs, parsed together with `exprs`.
#
# @return [Expr]
def concat_list(exprs, *more_exprs)
  rbexprs = Utils.parse_into_list_of_expressions(exprs, *more_exprs)
  combined = Plr.concat_list(rbexprs)
  Utils.wrap_expr(combined)
end

#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ `Expr`

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      sep: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

exprs (Object) —
Columns to concat into a Utf8 Series.
sep (String) (defaults to: "") —
String value that will be used to separate the values.
ignore_nulls (Boolean) (defaults to: false) —
Ignore null values. Defaults to false, in which case a null in any input makes the result null (as in the example above).

Returns:

(Expr)

# File 'lib/polars/functions/as_datatype.rb', line 262

# Build an expression that joins the given inputs into a single string value.
#
# @param exprs [Object]
#   Inputs to concatenate, parsed into a list of expressions.
# @param sep [String]
#   Separator passed through to `Plr.concat_str`.
# @param ignore_nulls [Boolean]
#   Passed through to `Plr.concat_str`.
#
# @return [Expr]
def concat_str(exprs, sep: "", ignore_nulls: false)
  rbexprs = Utils.parse_into_list_of_expressions(exprs)
  joined = Plr.concat_str(rbexprs, sep, ignore_nulls)
  Utils.wrap_expr(joined)
end

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ `Expr`

Compute the Pearson's or Spearman rank correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Parameters:

a (Object) —
Column name or Expression.
b (Object) —
Column name or Expression.
method ("pearson", "spearman") (defaults to: "pearson") —
Correlation method.
ddof (Integer) (defaults to: nil) —
"Delta Degrees of Freedom". This parameter has no effect on the result; passing a non-nil value only emits a warning. Do not use it.
propagate_nans (Boolean) (defaults to: false) —
If true, any NaN encountered will lead to NaN in the output. Defaults to false, where NaN are regarded as larger than any finite number and thus lead to the highest rank.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 737

# Compute the Pearson or Spearman rank correlation between two columns.
#
# @param a [Object]
#   Column name or expression.
# @param b [Object]
#   Column name or expression.
# @param method ["pearson", "spearman"]
#   Correlation method.
# @param ddof [Integer]
#   Has no effect; a warning is emitted when a non-nil value is passed.
# @param propagate_nans [Boolean]
#   Only forwarded for the "spearman" method.
#
# @return [Expr]
#
# @raise [ArgumentError] if `method` is neither "pearson" nor "spearman".
def corr(
  a,
  b,
  method: "pearson",
  ddof: nil,
  propagate_nans: false
)
  unless ddof.nil?
    warn "The `ddof` parameter has no effect. Do not use it."
  end

  a = Utils.parse_into_expression(a)
  b = Utils.parse_into_expression(b)

  if method == "pearson"
    Utils.wrap_expr(Plr.pearson_corr(a, b))
  elsif method == "spearman"
    Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
  else
    # Ruby strings need no brace escaping; doubled braces would be printed verbatim.
    msg = "method must be one of {'pearson', 'spearman'}, got #{method}"
    raise ArgumentError, msg
  end
end

#count(*columns) ⇒ `Expr`

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 83

# Count values in the named columns, or (deprecated) the rows of the context.
#
# @param columns [Array]
#   One or more column names. When none are given, a deprecation warning is
#   emitted and a length expression aliased to "count" is returned.
#
# @return [Expr]
def count(*columns)
  unless columns.empty?
    return col(*columns).count
  end

  # Zero-argument form is kept only for backwards compatibility.
  warn "`Polars.count` is deprecated. Use `Polars.length` instead."
  row_count = Plr.len._alias("count")
  Utils.wrap_expr(row_count)
end

#cov(a, b, ddof: 1) ⇒ `Expr`

Compute the covariance between two columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Parameters:

a (Object) —
Column name or Expression.
b (Object) —
Column name or Expression.
ddof (Integer) (defaults to: 1) —
"Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 792

# Build an expression for the covariance between two columns/expressions.
#
# @param a [Object]
#   Column name or expression.
# @param b [Object]
#   Column name or expression.
# @param ddof [Integer]
#   Passed through to `Plr.cov`.
#
# @return [Expr]
def cov(a, b, ddof: 1)
  lhs = Utils.parse_into_expression(a)
  rhs = Utils.parse_into_expression(b)
  covariance = Plr.cov(lhs, rhs, ddof)
  Utils.wrap_expr(covariance)
end

#cum_count(*columns, reverse: false) ⇒ `Expr`

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

columns (Array) —
Name(s) of the columns to use.
reverse (Boolean) (defaults to: false) —
Reverse the operation.

Returns:

(Expr)



120
121
122

# File 'lib/polars/functions/lazy.rb', line 120

# Shorthand for `col(*columns).cum_count(reverse: reverse)`.
#
# @param columns [Array]
#   Name(s) of the columns to use.
# @param reverse [Boolean]
#   Forwarded to `cum_count`.
#
# @return [Expr]
def cum_count(*columns, reverse: false)
  selection = col(*columns)
  selection.cum_count(reverse: reverse)
end

#cum_fold(acc, function, exprs, returns_scalar: false, return_dtype: nil, include_init: false) ⇒ `Object` Also known as: cumfold

Note:

If you simply want the first encountered expression as accumulator, consider using cumreduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [3, 4, 5],
    "c" => [5, 6, 7]
  }
)
df.with_columns(
  Polars.cum_fold(Polars.lit(1), ->(acc, x) { acc + x }, Polars.all)
)
# =>
# shape: (3, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cum_fold  │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[3] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {2,5,10}  │
# │ 2   ┆ 4   ┆ 6   ┆ {3,7,13}  │
# │ 3   ┆ 5   ┆ 7   ┆ {4,9,16}  │
# └─────┴─────┴─────┴───────────┘

Parameters:

acc (Object) —
Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).
function (Object) —
Function to apply over the accumulator and the value. Fn(acc, value) -> new_value
exprs (Object) —
Expressions to aggregate over. May also be a wildcard expression.
returns_scalar (Boolean) (defaults to: false) —
Whether or not function applied returns a scalar. This must be set correctly by the user.
return_dtype (Object) (defaults to: nil) —
Output datatype. If not set, the dtype will be inferred based on the dtype of the accumulator.
include_init (Boolean) (defaults to: false) —
Include the initial accumulator state as struct field.

Returns:

(Object)

# File 'lib/polars/functions/lazy.rb', line 937

# Cumulatively fold `function` over `exprs`, starting from `acc`.
#
# @param acc [Object]
#   Accumulator; parsed into an expression with strings treated as literals.
# @param function [Object]
#   Callable handed to `Plr.cum_fold` unchanged.
# @param exprs [Object]
#   Expressions to fold over. A single `Expr` is wrapped in an array first.
# @param returns_scalar [Boolean]
#   Passed through to `Plr.cum_fold`.
# @param return_dtype [Object]
#   Optional output data type; parsed into a data type expression when given.
# @param include_init [Boolean]
#   Passed through to `Plr.cum_fold`.
#
# @return [Object]
def cum_fold(
  acc,
  function,
  exprs,
  returns_scalar: false,
  return_dtype: nil,
  include_init: false
)
  rbacc = Utils.parse_into_expression(acc, str_as_lit: true)
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs

  rbdtype =
    if return_dtype.nil?
      nil
    else
      Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
    end

  rbexprs = Utils.parse_into_list_of_expressions(inputs)
  folded = Plr.cum_fold(rbacc, function, rbexprs, returns_scalar, rbdtype, include_init)
  # the result column is always named "cum_fold"
  Utils.wrap_expr(folded._alias("cum_fold"))
end

#cum_sum(*names) ⇒ `Expr` Also known as: cumsum

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

names (Object) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)



277
278
279

# File 'lib/polars/functions/aggregation/vertical.rb', line 277

# Shorthand for `col(*names).cum_sum`.
#
# @param names [Object]
#   Name(s) of the columns to use in the aggregation.
#
# @return [Expr]
def cum_sum(*names)
  selected = col(*names)
  selected.cum_sum
end

#cum_sum_horizontal(*exprs) ⇒ `Expr` Also known as: cumsum_horizontal

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 241

# Cumulatively sum values horizontally across the given columns.
#
# Implemented as a `cum_fold` with addition, seeded with a UInt32 zero literal,
# and the result is aliased to "cum_sum".
#
# @param exprs [Array]
#   Column(s) to use in the aggregation; parsed into expressions.
#
# @return [Expr]
def cum_sum_horizontal(*exprs)
  wrapped = Utils.parse_into_list_of_expressions(*exprs).map do |rbexpr|
    Utils.wrap_expr(rbexpr)
  end

  # use u32 as that will not cast to float as eagerly
  seed = Polars.lit(0).cast(UInt32)
  adder = ->(a, b) { a + b }

  Polars.cum_fold(seed, adder, wrapped).alias("cum_sum")
end

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object`

Note:

If both start and stop are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", eager: true).alias(
  "date"
)
# =>
# shape: (3,)
# Series: 'date' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Parameters:

start (Object) —
Lower bound of the date range.
stop (Object) —
Upper bound of the date range.
interval (Object) (defaults to: "1d") —
Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.
closed ("both", "left", "right", "none") (defaults to: "both") —
Define whether the temporal window interval is closed or not.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/date_range.rb', line 37

# Build a date range expression, optionally evaluated immediately.
#
# @param start [Object]
#   Lower bound of the date range.
# @param stop [Object]
#   Upper bound of the date range.
# @param interval [Object]
#   Interval between values; normalised with `Utils.parse_interval_argument`.
# @param closed ["both", "left", "right", "none"]
#   Passed through to `Plr.date_range`.
# @param eager [Boolean]
#   When true, the expression is selected and converted with `to_series`.
#
# @return [Object]
def date_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  parsed_interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  range_expr = Utils.wrap_expr(
    Plr.date_range(lower, upper, parsed_interval, closed)
  )

  eager ? F.select(range_expr).to_series : range_expr
end

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object`

Note:

interval is created according to the following string language:

1ns (1 nanosecond)
1us (1 microsecond)
1ms (1 millisecond)
1s (1 second)
1m (1 minute)
1h (1 hour)
1d (1 calendar day)
1w (1 calendar week)
1mo (1 calendar month)
1q (1 calendar quarter)
1y (1 calendar year)

Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year".

Create a column of date ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Date.new(2022, 1, 1), Date.new(2022, 1, 2)],
    "end" => Date.new(2022, 1, 3)
  }
)
df.with_columns(date_range: Polars.date_ranges("start", "end"))
# =>
# shape: (2, 3)
# ┌────────────┬────────────┬─────────────────────────────────┐
# │ start      ┆ end        ┆ date_range                      │
# │ ---        ┆ ---        ┆ ---                             │
# │ date       ┆ date       ┆ list[date]                      │
# ╞════════════╪════════════╪═════════════════════════════════╡
# │ 2022-01-01 ┆ 2022-01-03 ┆ [2022-01-01, 2022-01-02, 2022-… │
# │ 2022-01-02 ┆ 2022-01-03 ┆ [2022-01-02, 2022-01-03]        │
# └────────────┴────────────┴─────────────────────────────────┘

Parameters:

start (Object) —
Lower bound of the date range.
stop (Object) —
Upper bound of the date range.
interval (Object) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language (see the note above).
closed ("both", "left", "right", "none") (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/date_range.rb', line 116

# Build an expression producing a column of date ranges, optionally evaluated immediately.
#
# @param start [Object]
#   Lower bound of the date range.
# @param stop [Object]
#   Upper bound of the date range.
# @param interval [Object]
#   Interval between values; normalised with `Utils.parse_interval_argument`.
# @param closed ["both", "left", "right", "none"]
#   Passed through to `Plr.date_ranges`.
# @param eager [Boolean]
#   When true, the expression is selected and converted with `to_series`.
#
# @return [Object]
def date_ranges(
  start,
  stop,
  interval = "1d",
  closed: "both",
  eager: false
)
  parsed_interval = Utils.parse_interval_argument(interval)
  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  ranges_expr = Utils.wrap_expr(
    Plr.date_ranges(lower, upper, parsed_interval, closed)
  )

  eager ? F.select(ranges_expr).to_series : ranges_expr
end

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object`

Generate a datetime range.

Examples:

Using Polars duration string to specify the interval:

Polars.datetime_range(
  DateTime.new(2022, 1, 1), DateTime.new(2022, 3, 1), "1mo", eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns]]
# [
#         2022-01-01 00:00:00
#         2022-02-01 00:00:00
#         2022-03-01 00:00:00
# ]

Specifying a time zone:

Polars.datetime_range(
  DateTime.new(2022, 1, 1),
  DateTime.new(2022, 3, 1),
  "1mo",
  time_zone: "America/New_York",
  eager: true
).alias("datetime")
# =>
# shape: (3,)
# Series: 'datetime' [datetime[ns, America/New_York]]
# [
#         2022-01-01 00:00:00 EST
#         2022-02-01 00:00:00 EST
#         2022-03-01 00:00:00 EST
# ]

Parameters:

start (Object) —
Lower bound of the datetime range.
stop (Object) —
Upper bound of the datetime range.
interval (String) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil) —
Time unit of the resulting Datetime data type.
time_zone (String) (defaults to: nil) —
Time zone of the resulting Datetime data type.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/datetime_range.rb', line 52

# Build a datetime range expression, optionally evaluated immediately.
#
# @param start [Object]
#   Lower bound of the datetime range.
# @param stop [Object]
#   Upper bound of the datetime range.
# @param interval [String]
#   Interval between values; normalised with `Utils.parse_interval_argument`.
# @param closed ['both', 'left', 'right', 'none']
#   Passed through to `Plr.datetime_range`.
# @param time_unit [nil, 'ns', 'us', 'ms']
#   Time unit of the result. When nil and the parsed interval contains "ns",
#   "ns" is used.
# @param time_zone [String]
#   Passed through to `Plr.datetime_range`.
# @param eager [Boolean]
#   When true, the expression is selected and converted with `to_series`.
#
# @return [Object]
def datetime_range(
  start,
  stop,
  interval = "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  parsed_interval = Utils.parse_interval_argument(interval)
  # a nanosecond interval needs nanosecond resolution unless the caller chose a unit
  unit = time_unit.nil? && parsed_interval.include?("ns") ? "ns" : time_unit

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)
  range_expr = Utils.wrap_expr(
    Plr.datetime_range(lower, upper, parsed_interval, closed, unit, time_zone)
  )

  eager ? Polars.select(range_expr).to_series : range_expr
end

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object`

Create a column of datetime ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "end" => DateTime.new(2022, 1, 3),
  }
)
df.select(datetime_range: Polars.datetime_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌─────────────────────────────────┐
# │ datetime_range                  │
# │ ---                             │
# │ list[datetime[ns]]              │
# ╞═════════════════════════════════╡
# │ [2022-01-01 00:00:00, 2022-01-… │
# │ [2022-01-02 00:00:00, 2022-01-… │
# └─────────────────────────────────┘

Parameters:

start (Object) —
Lower bound of the datetime range.
stop (Object) —
Upper bound of the datetime range.
interval (String) (defaults to: "1d") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
time_unit (nil, 'ns', 'us', 'ms') (defaults to: nil) —
Time unit of the resulting Datetime data type.
time_zone (String) (defaults to: nil) —
Time zone of the resulting Datetime data type.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/datetime_range.rb', line 119

# Build an expression producing a column of datetime ranges, optionally evaluated immediately.
#
# @param start [Object]
#   Lower bound of the datetime range.
# @param stop [Object]
#   Upper bound of the datetime range.
# @param interval [String]
#   Interval between values (keyword argument); normalised with
#   `Utils.parse_interval_argument`.
# @param closed ['both', 'left', 'right', 'none']
#   Passed through to `Plr.datetime_ranges`.
# @param time_unit [nil, 'ns', 'us', 'ms']
#   Time unit of the result. When nil and the parsed interval contains "ns",
#   "ns" is used.
# @param time_zone [String]
#   Passed through to `Plr.datetime_ranges`.
# @param eager [Boolean]
#   When true, the expression is selected and converted with `to_series`.
#
# @return [Object]
def datetime_ranges(
  start,
  stop,
  interval: "1d",
  closed: "both",
  time_unit: nil,
  time_zone: nil,
  eager: false
)
  parsed_interval = Utils.parse_interval_argument(interval)
  # a nanosecond interval needs nanosecond resolution unless the caller chose a unit
  unit = time_unit.nil? && parsed_interval.include?("ns") ? "ns" : time_unit

  lower = Utils.parse_into_expression(start)
  upper = Utils.parse_into_expression(stop)

  ranges_expr = Utils.wrap_expr(
    Plr.datetime_ranges(lower, upper, parsed_interval, closed, unit, time_zone)
  )

  eager ? Polars.select(ranges_expr).to_series : ranges_expr
end

#disable_string_cache ⇒ `nil`

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

(nil)



90
91
92

# File 'lib/polars/string_cache.rb', line 90

# Disable the global string cache.
#
# Thin wrapper that delegates to `Plr.disable_string_cache`; the return value
# is whatever that call returns (documented as nil).
#
# @return [nil]
def disable_string_cache
  Plr.disable_string_cache
end

#dtype_of(col_or_expr) ⇒ `DataTypeExpr`

Note:

This functionality is considered unstable. It may be changed at any point without it being considered a breaking change.

Get a lazily evaluated data type (DataTypeExpr) of a column or expression.

Returns:

(DataTypeExpr)

# File 'lib/polars/functions/datatype.rb', line 10

# Get a lazily evaluated data type of a column or expression.
#
# @param col_or_expr [Object]
#   A column name (String), which is turned into a column expression, or an
#   expression, which is used as given.
#
# @return [DataTypeExpr]
def dtype_of(col_or_expr)
  expr = col_or_expr.is_a?(::String) ? F.col(col_or_expr) : col_or_expr
  rbdtype = RbDataTypeExpr.of_expr(expr._rbexpr)
  DataTypeExpr._from_rbdatatype_expr(rbdtype)
end

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ `Expr`

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:

(Expr)

# File 'lib/polars/functions/as_datatype.rb', line 35

# Create a Duration expression from distinct time components.
#
# Each component that is not nil is parsed into an expression (strings are
# treated as column names, not literals); nil components are passed through
# as nil.
#
# @param weeks [Object] Number of weeks.
# @param days [Object] Number of days.
# @param hours [Object] Number of hours.
# @param minutes [Object] Number of minutes.
# @param seconds [Object] Number of seconds.
# @param milliseconds [Object] Number of milliseconds.
# @param microseconds [Object] Number of microseconds.
# @param nanoseconds [Object] Number of nanoseconds.
# @param time_unit [String] Passed through to `Plr.duration`.
#
# @return [Expr]
def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: "us"
)
  # Keep the components in the positional order `Plr.duration` expects.
  components = [
    weeks,
    days,
    hours,
    minutes,
    seconds,
    milliseconds,
    microseconds,
    nanoseconds
  ].map do |component|
    component.nil? ? nil : Utils.parse_into_expression(component, str_as_lit: false)
  end

  Utils.wrap_expr(Plr.duration(*components, time_unit))
end

#element ⇒ `Expr`

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:

(Expr)



36
37
38

# File 'lib/polars/functions/lazy.rb', line 36

# Alias for an element being evaluated in an eval expression.
#
# Implemented as a column expression whose name is the empty string.
#
# @return [Expr]
def element
  col("")
end

#enable_string_cache ⇒ `nil`

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

(nil)



63
64
65

# File 'lib/polars/string_cache.rb', line 63

# Enable the global string cache.
#
# Thin wrapper that delegates to `Plr.enable_string_cache`; the return value
# is whatever that call returns (documented as nil).
#
# @return [nil]
def enable_string_cache
  Plr.enable_string_cache
end

#exclude(columns) ⇒ `Object`

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

columns (Object) —
Column(s) to exclude from selection. This can be:
- a column name, or multiple column names
- a regular expression starting with ^ and ending with $
- a dtype or multiple dtypes

Returns:

(Object)



1121
1122
1123

# File 'lib/polars/functions/lazy.rb', line 1121

# Exclude certain columns from a wildcard selection.
#
# Starts from the wildcard column expression `col("*")` and calls `exclude`
# on it with the given argument.
#
# @param columns [Object]
#   Column(s) to exclude from selection.
#
# @return [Object]
def exclude(columns)
  everything = col("*")
  everything.exclude(columns)
end

#field(name) ⇒ `Expr`

Select a field in the current struct.with_fields scope.

Parameters:

name (Object) —
Name of the field(s) to select.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 9

# Select a field in the current struct.with_fields scope.
#
# @param name [Object]
#   Name of the field(s) to select. A single String is wrapped in an array;
#   any other value is passed on unchanged.
#
# @return [Expr]
def field(name)
  names = name.is_a?(::String) ? [name] : name
  Utils.wrap_expr(Plr.field(names))
end

#first(*columns) ⇒ `Expr`

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘

df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 472

# Get the first value of the named columns, or the first column of the
# context when no names are given.
#
# @param columns [Array] Zero or more column names.
# @return [Expr]
def first(*columns)
  # Without names, fall back to the selector for the first column.
  return cs.first.as_expr if columns.empty?

  selected = col(*columns)
  selected.first
end

#fold(acc, function, exprs, returns_scalar: false, return_dtype: nil) ⇒ `Expr`

Accumulate over multiple columns horizontally/row wise with a left fold.

Examples:

Horizontally sum over all columns and add 1.

df = Polars::DataFrame.new(
 {
   "a" => [1, 2, 3],
   "b" => [3, 4, 5],
   "c" => [5, 6, 7]
 }
)
df.select(
  Polars.fold(
    Polars.lit(1), ->(acc, x) { acc + x }, Polars.col("*")
  ).alias("sum")
)
# =>
# shape: (3, 1)
# ┌─────┐
# │ sum │
# │ --- │
# │ i32 │
# ╞═════╡
# │ 10  │
# │ 13  │
# │ 16  │
# └─────┘

You can also apply a condition/predicate on all columns:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [0, 1, 2]
  }
)
df.filter(
  Polars.fold(
    Polars.lit(true),
    ->(acc, x) { acc & x },
    Polars.col("*") > 1
  )
)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 856

# Accumulate over multiple columns horizontally with a left fold.
#
# @param acc [Object] Initial accumulator; parsed with `str_as_lit: true`.
# @param function [#call] Callable handed to `Plr.fold` unchanged.
# @param exprs [Expr, Array] A single expression (wrapped in an array) or a
#   collection of expressions to fold over.
# @param returns_scalar [Boolean] Forwarded to `Plr.fold`.
# @param return_dtype [Object, nil] Optional dtype; when given it is parsed
#   into a datatype expression before being forwarded.
# @return [Expr]
def fold(acc, function, exprs, returns_scalar: false, return_dtype: nil)
  acc_rbexpr = Utils.parse_into_expression(acc, str_as_lit: true)
  inputs = exprs.is_a?(Expr) ? [exprs] : exprs

  # Evaluates to nil when no return dtype was requested.
  dtype_rbexpr =
    unless return_dtype.nil?
      Utils.parse_into_datatype_expr(return_dtype)._rbdatatype_expr
    end

  rbexprs = Utils.parse_into_list_of_expressions(inputs)
  folded = Plr.fold(acc_rbexpr, function, rbexprs, returns_scalar, dtype_rbexpr)
  Utils.wrap_expr(folded)
end

#format(f_string, *args) ⇒ `Expr`

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

f_string (String) —
A string with placeholders. For example: "hello_{}" or "{}_world".
args (Object) —
Expression(s) that fill the placeholders

Returns:

(Expr)

# File 'lib/polars/functions/as_datatype.rb', line 300

# Format expressions as a string by filling `{}` placeholders in order.
#
# @param f_string [String] Template containing one `{}` per argument.
# @param args [Array] Values parsed into expressions, one per placeholder.
# @return [Expr] The result of `concat_str` over the literal and expression pieces.
# @raise [ArgumentError] If the placeholder count differs from the argument count.
def format(f_string, *args)
  unless f_string.scan("{}").length == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  pending = args.each
  # Splitting on a capture group keeps the "{}" markers as their own segments.
  segments = f_string.split(/(\{\})/)

  pieces = segments.each_with_object([]) do |segment, collected|
    if segment == "{}"
      collected << Utils.wrap_expr(Utils.parse_into_expression(pending.next))
    elsif segment.length > 0
      collected << lit(segment)
    end
  end

  concat_str(pieces, sep: "")
end

#from_epoch(column, unit: "s", eager: false) ⇒ `Object`

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

unit: "d" returns pl.Date
unit: "s" returns pl.Datetime["us"]
unit: "ms" returns pl.Datetime["ms"]
unit: "us" returns pl.Datetime["us"]
unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

column (Object) —
Series or expression to parse integers to pl.Datetime.
unit (String) (defaults to: "s") —
The unit of the timesteps since epoch time.
eager (Boolean) (defaults to: false) —
If eager evaluation is true, a Series is returned instead of an Expr.

Returns:

(Object)

# File 'lib/polars/functions/lazy.rb', line 1452

# Parse an epoch timestamp into a Date or Datetime column.
#
# @param column [Object] A string-like column name (turned into `col(column)`),
#   a Series or Expr (used as is), or any other value, which is passed to `Series.new`.
# @param unit [String] `"d"` casts to Date; `"s"` multiplies by 1_000_000 and
#   casts to Datetime("us"); any unit in `Utils::DTYPE_TEMPORAL_UNITS` casts to
#   Datetime of that unit.
# @param eager [Boolean] When true, evaluate against the Series and return a Series.
# @return [Object] An expression, or a Series when `eager` is true.
# @raise [ArgumentError] For an unsupported unit, or when `eager` is true and
#   the input did not resolve to a Series.
def from_epoch(column, unit: "s", eager: false)
  if Utils.strlike?(column)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(Date)
  elsif unit == "s"
    # Seconds are scaled to microseconds before the cast.
    expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    expr = column.cast(Datetime.new(unit))
  else
    # Single braces: the doubled "{{ }}" was a Python f-string escape that Ruby prints literally.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  return expr unless eager

  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end

#groups(column) ⇒ `Object`

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:

(Object)



1128
1129
1130

# File 'lib/polars/functions/lazy.rb', line 1128

# Shorthand for `col(column).agg_groups`.
#
# @param column [Object] Column name handed to `col`.
# @return [Object] The result of `agg_groups` on that column.
def groups(column)
  target = col(column)
  target.agg_groups
end

#head(column, n = 10) ⇒ `Expr`

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

column (Object) —
Column name.
n (Integer) (defaults to: 10) —
Number of rows to return.

Returns:

(Expr)



629
630
631

# File 'lib/polars/functions/lazy.rb', line 629

# Shorthand for `col(column).head(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return (10 unless specified).
# @return [Expr]
def head(column, n = 10)
  target = col(column)
  target.head(n)
end

#implode(*columns) ⇒ `Expr`

Aggregate all column values into a list.

This function is syntactic sugar for col(name).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘

df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)



163
164
165

# File 'lib/polars/functions/lazy.rb', line 163

# Shorthand for `col(*columns).implode`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def implode(*columns)
  selected = col(*columns)
  selected.implode
end

#int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ `Expr`, `Series` Also known as: arange

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

start (Integer, Expr, Series) —
Lower bound of range.
stop (Integer, Expr, Series) (defaults to: nil) —
Upper bound of range.
step (Integer) (defaults to: 1) —
Step size of the range.
eager (Boolean) (defaults to: false) —
If eager evaluation is true, a Series is returned instead of an Expr.
dtype (Symbol) (defaults to: nil) —
Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:

(Expr, Series)

# File 'lib/polars/functions/range/int_range.rb', line 31

def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
  if stop.nil?
    stop = start
    start = 0
  end

  start = Utils.parse_into_expression(start)
  stop = Utils.parse_into_expression(stop)
  dtype ||= Int64
  dtype = dtype.to_s if dtype.is_a?(Symbol)
  result = Utils.wrap_expr(Plr.int_range(start, stop, step, dtype)).alias("arange")

  if eager
    return select(result).to_series
  end

  result
end

#last(*columns) ⇒ `Expr`

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘

df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names. If not provided (default), returns an expression to take the last column of the context instead.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 532

# Get the last value of the named columns, or the last column of the
# context when no names are given.
#
# @param columns [Array] Zero or more column names.
# @return [Expr]
def last(*columns)
  # Without names, fall back to the selector for the last column.
  return cs.last.as_expr if columns.empty?

  selected = col(*columns)
  selected.last
end

#len ⇒ `Expr` Also known as: length

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using `len` in conjunction with `int_range`.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:

(Expr)



44
45
46

# File 'lib/polars/functions/len.rb', line 44

# Return an expression for the number of rows in the context.
#
# @return [Expr] The wrapped result of `Plr.len`.
def len
  row_count = Plr.len
  Utils.wrap_expr(row_count)
end

#lit(value, dtype: nil, allow_object: nil) ⇒ `Expr`

Return an expression representing a literal value.

Returns:

(Expr)

# File 'lib/polars/functions/lit.rb', line 6

# Return an expression representing a literal value.
#
# @param value [Object] The literal. Time/DateTime values are converted to an
#   integer with `Utils.datetime_to_int` and cast to Datetime; Date values go
#   through a UTC midnight Time and are cast to Date; Series, Array and
#   Numo::NArray values are wrapped as series literals.
# @param dtype [Object, nil] Optional dtype. For Time/DateTime values its
#   `time_unit` (default "ns") and `time_zone` are read; for other scalars the
#   literal is cast to it.
# @param allow_object [Boolean, nil] Forwarded to `Plr.lit`.
# @return [Expr]
def lit(value, dtype: nil, allow_object: nil)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    time_unit = dtype&.time_unit || "ns"
    # Safe navigation: `dtype.&time_zone` would call the `&` operator on dtype
    # and raise for any dtype that does not define it.
    time_zone = dtype&.time_zone
    e = lit(Utils.datetime_to_int(value, time_unit)).cast(Datetime.new(time_unit))
    if time_zone
      return e.dt.replace_time_zone(time_zone.to_s)
    else
      return e
    end
  elsif value.is_a?(::Date)
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    value = value._s
    return Utils.wrap_expr(Plr.lit(value, allow_object, false))
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    # Array-likes become a one-row series named "literal".
    return Utils.wrap_expr(Plr.lit(Series.new("literal", [value.to_a], dtype: dtype)._s, allow_object, true))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object, true)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object, true))
end

#max(*names) ⇒ `Expr`

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)



135
136
137

# File 'lib/polars/functions/aggregation/vertical.rb', line 135

# Shorthand for `col(*names).max`.
#
# @param names [Array] Name(s) of the columns to aggregate.
# @return [Expr]
def max(*names)
  selected = col(*names)
  selected.max
end

#max_horizontal(*exprs) ⇒ `Expr`

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 103

# Get the maximum value horizontally across columns.
#
# @param exprs [Array] Inputs parsed by `Utils.parse_into_list_of_expressions`.
# @return [Expr] The wrapped result of `Plr.max_horizontal`.
def max_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  row_max = Plr.max_horizontal(parsed)
  Utils.wrap_expr(row_max)
end

#mean(*columns) ⇒ `Expr` Also known as: avg

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘

df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)



285
286
287

# File 'lib/polars/functions/lazy.rb', line 285

# Shorthand for `col(*columns).mean`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def mean(*columns)
  selected = col(*columns)
  selected.mean
end

#mean_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr`

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 208

# Compute the mean of all values horizontally across columns.
#
# @param exprs [Array] Inputs parsed by `Utils.parse_into_list_of_expressions`.
# @param ignore_nulls [Boolean] Forwarded to `Plr.mean_horizontal`.
# @return [Expr]
def mean_horizontal(*exprs, ignore_nulls: true)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  row_mean = Plr.mean_horizontal(parsed, ignore_nulls)
  Utils.wrap_expr(row_mean)
end

#median(*columns) ⇒ `Expr`

Get the median value.

This function is syntactic sugar for col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)



329
330
331

# File 'lib/polars/functions/lazy.rb', line 329

# Shorthand for `col(*columns).median`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def median(*columns)
  selected = col(*columns)
  selected.median
end

#min(*names) ⇒ `Expr`

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)



190
191
192

# File 'lib/polars/functions/aggregation/vertical.rb', line 190

# Shorthand for `col(*names).min`.
#
# @param names [Array] Name(s) of the columns to aggregate.
# @return [Expr]
def min(*names)
  selected = col(*names)
  selected.min
end

#min_horizontal(*exprs) ⇒ `Expr`

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 136

# Get the minimum value horizontally across columns.
#
# @param exprs [Array] Inputs parsed by `Utils.parse_into_list_of_expressions`.
# @return [Expr] The wrapped result of `Plr.min_horizontal`.
def min_horizontal(*exprs)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  row_min = Plr.min_horizontal(parsed)
  Utils.wrap_expr(row_min)
end

#n_unique(*columns) ⇒ `Expr`

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

columns (Array) —
One or more column names.

Returns:

(Expr)



372
373
374

# File 'lib/polars/functions/lazy.rb', line 372

# Shorthand for `col(*columns).n_unique`.
#
# @param columns [Array] One or more column names.
# @return [Expr]
def n_unique(*columns)
  selected = col(*columns)
  selected.n_unique
end

#nth(*indices, strict: true) ⇒ `Expr`

Get the nth column(s) of the context.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.nth(1))
# =>
# shape: (3, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# │ 5   │
# │ 2   │
# └─────┘

df.select(Polars.nth(2, 0))
# =>
# shape: (3, 2)
# ┌─────┬─────┐
# │ c   ┆ a   │
# │ --- ┆ --- │
# │ str ┆ i64 │
# ╞═════╪═════╡
# │ foo ┆ 1   │
# │ bar ┆ 8   │
# │ baz ┆ 3   │
# └─────┴─────┘

Parameters:

indices (Array) —
One or more indices representing the columns to retrieve.

Returns:

(Expr)



581
582
583

# File 'lib/polars/functions/lazy.rb', line 581

# Get the nth column(s) of the context.
#
# @param indices [Array] One or more column indices.
# @param strict [Boolean] Passed to the selector as `require_all`.
# @return [Expr]
def nth(*indices, strict: true)
  selector = cs.by_index(*indices, require_all: strict)
  selector.as_expr
end

#ones(n, dtype: nil, eager: true) ⇒ `Object`

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

n (Integer) —
Length of the resulting column.
dtype (Object) (defaults to: nil) —
Data type of the resulting column. Defaults to Float64.
eager (Boolean) (defaults to: true) —
Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/repeat.rb', line 85

# Construct a column of length n filled with ones, aliased as "ones".
#
# @param n [Integer] Length of the resulting column.
# @param dtype [Object, nil] Data type, used to pick the fill value and forwarded to `repeat`.
# @param eager [Boolean] Forwarded to `repeat`.
# @return [Object] The aliased result of `repeat`.
# @raise [TypeError] If no fill value is available for the dtype.
def ones(n, dtype: nil, eager: true)
  one = _one_or_zero_by_dtype(1, dtype)
  raise TypeError, "invalid dtype for `ones`; found #{dtype}" if one.nil?

  repeated = repeat(one, n, dtype: dtype, eager: eager)
  repeated.alias("ones")
end

#quantile(column, quantile, interpolation: "nearest") ⇒ `Expr`

Syntactic sugar for Polars.col("foo").quantile(...).

Parameters:

column (String) —
Column name.
quantile (Float) —
Quantile between 0.0 and 1.0.
interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest") —
Interpolation method.

Returns:

(Expr)



1142
1143
1144

# File 'lib/polars/functions/lazy.rb', line 1142

# Shorthand for `col(column).quantile(quantile, interpolation: interpolation)`.
#
# @param column [String] Column name.
# @param quantile [Float] Quantile value, forwarded unchanged.
# @param interpolation [String] Interpolation method, forwarded unchanged.
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  target = col(column)
  target.quantile(quantile, interpolation: interpolation)
end

#repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ `Object`

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting `eager: true`.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

value (Object) —
Value to repeat.
n (Integer) —
Repeat n times.
dtype (Object) (defaults to: nil) —
Data type of the resulting column. If set to nil (default), data type is inferred from the given value. Defaults to Int32 for integer values, unless Int64 is required to fit the given value. Defaults to Float64 for float values.
eager (Boolean) (defaults to: false) —
Run eagerly and collect into a Series.
name (String) (defaults to: nil) —
Only used in eager mode. As expression, use alias.

Returns:

(Object)

# File 'lib/polars/functions/repeat.rb', line 41

# Repeat a single value n times.
#
# @param value [Object] Value to repeat; parsed with `str_as_lit: true`.
# @param n [Integer, Object] Repeat count. Integers are turned into a literal
#   via `lit`; other values must respond to `_rbexpr`.
# @param dtype [Object, nil] Forwarded to `Plr.repeat`.
# @param eager [Boolean] When true, evaluate through `select` and return a Series.
# @param name [String, nil] Deprecated; when given, a warning is emitted and
#   the expression is aliased to it.
# @return [Object] An expression, or a Series when `eager` is true.
def repeat(value, n, dtype: nil, eager: false, name: nil)
  named = !name.nil?
  warn "the `name` argument is deprecated. Use the `alias` method instead." if named

  n = lit(n) if n.is_a?(Integer)

  value_rbexpr = Utils.parse_into_expression(value, str_as_lit: true)
  expr = Utils.wrap_expr(Plr.repeat(value_rbexpr, n._rbexpr, dtype))
  expr = expr.alias(name) if named

  return select(expr).to_series if eager

  expr
end

#select(*exprs, **named_exprs) ⇒ `DataFrame`

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

exprs (Array) —
Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
named_exprs (Hash) —
Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:

(DataFrame)



1331
1332
1333

# File 'lib/polars/functions/lazy.rb', line 1331

# Run expressions without a context by selecting from an empty DataFrame.
#
# @param exprs [Array] Positional expressions, forwarded to `DataFrame#select`.
# @param named_exprs [Hash] Keyword expressions, forwarded to `DataFrame#select`.
# @return [DataFrame]
def select(*exprs, **named_exprs)
  empty_frame = DataFrame.new([])
  empty_frame.select(*exprs, **named_exprs)
end

#set_random_seed(seed) ⇒ `nil`

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

seed (Integer) —
A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

(nil)



12
13
14

# File 'lib/polars/functions/random.rb', line 12

# Set the global random seed by delegating to `Plr.set_random_seed`.
#
# @param seed [Integer] Seed value, forwarded unchanged.
def set_random_seed(seed)
  Plr.set_random_seed(seed)
end

#sql_expr(sql) ⇒ `Expr`

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ i64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4   ┆ 2     │
# │ 1   ┆ 1   ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

sql (Object) —
One or more SQL expressions.

Returns:

(Expr)

# File 'lib/polars/functions/lazy.rb', line 1515

# Parse one or more SQL expressions into polars expression(s).
#
# @param sql [String, Enumerable] A single SQL string, or a collection of them.
# @return [Expr, Array] One wrapped expression for a String, otherwise the
#   result of mapping each element to a wrapped expression.
def sql_expr(sql)
  to_expr = ->(query) { Utils.wrap_expr(Plr.sql_expr(query)) }
  return to_expr.call(sql) if sql.is_a?(::String)

  sql.map { |query| to_expr.call(query) }
end

#std(column, ddof: 1) ⇒ `Expr`

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘

df["a"].std
# => 3.605551275463989

Parameters:

column (Object) —
Column name.
ddof (Integer) (defaults to: 1) —
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)



202
203
204

# File 'lib/polars/functions/lazy.rb', line 202

# Shorthand for `col(column).std(ddof: ddof)`.
#
# @param column [Object] Column name.
# @param ddof [Integer] Delta degrees of freedom, forwarded unchanged.
# @return [Expr]
def std(column, ddof: 1)
  target = col(column)
  target.std(ddof: ddof)
end

#struct(*exprs, schema: nil, eager: false, **named_exprs) ⇒ `Object`

Collect several columns into a Series of dtype Struct.

Examples:

df = Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
)
df.select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Collect selected columns into a struct by either passing a list of columns, or by specifying each column as a positional argument.

df.select(Polars.struct("int", false).alias("my_struct"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ my_struct │
# │ ---       │
# │ struct[2] │
# ╞═══════════╡
# │ {1,false} │
# │ {2,false} │
# └───────────┘

Use keyword arguments to easily name each struct field.

df.select(Polars.struct(p: "int", q: "bool").alias("my_struct")).schema
# => {"my_struct"=>Polars::Struct({"p"=>Polars::Int64, "q"=>Polars::Boolean})}

Parameters:

exprs (Array) —
Column(s) to collect into a struct column, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
schema (Hash) (defaults to: nil) —
Optional schema that explicitly defines the struct field dtypes. If no columns or expressions are provided, schema keys are used to define columns.
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.
named_exprs (Hash) —
Additional columns to collect into the struct column, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:

(Object)

# File 'lib/polars/functions/as_datatype.rb', line 198

# Collect several columns into a struct expression (or Series).
#
# @param exprs [Array] Positional columns/expressions for the struct fields.
# @param schema [Hash, nil] Optional field-name => dtype mapping. When given
#   (and non-empty) the struct is cast to `Struct.new(schema)` with
#   `strict: false`. If no positional or named expressions are supplied, the
#   schema keys are used as the columns.
# @param eager [Boolean] When true, evaluate through `Polars.select` and return a Series.
# @param named_exprs [Hash] Additional fields given as keyword arguments.
# @return [Object] An expression, or a Series when `eager` is true.
def struct(*exprs, schema: nil, eager: false, **named_exprs)
  has_schema = !schema.nil? && !schema.empty?
  # `empty?` rather than `!any?`: literal `false`/`nil` inputs are still inputs,
  # and keyword expressions count as provided columns too.
  no_inputs = exprs.empty? && named_exprs.empty?

  rbexprs =
    if has_schema && no_inputs
      # no columns or expressions provided; create one from schema keys
      Utils.parse_into_list_of_expressions(schema.keys)
    else
      Utils.parse_into_list_of_expressions(*exprs, **named_exprs)
    end

  expr = Utils.wrap_expr(Plr.as_struct(rbexprs))
  # The schema applies whether or not explicit columns were given.
  expr = expr.cast(Struct.new(schema), strict: false) if has_schema

  if eager
    Polars.select(expr).to_series
  else
    expr
  end
end

#sum(*names) ⇒ `Expr`

Sum all values.

Syntactic sugar for col(name).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘

df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

names (Array) —
Name(s) of the columns to use in the aggregation.

Returns:

(Expr)



245
246
247

# File 'lib/polars/functions/aggregation/vertical.rb', line 245

# Shorthand for `col(*names).sum`.
#
# @param names [Array] Name(s) of the columns to aggregate.
# @return [Expr]
def sum(*names)
  selected = col(*names)
  selected.sum
end

#sum_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr`

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

exprs (Array) —
Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.
ignore_nulls (Boolean) (defaults to: true) —
Ignore null values (default). If set to false, any null value in the input will lead to a null output.

Returns:

(Expr)

# File 'lib/polars/functions/aggregation/horizontal.rb', line 172

# Sum all values horizontally across columns.
#
# @param exprs [Array] Inputs parsed by `Utils.parse_into_list_of_expressions`.
# @param ignore_nulls [Boolean] Forwarded to `Plr.sum_horizontal`.
# @return [Expr]
def sum_horizontal(*exprs, ignore_nulls: true)
  parsed = Utils.parse_into_list_of_expressions(*exprs)
  row_sum = Plr.sum_horizontal(parsed, ignore_nulls)
  Utils.wrap_expr(row_sum)
end

#tail(column, n = 10) ⇒ `Expr`

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘

df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

column (Object) —
Column name.
n (Integer) (defaults to: 10) —
Number of rows to return.

Returns:

(Expr)



677
678
679

# File 'lib/polars/functions/lazy.rb', line 677

# Shorthand for `col(column).tail(n)`.
#
# @param column [Object] Column name.
# @param n [Integer] Number of rows to return (10 unless specified).
# @return [Expr]
def tail(column, n = 10)
  target = col(column)
  target.tail(n)
end

#time_range(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ `Object`

Generate a time range.

Examples:

Polars.time_range(
  Time.utc(2000, 1, 1, 14, 0),
  nil,
  "3h15m",
  eager: true
).alias("time")
# =>
# shape: (4,)
# Series: 'time' [time]
# [
#         14:00:00
#         17:15:00
#         20:30:00
#         23:45:00
# ]

Parameters:

start (Object) (defaults to: nil) —
Lower bound of the time range.
stop (Object) (defaults to: nil) —
Upper bound of the time range.
interval (String) (defaults to: "1h") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/time_range.rb', line 35

# Generate a time range.
#
# @param start [Object]
#   Lower bound of the time range.
# @param stop [Object]
#   Upper bound of the time range.
# @param interval [String]
#   Interval of the range periods, specified using the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none']
#   Define which sides of the range are closed (inclusive).
# @param eager [Boolean]
#   Evaluate immediately and return a Series. If set to false (default),
#   return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError]
#   If the parsed interval contains one of the units "y", "mo", "w" or "d".
def time_range(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)

  # Report the first rejected unit, checked in this fixed order.
  rejected_unit = ["y", "mo", "w", "d"].find { |unit| interval.include?(unit) }
  unless rejected_unit.nil?
    raise ArgumentError, "invalid interval unit for time_range: found #{rejected_unit.inspect}"
  end

  # For both default bounds the date part is ignored.
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  lower_rbexpr = Utils.parse_into_expression(start)
  upper_rbexpr = Utils.parse_into_expression(stop)
  range_expr = Utils.wrap_expr(
    Plr.time_range(lower_rbexpr, upper_rbexpr, interval, closed)
  )

  eager ? Polars.select(range_expr).to_series : range_expr
end

#time_ranges(start = nil, stop = nil, interval = "1h", closed: "both", eager: false) ⇒ `Object`

Create a column of time ranges.

Examples:

df = Polars::DataFrame.new(
  {
    "start" => [Time.utc(2000, 1, 1, 9, 0), Time.utc(2000, 1, 1, 10, 0)],
    "end" => Time.utc(2000, 1, 1, 11, 0)
  }
)
df.select(time_range: Polars.time_ranges("start", "end"))
# =>
# shape: (2, 1)
# ┌────────────────────────────────┐
# │ time_range                     │
# │ ---                            │
# │ list[time]                     │
# ╞════════════════════════════════╡
# │ [09:00:00, 10:00:00, 11:00:00] │
# │ [10:00:00, 11:00:00]           │
# └────────────────────────────────┘

Parameters:

start (Object) (defaults to: nil) —
Lower bound of the time range.
stop (Object) (defaults to: nil) —
Upper bound of the time range.
interval (String) (defaults to: "1h") —
Interval of the range periods, specified using the Polars duration string language.
closed ('both', 'left', 'right', 'none') (defaults to: "both") —
Define which sides of the range are closed (inclusive).
eager (Boolean) (defaults to: false) —
Evaluate immediately and return a Series. If set to false (default), return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/range/time_range.rb', line 105

# Create a column of time ranges.
#
# @param start [Object]
#   Lower bound of the time range.
# @param stop [Object]
#   Upper bound of the time range.
# @param interval [String]
#   Interval of the range periods, specified using the Polars duration string language.
# @param closed ['both', 'left', 'right', 'none']
#   Define which sides of the range are closed (inclusive).
# @param eager [Boolean]
#   Evaluate immediately and return a Series. If set to false (default),
#   return an expression instead.
#
# @return [Object]
#
# @raise [ArgumentError]
#   If the parsed interval contains one of the units "y", "mo", "w" or "d".
def time_ranges(
  start = nil,
  stop = nil,
  interval = "1h",
  closed: "both",
  eager: false
)
  interval = Utils.parse_interval_argument(interval)
  ["y", "mo", "w", "d"].each do |unit|
    next unless interval.include?(unit)

    # The message names this method so the error points at the call the user made.
    msg = "invalid interval unit for time_ranges: found #{unit.inspect}"
    raise ArgumentError, msg
  end

  # For both default bounds the date part is ignored.
  start = ::Time.utc(2000, 1, 1, 0, 0, 0) if start.nil?
  stop = ::Time.utc(2000, 1, 1, 23, 59, 59, 999999) if stop.nil?

  start_rbexpr = Utils.parse_into_expression(start)
  end_rbexpr = Utils.parse_into_expression(stop)

  result = Utils.wrap_expr(Plr.time_ranges(start_rbexpr, end_rbexpr, interval, closed))
  return Polars.select(result).to_series if eager

  result
end

#using_string_cache ⇒ `Boolean`

Check whether the global string cache is enabled.

Returns:

(Boolean)



97
98
99

# File 'lib/polars/string_cache.rb', line 97

# Check whether the global string cache is enabled.
#
# Returns whatever `Plr.using_string_cache` reports.
#
# @return [Boolean]
def using_string_cache
  cache_enabled = Plr.using_string_cache
  cache_enabled
end

#var(column, ddof: 1) ⇒ `Expr`

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘

df["a"].var
# => 13.0

Parameters:

column (Object) —
Column name.
ddof (Integer) (defaults to: 1) —
“Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:

(Expr)



241
242
243

# File 'lib/polars/functions/lazy.rb', line 241

# Get the variance.
#
# Shorthand for `col(column).var(ddof: ddof)`.
#
# @param column [Object]
#   Column name.
# @param ddof [Integer]
#   "Delta Degrees of Freedom": the divisor used in the calculation is
#   N - ddof, where N represents the number of elements.
#
# @return [Expr]
def var(column, ddof: 1)
  column_expr = col(column)
  column_expr.var(ddof: ddof)
end

#when(*predicates, **constraints) ⇒ `When`

Start a "when, then, otherwise" expression.

Examples:

Below we add a column with the value 1, where column "foo" > 2 and the value -1 where it isn't.

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_columns(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Or with multiple when-then operations chained:

df.with_columns(
  Polars.when(Polars.col("foo") > 2)
  .then(1)
  .when(Polars.col("bar") > 2)
  .then(4)
  .otherwise(-1)
  .alias("val")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 4   │
# │ 3   ┆ 4   ┆ 1   │
# │ 4   ┆ 0   ┆ 1   │
# └─────┴─────┴─────┘

The `otherwise` at the end is optional. If left out, any rows where none of the `when` expressions evaluate to true are set to `null`:

df.with_columns(Polars.when(Polars.col("foo") > 2).then(1).alias("val"))
# =>
# shape: (3, 3)
# ┌─────┬─────┬──────┐
# │ foo ┆ bar ┆ val  │
# │ --- ┆ --- ┆ ---  │
# │ i64 ┆ i64 ┆ i32  │
# ╞═════╪═════╪══════╡
# │ 1   ┆ 3   ┆ null │
# │ 3   ┆ 4   ┆ 1    │
# │ 4   ┆ 0   ┆ 1    │
# └─────┴─────┴──────┘

Pass multiple predicates, each of which must be met:

df.with_columns(
  val: Polars.when(
    Polars.col("bar") > 0,
    Polars.col("foo") % 2 != 0
  )
  .then(99)
  .otherwise(-1)
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 99  │
# │ 3   ┆ 4   ┆ 99  │
# │ 4   ┆ 0   ┆ -1  │
# └─────┴─────┴─────┘

Pass conditions as keyword arguments:

df.with_columns(val: Polars.when(foo: 4, bar: 0).then(99).otherwise(-1))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────┐
# │ foo ┆ bar ┆ val │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i32 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ -1  │
# │ 3   ┆ 4   ┆ -1  │
# │ 4   ┆ 0   ┆ 99  │
# └─────┴─────┴─────┘

Returns:

(When)

# File 'lib/polars/functions/whenthen.rb', line 91

# Start a "when, then, otherwise" expression.
#
# @param predicates [Array<Object>]
#   Positional conditions; when several are passed, each of them must be met.
# @param constraints [Hash]
#   Conditions passed as keyword arguments (column name => value).
#
# @return [When]
def when(*predicates, **constraints)
  # Positional predicates and keyword constraints are merged into one condition.
  combined_condition = Utils.parse_predicates_constraints_into_expression(*predicates, **constraints)
  native_when = Plr.when(combined_condition)
  When.new(native_when)
end

#zeros(n, dtype: nil, eager: true) ⇒ `Object`

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

n (Integer) —
Length of the resulting column.
dtype (Object) (defaults to: nil) —
Data type of the resulting column. Defaults to Float64.
eager (Boolean) (defaults to: true) —
Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:

(Object)

# File 'lib/polars/functions/repeat.rb', line 118

# Construct a column of length n filled with zeros.
#
# This is syntactic sugar for the repeat function; the result is aliased "zeros".
#
# @param n [Integer]
#   Length of the resulting column.
# @param dtype [Object]
#   Data type of the resulting column.
# @param eager [Boolean]
#   Evaluate immediately and return a Series. If set to false, return an
#   expression instead.
#
# @return [Object]
#
# @raise [TypeError]
#   If `_one_or_zero_by_dtype` yields no zero value for the given dtype.
def zeros(n, dtype: nil, eager: true)
  zero_value = _one_or_zero_by_dtype(0, dtype)
  if zero_value.nil?
    raise TypeError, "invalid dtype for `zeros`; found #{dtype}"
  end

  filled = repeat(zero_value, n, dtype: dtype, eager: eager)
  filled.alias("zeros")
end

Module: Polars::Functions

Instance Method Summary collapse

Instance Method Details

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object

Examples:

#all(*names, ignore_nulls: true) ⇒ Expr

Examples:

Selecting all columns.

Evaluate bitwise AND for a column.

#all_horizontal(*exprs) ⇒ Expr

Examples:

#any(*names, ignore_nulls: true) ⇒ Expr

Examples:

#any_horizontal(*exprs) ⇒ Expr

Examples:

#approx_n_unique(*columns) ⇒ Expr

Examples:

#arctan2(y, x) ⇒ Expr

Examples:

#arctan2d(y, x) ⇒ Expr

Examples:

#arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ Expr Also known as: argsort_by

Examples:

Pass a single column name to compute the arg sort by that column.

Compute the arg sort by multiple columns by either passing a list of columns, or by specifying each column as a positional argument.

Use gather to apply the arg sort to other columns.

#arg_where(condition, eager: false) ⇒ Expr, Series

Examples:

#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ Expr

Examples:

You can pass a custom weekend - for example, if you only take Sunday off:

You can also pass a list of holidays to exclude from the count:

#coalesce(exprs, *more_exprs) ⇒ Expr

Examples:

#col(name, *more_names) ⇒ Expr

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array

#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object

Examples:

#concat_list(exprs, *more_exprs) ⇒ Expr

Examples:

Concatenate two existing list columns. Null values are propagated.

Non-list columns are cast to a list before concatenation. The output data type is the supertype of the concatenated columns.

Create lagged columns and collect them into a list. This mimics a rolling window.

#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr

Examples:

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ Expr

Examples:

Pearson's correlation:

Spearman rank correlation:

#count(*columns) ⇒ Expr

Examples:

Return the number of non-null values in multiple columns.

#cov(a, b, ddof: 1) ⇒ Expr

Examples:

#cum_count(*columns, reverse: false) ⇒ Expr

Examples:

#cum_fold(acc, function, exprs, returns_scalar: false, return_dtype: nil, include_init: false) ⇒ Object Also known as: cumfold

Examples:

#cum_sum(*names) ⇒ Expr Also known as: cumsum

Examples:

#cum_sum_horizontal(*exprs) ⇒ Expr Also known as: cumsum_horizontal

Examples:

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Examples:

Using polars duration string to specify the interval

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ Object

Examples:

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Examples:

Using Polars duration string to specify the interval:

Specifying a time zone:

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ Object

Examples:

#disable_string_cache ⇒ nil

Examples:

Construct two Series using the same global string cache.

As both Series are constructed under the same global string cache, they can be concatenated.

#dtype_of(col_or_expr) ⇒ DataTypeExpr

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr

Examples:

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ `Object`

#all(*names, ignore_nulls: true) ⇒ `Expr`

#all_horizontal(*exprs) ⇒ `Expr`

#any(*names, ignore_nulls: true) ⇒ `Expr`

#any_horizontal(*exprs) ⇒ `Expr`

#approx_n_unique(*columns) ⇒ `Expr`

#arctan2(y, x) ⇒ `Expr`

#arctan2d(y, x) ⇒ `Expr`

#arg_sort_by(exprs, *more_exprs, reverse: false, nulls_last: false, multithreaded: true, maintain_order: false) ⇒ `Expr` Also known as: argsort_by

#arg_where(condition, eager: false) ⇒ `Expr`, `Series`

#business_day_count(start, stop, week_mask: [true, true, true, true, true, false, false], holidays: []) ⇒ `Expr`

#coalesce(exprs, *more_exprs) ⇒ `Expr`

#col(name, *more_names) ⇒ `Expr`

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ `Array`

#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ `Object`

#concat_list(exprs, *more_exprs) ⇒ `Expr`

#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ `Expr`

#corr(a, b, method: "pearson", ddof: nil, propagate_nans: false) ⇒ `Expr`

#count(*columns) ⇒ `Expr`

#cov(a, b, ddof: 1) ⇒ `Expr`

#cum_count(*columns, reverse: false) ⇒ `Expr`

#cum_fold(acc, function, exprs, returns_scalar: false, return_dtype: nil, include_init: false) ⇒ `Object` Also known as: cumfold

#cum_sum(*names) ⇒ `Expr` Also known as: cumsum

#cum_sum_horizontal(*exprs) ⇒ `Expr` Also known as: cumsum_horizontal

#date_range(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object`

#date_ranges(start, stop, interval = "1d", closed: "both", eager: false) ⇒ `Object`

#datetime_range(start, stop, interval = "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object`

#datetime_ranges(start, stop, interval: "1d", closed: "both", time_unit: nil, time_zone: nil, eager: false) ⇒ `Object`

#disable_string_cache ⇒ `nil`

#dtype_of(col_or_expr) ⇒ `DataTypeExpr`

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ `Expr`

#element ⇒ `Expr`

#enable_string_cache ⇒ `nil`

#exclude(columns) ⇒ `Object`

#field(name) ⇒ `Expr`

#first(*columns) ⇒ `Expr`

#fold(acc, function, exprs, returns_scalar: false, return_dtype: nil) ⇒ `Expr`

#format(f_string, *args) ⇒ `Expr`

#from_epoch(column, unit: "s", eager: false) ⇒ `Object`

#groups(column) ⇒ `Object`

#head(column, n = 10) ⇒ `Expr`

#implode(*columns) ⇒ `Expr`

#int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ `Expr`, `Series` Also known as: arange

#last(*columns) ⇒ `Expr`

#len ⇒ `Expr` Also known as: length

Generate an index column by using `len` in conjunction with `int_range`.

#lit(value, dtype: nil, allow_object: nil) ⇒ `Expr`

#max(*names) ⇒ `Expr`

#max_horizontal(*exprs) ⇒ `Expr`

#mean(*columns) ⇒ `Expr` Also known as: avg

#mean_horizontal(*exprs, ignore_nulls: true) ⇒ `Expr`

#median(*columns) ⇒ `Expr`

#min(*names) ⇒ `Expr`

#min_horizontal(*exprs) ⇒ `Expr`

#n_unique(*columns) ⇒ `Expr`

#nth(*indices, strict: true) ⇒ `Expr`

#ones(n, dtype: nil, eager: true) ⇒ `Object`

#quantile(column, quantile, interpolation: "nearest") ⇒ `Expr`

#repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ `Object`

Generate a Series directly by setting `eager: true`.

#select(*exprs, **named_exprs) ⇒ `DataFrame`

#set_random_seed(seed) ⇒ `nil`

#sql_expr(sql) ⇒ `Expr`