Module: Polars::Functions

Included in:
Polars
Defined in:
lib/polars/functions.rb,
lib/polars/string_cache.rb,
lib/polars/functions/col.rb,
lib/polars/functions/len.rb,
lib/polars/functions/lit.rb,
lib/polars/functions/lazy.rb,
lib/polars/functions/eager.rb,
lib/polars/functions/random.rb,
lib/polars/functions/repeat.rb,
lib/polars/functions/whenthen.rb,
lib/polars/functions/as_datatype.rb,
lib/polars/functions/range/int_range.rb,
lib/polars/functions/range/date_range.rb,
lib/polars/functions/aggregation/vertical.rb,
lib/polars/functions/aggregation/horizontal.rb

Instance Method Summary collapse

Instance Method Details

#align_frames(*frames, on:, select: nil, reverse: false) ⇒ Object

Align a sequence of frames using the uique values from one or more columns as a key.

Frames that do not contain the given key values have rows injected (with nulls filling the non-key columns), and each resulting frame is sorted by the key.

The original column order of input frames is not changed unless select is specified (in which case the final column order is determined from that).

Note that this does not result in a joined frame - you receive the same number of frames back that you passed in, but each is now aligned by key and has the same number of rows.

Examples:

df1 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 1), Date.new(2022, 9, 2), Date.new(2022, 9, 3)],
    "x" => [3.5, 4.0, 1.0],
    "y" => [10.0, 2.5, 1.5]
  }
)
df2 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 2), Date.new(2022, 9, 3), Date.new(2022, 9, 1)],
    "x" => [8.0, 1.0, 3.5],
    "y" => [1.5, 12.0, 5.0]
  }
)
df3 = Polars::DataFrame.new(
  {
    "dt" => [Date.new(2022, 9, 3), Date.new(2022, 9, 2)],
    "x" => [2.0, 5.0],
    "y" => [2.5, 2.0]
  }
)
af1, af2, af3 = Polars.align_frames(
  df1, df2, df3, on: "dt", select: ["x", "y"]
)
(af1 * af2 * af3).fill_null(0).select(Polars.sum(Polars.col("*")).alias("dot"))
# =>
# shape: (3, 1)
# ┌───────┐
# │ dot   │
# │ ---   │
# │ f64   │
# ╞═══════╡
# │ 0.0   │
# ├╌╌╌╌╌╌╌┤
# │ 167.5 │
# ├╌╌╌╌╌╌╌┤
# │ 47.0  │
# └───────┘

Parameters:

  • frames (Array)

    Sequence of DataFrames or LazyFrames.

  • on (Object)

    One or more columns whose unique values will be used to align the frames.

  • select (Object) (defaults to: nil)

    Optional post-alignment column select to constrain and/or order the columns returned from the newly aligned frames.

  • reverse (Object) (defaults to: false)

    Sort the alignment column values in descending order; can be a single boolean or a list of booleans associated with each column in on.

Returns:



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/polars/functions/eager.rb', line 144

def align_frames(
  *frames,
  on:,
  select: nil,
  reverse: false
)
  if frames.empty?
    return []
  elsif frames.map(&:class).uniq.length != 1
    raise TypeError, "Input frames must be of a consistent type (all LazyFrame or all DataFrame)"
  end

  # establish the superset of all "on" column values, sort, and cache
  eager = frames[0].is_a?(DataFrame)
  alignment_frame = (
    concat(frames.map { |df| df.lazy.select(on) })
      .unique(maintain_order: false)
      .sort(on, reverse: reverse)
  )
  alignment_frame = (
    eager ? alignment_frame.collect.lazy : alignment_frame.cache
  )
  # finally, align all frames
  aligned_frames =
    frames.map do |df|
      alignment_frame.join(
        df.lazy,
        on: alignment_frame.columns,
        how: "left"
      ).select(df.columns)
    end
  if !select.nil?
    aligned_frames = aligned_frames.map { |df| df.select(select) }
  end

  eager ? aligned_frames.map(&:collect) : aligned_frames
end

#all(*names, ignore_nulls: true) ⇒ Expr

Either return an expression representing all columns, or evaluate a bitwise AND operation.

If no arguments are passed, this function is syntactic sugar for col("*"). Otherwise, this function is syntactic sugar for col(names).all.

Examples:

Selecting all columns.

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.all.sum)
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 2   ┆ 0   │
# └─────┴─────┘

Evaluate bitwise AND for a column.

df.select(Polars.all("a"))
# =>
# shape: (1, 1)
# ┌───────┐
# │ a     │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ false │
# └───────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



44
45
46
47
48
49
50
# File 'lib/polars/functions/aggregation/vertical.rb', line 44

def all(*names, ignore_nulls: true)
  if names.empty?
    return col("*")
  end

  col(*names).all(drop_nulls: ignore_nulls)
end

#all_horizontal(*exprs) ⇒ Expr

Compute the bitwise AND horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(all: Polars.all_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ all   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ false │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ null  │
# │ false ┆ null  ┆ y   ┆ false │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



34
35
36
37
# File 'lib/polars/functions/aggregation/horizontal.rb', line 34

def all_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.all_horizontal(rbexprs))
end

#any(*names, ignore_nulls: true) ⇒ Expr

Evaluate a bitwise OR operation.

Syntactic sugar for col(names).any.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [true, false, true],
    "b" => [false, false, false]
  }
)
df.select(Polars.any("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ bool │
# ╞══════╡
# │ true │
# └──────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

  • ignore_nulls (Boolean) (defaults to: true)

    Ignore null values (default).

Returns:



80
81
82
# File 'lib/polars/functions/aggregation/vertical.rb', line 80

def any(*names, ignore_nulls: true)
  col(*names).any(drop_nulls: ignore_nulls)
end

#any_horizontal(*exprs) ⇒ Expr

Compute the bitwise OR horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [false, false, true, true, false, nil],
    "b" => [false, true, true, nil, nil, nil],
    "c" => ["u", "v", "w", "x", "y", "z"]
  }
)
df.with_columns(any: Polars.any_horizontal("a", "b"))
# =>
# shape: (6, 4)
# ┌───────┬───────┬─────┬───────┐
# │ a     ┆ b     ┆ c   ┆ any   │
# │ ---   ┆ ---   ┆ --- ┆ ---   │
# │ bool  ┆ bool  ┆ str ┆ bool  │
# ╞═══════╪═══════╪═════╪═══════╡
# │ false ┆ false ┆ u   ┆ false │
# │ false ┆ true  ┆ v   ┆ true  │
# │ true  ┆ true  ┆ w   ┆ true  │
# │ true  ┆ null  ┆ x   ┆ true  │
# │ false ┆ null  ┆ y   ┆ null  │
# │ null  ┆ null  ┆ z   ┆ null  │
# └───────┴───────┴─────┴───────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



70
71
72
73
# File 'lib/polars/functions/aggregation/horizontal.rb', line 70

def any_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.any_horizontal(rbexprs))
end

#approx_n_unique(*columns) ⇒ Expr

Approximate count of unique values.

This function is syntactic sugar for col(columns).approx_n_unique, and uses the HyperLogLog++ algorithm for cardinality estimation.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.approx_n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.approx_n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



403
404
405
# File 'lib/polars/functions/lazy.rb', line 403

def approx_n_unique(*columns)
  col(*columns).approx_n_unique
end

#arctan2(y, x) ⇒ Expr

Compute two argument arctan in radians.

Returns the angle (in radians) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



827
828
829
830
831
832
833
834
835
# File 'lib/polars/functions/lazy.rb', line 827

def arctan2(y, x)
  if Utils.strlike?(y)
    y = col(y)
  end
  if Utils.strlike?(x)
    x = col(x)
  end
  Utils.wrap_expr(Plr.arctan2(y._rbexpr, x._rbexpr))
end

#arctan2d(y, x) ⇒ Expr

Compute two argument arctan in degrees.

Returns the angle (in degrees) in the plane between the positive x-axis and the ray from the origin to (x,y).

Examples:

twoRootTwo = Math.sqrt(2) / 2
df = Polars::DataFrame.new(
  {
    "y" => [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
    "x" => [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo]
  }
)
df.select(
  Polars.arctan2d("y", "x").alias("atan2d"), Polars.arctan2("y", "x").alias("atan2")
)
# =>
# shape: (4, 2)
# ┌────────┬───────────┐
# │ atan2d ┆ atan2     │
# │ ---    ┆ ---       │
# │ f64    ┆ f64       │
# ╞════════╪═══════════╡
# │ 45.0   ┆ 0.785398  │
# │ -45.0  ┆ -0.785398 │
# │ 135.0  ┆ 2.356194  │
# │ -135.0 ┆ -2.356194 │
# └────────┴───────────┘

Parameters:

  • y (Object)

    Column name or Expression.

  • x (Object)

    Column name or Expression.

Returns:



872
873
874
875
876
877
878
879
880
# File 'lib/polars/functions/lazy.rb', line 872

def arctan2d(y, x)
  if Utils.strlike?(y)
    y = col(y)
  end
  if Utils.strlike?(x)
    x = col(x)
  end
  Utils.wrap_expr(Plr.arctan2d(y._rbexpr, x._rbexpr))
end

#arg_sort_by(exprs, reverse: false) ⇒ Expr Also known as: argsort_by

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.

Parameters:

  • exprs (Object)

    Columns use to determine the ordering.

  • reverse (Boolean) (defaults to: false)

    Default is ascending.

Returns:



978
979
980
981
982
983
984
985
986
987
# File 'lib/polars/functions/lazy.rb', line 978

def arg_sort_by(exprs, reverse: false)
  if !exprs.is_a?(::Array)
    exprs = [exprs]
  end
  if reverse == true || reverse == false
    reverse = [reverse] * exprs.length
  end
  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.arg_sort_by(exprs, reverse))
end

#arg_where(condition, eager: false) ⇒ Expr, Series

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]

Parameters:

  • condition (Expr)

    Boolean expression to evaluate

  • eager (Boolean) (defaults to: false)

    Whether to apply this function eagerly (as opposed to lazily).

Returns:



1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
# File 'lib/polars/functions/lazy.rb', line 1115

def arg_where(condition, eager: false)
  if eager
    if !condition.is_a?(Series)
      raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager: true', got #{condition.class.name}"
    end
    condition.to_frame.select(arg_where(Polars.col(condition.name))).to_series
  else
    condition = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    Utils.wrap_expr(Plr.arg_where(condition._rbexpr))
  end
end

#coalesce(exprs, *more_exprs) ⇒ Expr

Folds the columns from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, nil, nil, nil],
    "b" => [1, 2, nil, nil],
    "c" => [5, nil, 3, nil]
  }
)
df.with_columns(Polars.coalesce(["a", "b", "c", 10]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬─────┐
# │ a    ┆ b    ┆ c    ┆ d   │
# │ ---  ┆ ---  ┆ ---  ┆ --- │
# │ i64  ┆ i64  ┆ i64  ┆ i64 │
# ╞══════╪══════╪══════╪═════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1   │
# │ null ┆ 2    ┆ null ┆ 2   │
# │ null ┆ null ┆ 3    ┆ 3   │
# │ null ┆ null ┆ null ┆ 10  │
# └──────┴──────┴──────┴─────┘
df.with_columns(Polars.coalesce(Polars.col(["a", "b", "c"]), 10.0).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ i64  ┆ i64  ┆ i64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ 1    ┆ 1    ┆ 5    ┆ 1.0  │
# │ null ┆ 2    ┆ null ┆ 2.0  │
# │ null ┆ null ┆ 3    ┆ 3.0  │
# │ null ┆ null ┆ null ┆ 10.0 │
# └──────┴──────┴──────┴──────┘

Parameters:

  • exprs (Array)

    Columns to coalesce. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • more_exprs (Hash)

    Additional columns to coalesce, specified as positional arguments.

Returns:



1173
1174
1175
1176
# File 'lib/polars/functions/lazy.rb', line 1173

def coalesce(exprs, *more_exprs)
  exprs = Utils.parse_as_list_of_expressions(exprs, *more_exprs)
  Utils.wrap_expr(Plr.coalesce(exprs))
end

#col(name, *more_names) ⇒ Expr

Return an expression representing a column in a DataFrame.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/polars/functions/col.rb', line 6

def col(name, *more_names)
  if more_names.any?
    if Utils.strlike?(name)
      names_str = [name]
      names_str.concat(more_names)
      return Utils.wrap_expr(Plr.cols(names_str.map(&:to_s)))
    elsif Utils.is_polars_dtype(name)
      dtypes = [name]
      dtypes.concat(more_names)
      return Utils.wrap_expr(Plr.dtype_cols(dtypes))
    else
      msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
      raise TypeError, msg
    end
  end

  if Utils.strlike?(name)
    Utils.wrap_expr(Plr.col(name.to_s))
  elsif Utils.is_polars_dtype(name)
    Utils.wrap_expr(Plr.dtype_cols([name]))
  elsif name.is_a?(::Array)
    names = Array(name)
    if names.empty?
      return Utils.wrap_expr(Plr.cols(names))
    end

    item = names[0]
    if Utils.strlike?(item)
      Utils.wrap_expr(Plr.cols(names.map(&:to_s)))
    elsif Utils.is_polars_dtype(item)
      Utils.wrap_expr(Plr.dtype_cols(names))
    else
      msg = "invalid input for `col`\n\nExpected iterable of type `str` or `DataType`, got iterable of type #{item.class.name}."
      raise TypeError, msg
    end
  else
    msg = "invalid input for `col`\n\nExpected `str` or `DataType`, got #{name.class.name}."
    raise TypeError, msg
  end
end

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on Polars threadpool.

Parameters:

  • lazy_frames (Boolean)

    A list of LazyFrames to collect.

  • type_coercion (Boolean) (defaults to: true)

    Do type coercion optimization.

  • predicate_pushdown (Boolean) (defaults to: true)

    Do predicate pushdown optimization.

  • projection_pushdown (Boolean) (defaults to: true)

    Do projection pushdown optimization.

  • simplify_expression (Boolean) (defaults to: true)

    Run simplify expressions optimization.

  • string_cache (Boolean) (defaults to: false)

    This argument is deprecated and will be ignored

  • no_optimization (Boolean) (defaults to: false)

    Turn off optimizations.

  • slice_pushdown (Boolean) (defaults to: true)

    Slice pushdown optimization.

  • common_subplan_elimination (Boolean) (defaults to: true)

    Will try to cache branching subplans that occur on self-joins or unions.

  • allow_streaming (Boolean) (defaults to: false)

    Run parts of the query in a streaming fashion (this is in an alpha state)

Returns:



1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
# File 'lib/polars/functions/lazy.rb', line 1016

def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  if no_optimization
    predicate_pushdown = false
    projection_pushdown = false
    slice_pushdown = false
    common_subplan_elimination = false
  end

  prepared = []

  lazy_frames.each do |lf|
    ldf = lf._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming,
      false
    )
    prepared << ldf
  end

  out = Plr.collect_all(prepared)

  # wrap the rbdataframes into dataframe
  result = out.map { |rbdf| Utils.wrap_df(rbdf) }

  result
end

#concat(items, rechunk: true, how: "vertical", parallel: true) ⇒ Object

Aggregate multiple Dataframes/Series to a single DataFrame/Series.

Examples:

df1 = Polars::DataFrame.new({"a" => [1], "b" => [3]})
df2 = Polars::DataFrame.new({"a" => [2], "b" => [4]})
Polars.concat([df1, df2])
# =>
# shape: (2, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# │ 2   ┆ 4   │
# └─────┴─────┘

Parameters:

  • items (Object)

    DataFrames/Series/LazyFrames to concatenate.

  • rechunk (Boolean) (defaults to: true)

    Make sure that all data is in contiguous memory.

  • how ("vertical", "vertical_relaxed", "diagonal", "horizontal") (defaults to: "vertical")

    LazyFrames do not support the horizontal strategy.

    • Vertical: applies multiple vstack operations.
    • Diagonal: finds a union between the column schemas and fills missing column values with null.
    • Horizontal: stacks Series horizontally and fills with nulls if the lengths don't match.
  • parallel (Boolean) (defaults to: true)

    Only relevant for LazyFrames. This determines if the concatenated lazy computations may be executed in parallel.

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/polars/functions/eager.rb', line 35

def concat(items, rechunk: true, how: "vertical", parallel: true)
  if items.empty?
    raise ArgumentError, "cannot concat empty list"
  end

  first = items[0]
  if first.is_a?(DataFrame)
    if how == "vertical"
      out = Utils.wrap_df(Plr.concat_df(items))
    elsif how == "diagonal"
      out = Utils.wrap_df(Plr.concat_df_diagonal(items))
    elsif how == "horizontal"
      out = Utils.wrap_df(Plr.concat_df_horizontal(items))
    else
      raise ArgumentError, "how must be one of {{'vertical', 'diagonal', 'horizontal'}}, got #{how}"
    end
  elsif first.is_a?(LazyFrame)
    if how == "vertical"
      return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, false))
    elsif how == "vertical_relaxed"
      return Utils.wrap_ldf(Plr.concat_lf(items, rechunk, parallel, true))
    elsif how == "diagonal"
      return Utils.wrap_ldf(Plr.concat_lf_diagonal(items, rechunk, parallel, false))
    else
      raise ArgumentError, "Lazy only allows 'vertical', 'vertical_relaxed', and 'diagonal' concat strategy."
    end
  elsif first.is_a?(Series)
    # TODO
    out = Utils.wrap_s(Plr.concat_series(items))
  elsif first.is_a?(Expr)
    out = first
    items[1..-1].each do |e|
      out = out.append(e)
    end
  else
    raise ArgumentError, "did not expect type: #{first.class.name} in 'Polars.concat'."
  end

  if rechunk
    out.rechunk
  else
    out
  end
end

#concat_list(exprs) ⇒ Expr

Concat the arrays in a Series dtype List in linear time.

Returns:



89
90
91
92
# File 'lib/polars/functions/as_datatype.rb', line 89

def concat_list(exprs)
  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.concat_list(exprs))
end

#concat_str(exprs, sep: "", ignore_nulls: false) ⇒ Expr

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      sep: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘

Parameters:

  • exprs (Object)

    Columns to concat into a Utf8 Series.

  • sep (String) (defaults to: "")

    String value that will be used to separate the values.

  • ignore_nulls (Boolean) (defaults to: false)

    Ignore null values (default).

Returns:



190
191
192
193
# File 'lib/polars/functions/as_datatype.rb', line 190

def concat_str(exprs, sep: "", ignore_nulls: false)
  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.concat_str(exprs, sep, ignore_nulls))
end

#corr(a, b, method: "pearson", ddof: 1, propagate_nans: false) ⇒ Expr

Compute the Pearson's or Spearman rank correlation correlation between two columns.

Examples:

Pearson's correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 0.544705 │
# └──────────┘

Spearman rank correlation:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.corr("a", "b", method: "spearman"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 0.5 │
# └─────┘

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

  • method ("pearson", "spearman") (defaults to: "pearson")

    Correlation method.

  • propagate_nans (Boolean) (defaults to: false)

    If true any NaN encountered will lead to NaN in the output. Defaults to False where NaN are regarded as larger than any finite number and thus lead to the highest rank.

Returns:



679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
# File 'lib/polars/functions/lazy.rb', line 679

def corr(
  a,
  b,
  method: "pearson",
  ddof: 1,
  propagate_nans: false
)
  a = Utils.parse_as_expression(a)
  b = Utils.parse_as_expression(b)

  if method == "pearson"
    Utils.wrap_expr(Plr.pearson_corr(a, b, ddof))
  elsif method == "spearman"
    Utils.wrap_expr(Plr.spearman_rank_corr(a, b, ddof, propagate_nans))
  else
    msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
    raise ArgumentError, msg
  end
end

#count(*columns) ⇒ Expr

Return the number of non-null values in the column.

This function is syntactic sugar for col(columns).count.

Calling this function without any arguments returns the number of rows in the context. This way of using the function is deprecated. Please use len instead.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.count("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘

Return the number of non-null values in multiple columns.

df.select(Polars.count("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 1   ┆ 3   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



70
71
72
73
74
75
76
77
# File 'lib/polars/functions/lazy.rb', line 70

def count(*columns)
  if columns.empty?
    warn "`Polars.count` is deprecated. Use `Polars.length` instead."
    return Utils.wrap_expr(Plr.len._alias("count"))
  end

  col(*columns).count
end

#cov(a, b, ddof: 1) ⇒ Expr

Compute the covariance between two columns/ expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.cov("a", "b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    "Delta Degrees of Freedom": the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



730
731
732
733
734
# File 'lib/polars/functions/lazy.rb', line 730

def cov(a, b, ddof: 1)
  a = Utils.parse_as_expression(a)
  b = Utils.parse_as_expression(b)
  Utils.wrap_expr(Plr.cov(a, b, ddof))
end

#cum_count(*columns, reverse: false) ⇒ Expr

Return the cumulative count of the non-null values in the column.

This function is syntactic sugar for col(columns).cum_count.

If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, nil], "b" => [3, nil, nil]})
df.select(Polars.cum_count("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 2   │
# └─────┘

Parameters:

  • columns (Array)

    Name(s) of the columns to use.

  • reverse (Boolean) (defaults to: false)

    Reverse the operation.

Returns:



107
108
109
# File 'lib/polars/functions/lazy.rb', line 107

def cum_count(*columns, reverse: false)
  col(*columns).cum_count(reverse: reverse)
end

#cum_fold(acc, f, exprs, include_init: false) ⇒ Object Also known as: cumfold

Note:

If you simply want the first encountered expression as accumulator, consider using cumreduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.

Parameters:

  • acc (Object)

    Accumulator Expression. This is the value that will be initialized when the fold starts. For a sum this could for instance be lit(0).

  • f (Object)

    Function to apply over the accumulator and the value. Fn(acc, value) -> new_value

  • exprs (Object)

    Expressions to aggregate over. May also be a wildcard expression.

  • include_init (Boolean) (defaults to: false)

    Include the initial accumulator state as struct field.

Returns:



778
779
780
781
782
783
784
785
786
# File 'lib/polars/functions/lazy.rb', line 778

def cum_fold(acc, f, exprs, include_init: false)
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  if exprs.is_a?(Expr)
    exprs = [exprs]
  end

  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.cum_fold(acc._rbexpr, f, exprs, include_init))
end

#cum_sum(*names) ⇒ Expr Also known as: cumsum

Cumulatively sum all values.

Syntactic sugar for col(names).cum_sum.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [4, 5, 6]
  }
)
df.select(Polars.cum_sum("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 3   │
# │ 6   │
# └─────┘

Parameters:

  • names (Object)

    Name(s) of the columns to use in the aggregation.

Returns:



277
278
279
# File 'lib/polars/functions/aggregation/vertical.rb', line 277

def cum_sum(*names)
  col(*names).cum_sum
end

#cum_sum_horizontal(*exprs) ⇒ Expr Also known as: cumsum_horizontal

Cumulatively sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(Polars.cum_sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬───────────┐
# │ a   ┆ b    ┆ c   ┆ cum_sum   │
# │ --- ┆ ---  ┆ --- ┆ ---       │
# │ i64 ┆ i64  ┆ str ┆ struct[2] │
# ╞═════╪══════╪═════╪═══════════╡
# │ 1   ┆ 4    ┆ x   ┆ {1,5}     │
# │ 8   ┆ 5    ┆ y   ┆ {8,13}    │
# │ 3   ┆ null ┆ z   ┆ {3,null}  │
# └─────┴──────┴─────┴───────────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



235
236
237
238
239
240
241
242
243
# File 'lib/polars/functions/aggregation/horizontal.rb', line 235

def cum_sum_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  exprs_wrapped = rbexprs.map { |e| Utils.wrap_expr(e) }

  # (Expr): use u32 as that will not cast to float as eagerly
  Polars.cum_fold(Polars.lit(0).cast(UInt32), -> (a, b) { a + b }, exprs_wrapped).alias(
    "cum_sum"
  )
end

#date_range(start, stop, interval, lazy: false, closed: "both", name: nil, time_unit: nil, time_zone: nil) ⇒ Object

Note:

If both low and high are passed as date types (not datetime), and the interval granularity is no finer than 1d, the returned range is also of type date. All other permutations return a datetime Series.

Create a range of type Datetime (or Date).

Examples:

Using polars duration string to specify the interval

Polars.date_range(Date.new(2022, 1, 1), Date.new(2022, 3, 1), "1mo", name: "drange")
# =>
# shape: (3,)
# Series: 'drange' [date]
# [
#         2022-01-01
#         2022-02-01
#         2022-03-01
# ]

Using timedelta object to specify the interval:

Polars.date_range(
    DateTime.new(1985, 1, 1),
    DateTime.new(1985, 1, 10),
    "1d12h",
    time_unit: "ms"
)
# =>
# shape: (7,)
# Series: '' [datetime[ms]]
# [
#         1985-01-01 00:00:00
#         1985-01-02 12:00:00
#         1985-01-04 00:00:00
#         1985-01-05 12:00:00
#         1985-01-07 00:00:00
#         1985-01-08 12:00:00
#         1985-01-10 00:00:00
# ]

Parameters:

  • start (Object)

    Lower bound of the date range.

  • stop (Object)

    Upper bound of the date range.

  • interval (Object)

    Interval periods. It can be a polars duration string, such as 3d12h4m25s representing 3 days, 12 hours, 4 minutes, and 25 seconds.

  • lazy (Boolean) (defaults to: false)

    Return an expression.

  • closed ("both", "left", "right", "none") (defaults to: "both")

    Define whether the temporal window interval is closed or not.

  • name (String) (defaults to: nil)

    Name of the output Series.

  • time_unit (nil, "ns", "us", "ms") (defaults to: nil)

    Set the time unit.

  • time_zone (String) (defaults to: nil)

    Optional timezone

Returns:



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/polars/functions/range/date_range.rb', line 60

def date_range(
  start,
  stop,
  interval,
  lazy: false,
  closed: "both",
  name: nil,
  time_unit: nil,
  time_zone: nil
)
  if defined?(ActiveSupport::Duration) && interval.is_a?(ActiveSupport::Duration)
    raise Todo
  else
    interval = interval.to_s
    if interval.include?(" ")
      interval = interval.gsub(" ", "")
    end
  end

  if time_unit.nil?
    if interval.include?("ns")
      time_unit = "ns"
    else
      time_unit = "us"
    end
  end

  start_rbexpr = Utils.parse_as_expression(start)
  stop_rbexpr = Utils.parse_as_expression(stop)

  result = Utils.wrap_expr(
    Plr.date_range(start_rbexpr, stop_rbexpr, interval, closed, time_unit, time_zone)
  )

  result = result.alias(name.to_s)

  if !lazy
    return select(result).to_series
  end

  result
end

#disable_string_cachenil

Disable and clear the global string cache.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


64
65
66
# File 'lib/polars/string_cache.rb', line 64

def disable_string_cache
  Plr.disable_string_cache
end

#duration(weeks: nil, days: nil, hours: nil, minutes: nil, seconds: nil, milliseconds: nil, microseconds: nil, nanoseconds: nil, time_unit: "us") ⇒ Expr

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/polars/functions/as_datatype.rb', line 35

def duration(
  weeks: nil,
  days: nil,
  hours: nil,
  minutes: nil,
  seconds: nil,
  milliseconds: nil,
  microseconds: nil,
  nanoseconds: nil,
  time_unit: "us"
)
  if !weeks.nil?
    weeks = Utils.expr_to_lit_or_expr(weeks, str_to_lit: false)._rbexpr
  end
  if !days.nil?
    days = Utils.expr_to_lit_or_expr(days, str_to_lit: false)._rbexpr
  end
  if !hours.nil?
    hours = Utils.expr_to_lit_or_expr(hours, str_to_lit: false)._rbexpr
  end
  if !minutes.nil?
    minutes = Utils.expr_to_lit_or_expr(minutes, str_to_lit: false)._rbexpr
  end
  if !seconds.nil?
    seconds = Utils.expr_to_lit_or_expr(seconds, str_to_lit: false)._rbexpr
  end
  if !milliseconds.nil?
    milliseconds = Utils.expr_to_lit_or_expr(milliseconds, str_to_lit: false)._rbexpr
  end
  if !microseconds.nil?
    microseconds = Utils.expr_to_lit_or_expr(microseconds, str_to_lit: false)._rbexpr
  end
  if !nanoseconds.nil?
    nanoseconds = Utils.expr_to_lit_or_expr(nanoseconds, str_to_lit: false)._rbexpr
  end

  Utils.wrap_expr(
    Plr.duration(
      weeks,
      days,
      hours,
      minutes,
      seconds,
      milliseconds,
      microseconds,
      nanoseconds,
      time_unit
    )
  )
end

#elementExpr

Alias for an element in evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘

Returns:



23
24
25
# File 'lib/polars/functions/lazy.rb', line 23

def element
  col("")
end

#enable_string_cachenil

Enable the global string cache.

Categorical columns created under the same global string cache have the same underlying physical value when string values are equal. This allows the columns to be concatenated or used in a join operation, for example.

Examples:

Construct two Series using the same global string cache.

Polars.enable_string_cache
s1 = Polars::Series.new("color", ["red", "green", "red"], dtype: Polars::Categorical)
s2 = Polars::Series.new("color", ["blue", "red", "green"], dtype: Polars::Categorical)
Polars.disable_string_cache

As both Series are constructed under the same global string cache, they can be concatenated.

Polars.concat([s1, s2])
# =>
# shape: (6,)
# Series: 'color' [cat]
# [
#         "red"
#         "green"
#         "red"
#         "blue"
#         "red"
#         "green"
# ]

Returns:

  • (nil)


37
38
39
# File 'lib/polars/string_cache.rb', line 37

def enable_string_cache
  Plr.enable_string_cache
end

#exclude(columns) ⇒ Object

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# │ 2   ┆ b    ┆ 2.5  │
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# │ 2   ┆ 2.5  │
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# │ 2.5  │
# │ 1.5  │
# └──────┘

Parameters:

  • columns (Object)

    Column(s) to exclude from selection This can be:

    • a column name, or multiple column names
    • a regular expression starting with ^ and ending with $
    • a dtype or multiple dtypes

Returns:



941
942
943
# File 'lib/polars/functions/lazy.rb', line 941

def exclude(columns)
  col("*").exclude(columns)
end

#first(*columns) ⇒ Expr

Get the first value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.first)
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.first("b"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ b   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 4   │
# └─────┘
df.select(Polars.first("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 1   ┆ foo │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If not provided (default), returns an expression to take the first column of the context instead.

Returns:



459
460
461
462
463
464
465
# File 'lib/polars/functions/lazy.rb', line 459

def first(*columns)
  if columns.empty?
    return Utils.wrap_expr(Plr.first)
  end

  col(*columns).first
end

#fold(acc, f, exprs) ⇒ Expr

Accumulate over multiple columns horizontally/row wise with a left fold.

Returns:



745
746
747
748
749
750
751
752
753
# File 'lib/polars/functions/lazy.rb', line 745

def fold(acc, f, exprs)
  acc = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  if exprs.is_a?(Expr)
    exprs = [exprs]
  end

  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.fold(acc._rbexpr, f, exprs))
end

#format(fstring, *args) ⇒ Expr

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# │ foo_b_bar_2 │
# │ foo_c_bar_3 │
# └─────────────┘

Parameters:

  • fstring (String)

    A string that with placeholders. For example: "hello_{}" or "{}_world

  • args (Object)

    Expression(s) that fill the placeholders

Returns:



228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/polars/functions/as_datatype.rb', line 228

def format(fstring, *args)
  if fstring.scan("{}").length != args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  exprs = []

  arguments = args.each
  fstring.split(/(\{\})/).each do |s|
    if s == "{}"
      e = Utils.expr_to_lit_or_expr(arguments.next, str_to_lit: false)
      exprs << e
    elsif s.length > 0
      exprs << lit(s)
    end
  end

  concat_str(exprs, sep: "")
end

#from_epoch(column, unit: "s", eager: false) ⇒ Object

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • unit: "d" returns pl.Date
  • unit: "s" returns pl.Datetime"us"
  • unit: "ms" returns pl.Datetime["ms"]
  • unit: "us" returns pl.Datetime["us"]
  • unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘

Parameters:

  • column (Object)

    Series or expression to parse integers to pl.Datetime.

  • unit (String) (defaults to: "s")

    The unit of the timesteps since epoch time.

  • eager (Boolean) (defaults to: false)

    If eager evaluation is true, a Series is returned instead of an Expr.

Returns:



1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
# File 'lib/polars/functions/lazy.rb', line 1209

def from_epoch(column, unit: "s", eager: false)
  if Utils.strlike?(column)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(Date)
  elsif unit == "s"
    expr = (column.cast(Int64) * 1_000_000).cast(Datetime.new("us"))
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    expr = column.cast(Datetime.new(unit))
  else
    raise ArgumentError, "'unit' must be one of {{'ns', 'us', 'ms', 's', 'd'}}, got '#{unit}'."
  end

  if eager
    if !column.is_a?(Series)
      raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
    else
      column.to_frame.select(expr).to_series
    end
  else
    expr
  end
end

#get_dummies(df, columns: nil) ⇒ DataFrame

Convert categorical variables into dummy/indicator variables.

Parameters:

  • df (DataFrame)

    DataFrame to convert.

  • columns (Array, nil) (defaults to: nil)

    A subset of columns to convert to dummy variables. nil means "all columns".

Returns:



12
13
14
# File 'lib/polars/functions.rb', line 12

def get_dummies(df, columns: nil)
  df.to_dummies(columns: columns)
end

#groups(column) ⇒ Object

Syntactic sugar for Polars.col("foo").agg_groups.

Returns:



948
949
950
# File 'lib/polars/functions/lazy.rb', line 948

def groups(column)
  col(column).agg_groups
end

#head(column, n = 10) ⇒ Expr

Get the first n rows.

This function is syntactic sugar for col(column).head(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.head("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.head("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



571
572
573
# File 'lib/polars/functions/lazy.rb', line 571

def head(column, n = 10)
  col(column).head(n)
end

#implode(*columns) ⇒ Expr

Aggregate all column values into a list.

This function is syntactic sugar for col(name).implode.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => [9, 8, 7],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.implode("a"))
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# └───────────┘
df.select(Polars.implode("b", "c"))
# =>
# shape: (1, 2)
# ┌───────────┬───────────────────────┐
# │ b         ┆ c                     │
# │ ---       ┆ ---                   │
# │ list[i64] ┆ list[str]             │
# ╞═══════════╪═══════════════════════╡
# │ [9, 8, 7] ┆ ["foo", "bar", "foo"] │
# └───────────┴───────────────────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



150
151
152
# File 'lib/polars/functions/lazy.rb', line 150

def implode(*columns)
  col(*columns).implode
end

#int_range(start, stop = nil, step: 1, eager: false, dtype: nil) ⇒ Expr, Series Also known as: arange

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

Polars.arange(0, 3, eager: true)
# =>
# shape: (3,)
# Series: 'arange' [i64]
# [
#         0
#         1
#         2
# ]

Parameters:

  • start (Integer, Expr, Series)

    Lower bound of range.

  • stop (Integer, Expr, Series) (defaults to: nil)

    Upper bound of range.

  • step (Integer) (defaults to: 1)

    Step size of the range.

  • eager (Boolean) (defaults to: false)

    If eager evaluation is True, a Series is returned instead of an Expr.

  • dtype (Symbol) (defaults to: nil)

    Apply an explicit integer dtype to the resulting expression (default is Int64).

Returns:



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/polars/functions/range/int_range.rb', line 31

def int_range(start, stop = nil, step: 1, eager: false, dtype: nil)
  if stop.nil?
    stop = start
    start = 0
  end

  start = Utils.parse_as_expression(start)
  stop = Utils.parse_as_expression(stop)
  dtype ||= Int64
  dtype = dtype.to_s if dtype.is_a?(Symbol)
  result = Utils.wrap_expr(Plr.int_range(start, stop, step, dtype)).alias("arange")

  if eager
    return select(result).to_series
  end

  result
end

#last(*columns) ⇒ Expr

Get the last value.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "baz"]
  }
)
df.select(Polars.last)
# =>
# shape: (3, 1)
# ┌─────┐
# │ c   │
# │ --- │
# │ str │
# ╞═════╡
# │ foo │
# │ bar │
# │ baz │
# └─────┘
df.select(Polars.last("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘
df.select(Polars.last("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ str │
# ╞═════╪═════╡
# │ 2   ┆ baz │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names. If set to nil (default), returns an expression to take the last column of the context instead.

Returns:



519
520
521
522
523
524
525
# File 'lib/polars/functions/lazy.rb', line 519

def last(*columns)
  if columns.empty?
    return Utils.wrap_expr(Plr.last)
  end

  col(*columns).last
end

#lenExpr Also known as: length

Return the number of rows in the context.

This is similar to COUNT(*) in SQL.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, nil],
    "b" => [3, nil, nil],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.len)
# =>
# shape: (1, 1)
# ┌─────┐
# │ len │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 3   │
# └─────┘

Generate an index column by using len in conjunction with int_range.

df.select([
  Polars.int_range(Polars.len, dtype: Polars::UInt32).alias("index"),
  Polars.all
])
# =>
# shape: (3, 4)
# ┌───────┬──────┬──────┬─────┐
# │ index ┆ a    ┆ b    ┆ c   │
# │ ---   ┆ ---  ┆ ---  ┆ --- │
# │ u32   ┆ i64  ┆ i64  ┆ str │
# ╞═══════╪══════╪══════╪═════╡
# │ 0     ┆ 1    ┆ 3    ┆ foo │
# │ 1     ┆ 2    ┆ null ┆ bar │
# │ 2     ┆ null ┆ null ┆ foo │
# └───────┴──────┴──────┴─────┘

Returns:



44
45
46
# File 'lib/polars/functions/len.rb', line 44

def len
  Utils.wrap_expr(Plr.len)
end

#lit(value, dtype: nil, allow_object: nil) ⇒ Expr

Return an expression representing a literal value.

Returns:



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/polars/functions/lit.rb', line 6

def lit(value, dtype: nil, allow_object: nil)
  if value.is_a?(::Time) || value.is_a?(::DateTime)
    time_unit = dtype&.time_unit || "ns"
    time_zone = dtype.&time_zone
    e = lit(Utils._datetime_to_pl_timestamp(value, time_unit)).cast(Datetime.new(time_unit))
    if time_zone
      return e.dt.replace_time_zone(time_zone.to_s)
    else
      return e
    end
  elsif value.is_a?(::Date)
    return lit(::Time.utc(value.year, value.month, value.day)).cast(Date)
  elsif value.is_a?(Polars::Series)
    name = value.name
    value = value._s
    e = Utils.wrap_expr(Plr.lit(value, allow_object))
    if name == ""
      return e
    end
    return e.alias(name)
  elsif (defined?(Numo::NArray) && value.is_a?(Numo::NArray)) || value.is_a?(::Array)
    return lit(Series.new("", value))
  elsif dtype
    return Utils.wrap_expr(Plr.lit(value, allow_object)).cast(dtype)
  end

  Utils.wrap_expr(Plr.lit(value, allow_object))
end

#max(*names) ⇒ Expr

Get the maximum value.

Syntactic sugar for col(names).max.

Examples:

Get the maximum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.max("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# └─────┘

Get the maximum value of multiple columns.

df.select(Polars.max("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘
df.select(Polars.max("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 8   ┆ 5   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



135
136
137
# File 'lib/polars/functions/aggregation/vertical.rb', line 135

def max(*names)
  col(*names).max
end

#max_horizontal(*exprs) ⇒ Expr

Get the maximum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(max: Polars.max_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ max │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 4   │
# │ 8   ┆ 5    ┆ y   ┆ 8   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



103
104
105
106
# File 'lib/polars/functions/aggregation/horizontal.rb', line 103

def max_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.max_horizontal(rbexprs))
end

#mean(*columns) ⇒ Expr Also known as: avg

Get the mean value.

This function is syntactic sugar for col(columns).mean.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.mean("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 4.0 │
# └─────┘
df.select(Polars.mean("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬──────────┐
# │ a   ┆ b        │
# │ --- ┆ ---      │
# │ f64 ┆ f64      │
# ╞═════╪══════════╡
# │ 4.0 ┆ 3.666667 │
# └─────┴──────────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



272
273
274
# File 'lib/polars/functions/lazy.rb', line 272

def mean(*columns)
  col(*columns).mean
end

#mean_horizontal(*exprs) ⇒ Expr

Compute the mean of all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(mean: Polars.mean_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬──────┐
# │ a   ┆ b    ┆ c   ┆ mean │
# │ --- ┆ ---  ┆ --- ┆ ---  │
# │ i64 ┆ i64  ┆ str ┆ f64  │
# ╞═════╪══════╪═════╪══════╡
# │ 1   ┆ 4    ┆ x   ┆ 2.5  │
# │ 8   ┆ 5    ┆ y   ┆ 6.5  │
# │ 3   ┆ null ┆ z   ┆ 3.0  │
# └─────┴──────┴─────┴──────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



202
203
204
205
# File 'lib/polars/functions/aggregation/horizontal.rb', line 202

def mean_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
end

#median(*columns) ⇒ Expr

Get the median value.

This function is syntactic sugar for pl.col(columns).median.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.median("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ f64 │
# ╞═════╡
# │ 3.0 │
# └─────┘
df.select(Polars.median("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ f64 ┆ f64 │
# ╞═════╪═════╡
# │ 3.0 ┆ 4.0 │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



316
317
318
# File 'lib/polars/functions/lazy.rb', line 316

def median(*columns)
  col(*columns).median
end

#min(*names) ⇒ Expr

Get the minimum value.

Syntactic sugar for col(names).min.

Examples:

Get the minimum value of a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.min("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# └─────┘

Get the minimum value of multiple columns.

df.select(Polars.min("^a|b$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘
df.select(Polars.min("a", "b"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ b   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 1   ┆ 2   │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



190
191
192
# File 'lib/polars/functions/aggregation/vertical.rb', line 190

def min(*names)
  col(*names).min
end

#min_horizontal(*exprs) ⇒ Expr

Get the minimum value horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(min: Polars.min_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ min │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 1   │
# │ 8   ┆ 5    ┆ y   ┆ 5   │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



136
137
138
139
# File 'lib/polars/functions/aggregation/horizontal.rb', line 136

def min_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.min_horizontal(rbexprs))
end

#n_unique(*columns) ⇒ Expr

Count unique values.

This function is syntactic sugar for col(columns).n_unique.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 1],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.n_unique("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# └─────┘
df.select(Polars.n_unique("b", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ u32 ┆ u32 │
# ╞═════╪═════╡
# │ 3   ┆ 2   │
# └─────┴─────┘

Parameters:

  • columns (Array)

    One or more column names.

Returns:



359
360
361
# File 'lib/polars/functions/lazy.rb', line 359

def n_unique(*columns)
  col(*columns).n_unique
end

#ones(n, dtype: nil, eager: true) ⇒ Object

Construct a column of length n filled with ones.

This is syntactic sugar for the repeat function.

Examples:

Polars.ones(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'ones' [i8]
# [
#         1
#         1
#         1
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



81
82
83
84
85
86
87
88
# File 'lib/polars/functions/repeat.rb', line 81

def ones(n, dtype: nil, eager: true)
  if (zero = _one_or_zero_by_dtype(1, dtype)).nil?
    msg = "invalid dtype for `ones`; found #{dtype}"
    raise TypeError, msg
  end

  repeat(zero, n, dtype: dtype, eager: eager).alias("ones")
end

#pearson_corr(a, b, ddof: 1) ⇒ Expr

Compute the pearson's correlation between two columns.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom

Returns:



53
54
55
# File 'lib/polars/functions.rb', line 53

def pearson_corr(a, b, ddof: 1)
  corr(a, b, method: "pearson", ddof: ddof)
end

#quantile(column, quantile, interpolation: "nearest") ⇒ Expr

Syntactic sugar for Polars.col("foo").quantile(...).

Parameters:

  • column (String)

    Column name.

  • quantile (Float)

    Quantile between 0.0 and 1.0.

  • interpolation ("nearest", "higher", "lower", "midpoint", "linear") (defaults to: "nearest")

    Interpolation method.

Returns:



962
963
964
# File 'lib/polars/functions/lazy.rb', line 962

def quantile(column, quantile, interpolation: "nearest")
  col(column).quantile(quantile, interpolation: interpolation)
end

#repeat(value, n, dtype: nil, eager: false, name: nil) ⇒ Object

Repeat a single value n times.

Examples:

Construct a column with a repeated value in a lazy context.

Polars.select(Polars.repeat("z", 3)).to_series
# =>
# shape: (3,)
# Series: 'repeat' [str]
# [
#         "z"
#         "z"
#         "z"
# ]

Generate a Series directly by setting eager: true.

Polars.repeat(3, 3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'repeat' [i8]
# [
#         3
#         3
#         3
# ]

Parameters:

  • value (Object)

    Value to repeat.

  • n (Integer)

    Repeat n times.

  • eager (Boolean) (defaults to: false)

    Run eagerly and collect into a Series.

  • name (String) (defaults to: nil)

    Only used in eager mode. As expression, use alias.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/polars/functions/repeat.rb', line 37

def repeat(value, n, dtype: nil, eager: false, name: nil)
  if !name.nil?
    warn "the `name` argument is deprecated. Use the `alias` method instead."
  end

  if n.is_a?(Integer)
    n = lit(n)
  end

  value = Utils.parse_as_expression(value, str_as_lit: true)
  expr = Utils.wrap_expr(Plr.repeat(value, n._rbexpr, dtype))
  if !name.nil?
    expr = expr.alias(name)
  end
  if eager
    return select(expr).to_series
  end
  expr
end

#select(*exprs, **named_exprs) ⇒ DataFrame

Run polars expressions without a context.

This is syntactic sugar for running df.select on an empty DataFrame.

Examples:

foo = Polars::Series.new("foo", [1, 2, 3])
bar = Polars::Series.new("bar", [3, 2, 1])
Polars.select(min: Polars.min_horizontal(foo, bar))
# =>
# shape: (3, 1)
# ┌─────┐
# │ min │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 2   │
# │ 1   │
# └─────┘

Parameters:

  • exprs (Array)

    Column(s) to select, specified as positional arguments. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

  • named_exprs (Hash)

    Additional columns to select, specified as keyword arguments. The columns will be renamed to the keyword used.

Returns:



1088
1089
1090
# File 'lib/polars/functions/lazy.rb', line 1088

def select(*exprs, **named_exprs)
  DataFrame.new([]).select(*exprs, **named_exprs)
end

#set_random_seed(seed) ⇒ nil

Set the global random seed for Polars.

This random seed is used to determine things such as shuffle ordering.

Parameters:

  • seed (Integer)

    A non-negative integer < 2**64 used to seed the internal global random number generator.

Returns:

  • (nil)


12
13
14
# File 'lib/polars/functions/random.rb', line 12

def set_random_seed(seed)
  Plr.set_random_seed(seed)
end

#spearman_rank_corr(a, b, ddof: 1, propagate_nans: false) ⇒ Expr

Compute the spearman rank correlation between two columns.

Missing data will be excluded from the computation.

Parameters:

  • a (Object)

    Column name or Expression.

  • b (Object)

    Column name or Expression.

  • ddof (Integer) (defaults to: 1)

    Delta degrees of freedom

  • propagate_nans (Boolean) (defaults to: false)

    If True any NaN encountered will lead to NaN in the output. Defaults to False where NaN are regarded as larger than any finite number and thus lead to the highest rank.

Returns:



39
40
41
# File 'lib/polars/functions.rb', line 39

def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  corr(a, b, method: "spearman", ddof: ddof, propagate_nans: propagate_nans)
end

#sql_expr(sql) ⇒ Expr

Parse one or more SQL expressions to polars expression(s).

Examples:

Parse a single SQL expression:

df = Polars::DataFrame.new({"a" => [2, 1]})
expr = Polars.sql_expr("MAX(a)")
df.select(expr)
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 2   │
# └─────┘

Parse multiple SQL expressions:

df.with_columns(
  *Polars.sql_expr(["POWER(a,a) AS a_a", "CAST(a AS TEXT) AS a_txt"])
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬───────┐
# │ a   ┆ a_a ┆ a_txt │
# │ --- ┆ --- ┆ ---   │
# │ i64 ┆ f64 ┆ str   │
# ╞═════╪═════╪═══════╡
# │ 2   ┆ 4.0 ┆ 2     │
# │ 1   ┆ 1.0 ┆ 1     │
# └─────┴─────┴───────┘

Parameters:

  • sql (Object)

    One or more SQL expressions.

Returns:



1272
1273
1274
1275
1276
1277
1278
# File 'lib/polars/functions/lazy.rb', line 1272

def sql_expr(sql)
  if sql.is_a?(::String)
    Utils.wrap_expr(Plr.sql_expr(sql))
  else
    sql.map { |q| Utils.wrap_expr(Plr.sql_expr(q)) }
  end
end

#std(column, ddof: 1) ⇒ Expr

Get the standard deviation.

This function is syntactic sugar for col(column).std(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.std("a"))
# =>
# shape: (1, 1)
# ┌──────────┐
# │ a        │
# │ ---      │
# │ f64      │
# ╞══════════╡
# │ 3.605551 │
# └──────────┘
df["a"].std
# => 3.605551275463989

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



189
190
191
# File 'lib/polars/functions/lazy.rb', line 189

def std(column, ddof: 1)
  col(column).std(ddof: ddof)
end

#struct(exprs, eager: false) ⇒ Object

Collect several columns into a Series of dtype Struct.

Examples:

Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
).select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Only collect specific columns as a struct:

df = Polars::DataFrame.new(
  {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
)
df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
# =>
# shape: (4, 4)
# ┌─────┬───────┬─────┬─────────────┐
# │ a   ┆ b     ┆ c   ┆ a_and_b     │
# │ --- ┆ ---   ┆ --- ┆ ---         │
# │ i64 ┆ str   ┆ i64 ┆ struct[2]   │
# ╞═════╪═══════╪═════╪═════════════╡
# │ 1   ┆ one   ┆ 9   ┆ {1,"one"}   │
# │ 2   ┆ two   ┆ 8   ┆ {2,"two"}   │
# │ 3   ┆ three ┆ 7   ┆ {3,"three"} │
# │ 4   ┆ four  ┆ 6   ┆ {4,"four"}  │
# └─────┴───────┴─────┴─────────────┘

Parameters:

  • exprs (Object)

    Columns/Expressions to collect into a Struct

  • eager (Boolean) (defaults to: false)

    Evaluate immediately

Returns:



140
141
142
143
144
145
146
# File 'lib/polars/functions/as_datatype.rb', line 140

def struct(exprs, eager: false)
  if eager
    Polars.select(struct(exprs, eager: false)).to_series
  end
  exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(Plr.as_struct(exprs))
end

#sum(*names) ⇒ Expr

Sum all values.

Syntactic sugar for col(name).sum.

Examples:

Sum a column.

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
df.select(Polars.sum("a"))
# =>
# shape: (1, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 3   │
# └─────┘

Sum multiple columns.

df.select(Polars.sum("a", "c"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ a   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 3   ┆ 11  │
# └─────┴─────┘
df.select(Polars.sum("^.*[bc]$"))
# =>
# shape: (1, 2)
# ┌─────┬─────┐
# │ b   ┆ c   │
# │ --- ┆ --- │
# │ i64 ┆ i64 │
# ╞═════╪═════╡
# │ 7   ┆ 11  │
# └─────┴─────┘

Parameters:

  • names (Array)

    Name(s) of the columns to use in the aggregation.

Returns:



245
246
247
# File 'lib/polars/functions/aggregation/vertical.rb', line 245

def sum(*names)
  col(*names).sum
end

#sum_horizontal(*exprs) ⇒ Expr

Sum all values horizontally across columns.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, nil],
    "c" => ["x", "y", "z"]
  }
)
df.with_columns(sum: Polars.sum_horizontal("a", "b"))
# =>
# shape: (3, 4)
# ┌─────┬──────┬─────┬─────┐
# │ a   ┆ b    ┆ c   ┆ sum │
# │ --- ┆ ---  ┆ --- ┆ --- │
# │ i64 ┆ i64  ┆ str ┆ i64 │
# ╞═════╪══════╪═════╪═════╡
# │ 1   ┆ 4    ┆ x   ┆ 5   │
# │ 8   ┆ 5    ┆ y   ┆ 13  │
# │ 3   ┆ null ┆ z   ┆ 3   │
# └─────┴──────┴─────┴─────┘

Parameters:

  • exprs (Array)

    Column(s) to use in the aggregation. Accepts expression input. Strings are parsed as column names, other non-expression inputs are parsed as literals.

Returns:



169
170
171
172
# File 'lib/polars/functions/aggregation/horizontal.rb', line 169

def sum_horizontal(*exprs)
  rbexprs = Utils.parse_as_list_of_expressions(*exprs)
  Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
end

#tail(column, n = 10) ⇒ Expr

Get the last n rows.

This function is syntactic sugar for col(column).tail(n).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.tail("a"))
# =>
# shape: (3, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# │ 8   │
# │ 3   │
# └─────┘
df.select(Polars.tail("a", 2))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 8   │
# │ 3   │
# └─────┘

Parameters:

  • column (Object)

    Column name.

  • n (Integer) (defaults to: 10)

    Number of rows to return.

Returns:



619
620
621
# File 'lib/polars/functions/lazy.rb', line 619

def tail(column, n = 10)
  col(column).tail(n)
end

#to_list(name) ⇒ Expr

Aggregate to list.

Returns:



19
20
21
# File 'lib/polars/functions.rb', line 19

def to_list(name)
  col(name).list
end

#using_string_cacheBoolean

Check whether the global string cache is enabled.

Returns:



71
72
73
# File 'lib/polars/string_cache.rb', line 71

def using_string_cache
  Plr.using_string_cache
end

#var(column, ddof: 1) ⇒ Expr

Get the variance.

This function is syntactic sugar for col(column).var(ddof: ddof).

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 8, 3],
    "b" => [4, 5, 2],
    "c" => ["foo", "bar", "foo"]
  }
)
df.select(Polars.var("a"))
# =>
# shape: (1, 1)
# ┌──────┐
# │ a    │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ 13.0 │
# └──────┘
df["a"].var
# => 13.0

Parameters:

  • column (Object)

    Column name.

  • ddof (Integer) (defaults to: 1)

    “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, where N represents the number of elements. By default ddof is 1.

Returns:



228
229
230
# File 'lib/polars/functions/lazy.rb', line 228

def var(column, ddof: 1)
  col(column).var(ddof: ddof)
end

#when(expr) ⇒ When

Start a "when, then, otherwise" expression.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# │ 3   ┆ 4   ┆ 1       │
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘

Returns:

  • (When)


21
22
23
24
25
# File 'lib/polars/functions/whenthen.rb', line 21

def when(expr)
  expr = Utils.expr_to_lit_or_expr(expr)
  pw = Plr.when(expr._rbexpr)
  When.new(pw)
end

#zeros(n, dtype: nil, eager: true) ⇒ Object

Construct a column of length n filled with zeros.

This is syntactic sugar for the repeat function.

Examples:

Polars.zeros(3, dtype: Polars::Int8, eager: true)
# =>
# shape: (3,)
# Series: 'zeros' [i8]
# [
#         0
#         0
#         0
# ]

Parameters:

  • n (Integer)

    Length of the resulting column.

  • dtype (Object) (defaults to: nil)

    Data type of the resulting column. Defaults to Float64.

  • eager (Boolean) (defaults to: true)

    Evaluate immediately and return a Series. If set to false, return an expression instead.

Returns:



114
115
116
117
118
119
120
121
# File 'lib/polars/functions/repeat.rb', line 114

def zeros(n, dtype: nil, eager: true)
  if (zero = _one_or_zero_by_dtype(0, dtype)).nil?
    msg = "invalid dtype for `zeros`; found #{dtype}"
    raise TypeError, msg
  end

  repeat(zero, n, dtype: dtype, eager: eager).alias("zeros")
end