Module: Polars::LazyFunctions

Included in:
Polars
Defined in:
lib/polars/lazy_functions.rb

Instance Method Summary collapse

Instance Method Details

#all(name = nil) ⇒ Expr

Do one of two things.

  • perform a columnwise or elementwise AND operation
  • a wildcard column selection

Examples:

Sum all columns

df = Polars::DataFrame.new(
  {"a" => [1, 2, 3], "b" => ["hello", "foo", "bar"], "c" => [1, 1, 1]}
)
df.select(Polars.all.sum)
# =>
# shape: (1, 3)
# ┌─────┬──────┬─────┐
# │ a   ┆ b    ┆ c   │
# │ --- ┆ ---  ┆ --- │
# │ i64 ┆ str  ┆ i64 │
# ╞═════╪══════╪═════╡
# │ 6   ┆ null ┆ 3   │
# └─────┴──────┴─────┘


589
590
591
592
593
594
595
596
597
# File 'lib/polars/lazy_functions.rb', line 589

# Select all columns, or AND-reduce the given column(s).
#
# Behaviour depends on the argument:
# - `nil`: returns the wildcard selection `col("*")`.
# - String or Symbol: returns `col(name).all`.
# - anything else (e.g. a list of columns/expressions): folds the inputs
#   with a boolean AND, aliased as "all".
#
# @param name [String, Symbol, Object, nil]
#   Column name, a collection of columns/expressions, or nil.
# @return [Expr]
def all(name = nil)
  return col("*") if name.nil?
  return col(name).all if name.is_a?(String) || name.is_a?(Symbol)

  # Same construction `any` uses for its OR fold, with `true` as the
  # neutral starting accumulator for AND.
  fold(lit(true), ->(a, b) { a.cast(:bool) & b.cast(:bool) }, name).alias("all")
end

#any(name) ⇒ Expr

Evaluate columnwise or elementwise with a bitwise OR operation.



488
489
490
491
492
493
494
# File 'lib/polars/lazy_functions.rb', line 488

# Evaluate columnwise or elementwise with a bitwise OR operation.
#
# A String or Symbol is treated as a single column name and returns
# `col(name).any`. Any other input is folded with a boolean OR and
# aliased as "any".
#
# @param name [String, Symbol, Object]
#   Column name, or a collection of columns/expressions to fold.
# @return [Expr]
def any(name)
  # Accept Symbol as well as String, as `all`, `max`, `min` and `sum` do.
  return col(name).any if name.is_a?(String) || name.is_a?(Symbol)

  fold(lit(false), ->(a, b) { a.cast(:bool) | b.cast(:bool) }, name).alias("any")
end

#arange(low, high, step: 1, eager: false, dtype: nil) ⇒ Expr, Series

Create a range expression (or Series).

This can be used in a select, with_column, etc. Be sure that the resulting range size is equal to the length of the DataFrame you are collecting.

Examples:

df.lazy.filter(Polars.col("foo") < Polars.arange(0, 100)).collect


640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
# File 'lib/polars/lazy_functions.rb', line 640

# Create a range expression (or Series).
#
# @param low [Object] Lower bound; converted with `Utils.expr_to_lit_or_expr`.
# @param high [Object] Upper bound; converted the same way.
# @param step [Integer] Step passed through to `RbExpr.arange`.
# @param eager [Boolean] When true, evaluate and return a Series named "arange".
# @param dtype [Object, nil] When given and not equal to "i64", the range is cast to it.
# @return [Expr, Series]
def arange(low, high, step: 1, eager: false, dtype: nil)
  start_expr = Utils.expr_to_lit_or_expr(low, str_to_lit: false)
  stop_expr = Utils.expr_to_lit_or_expr(high, str_to_lit: false)
  rb_range = RbExpr.arange(start_expr._rbexpr, stop_expr._rbexpr, step)
  result = Utils.wrap_expr(rb_range)

  result = result.cast(dtype) if !dtype.nil? && dtype != "i64"

  return result unless eager

  # Eager mode: evaluate the expression against an empty frame.
  frame = DataFrame.new
  frame.select(result).to_series.rename("arange", in_place: true)
end

#arg_where(condition, eager: false) ⇒ Expr, Series

Return indices where condition evaluates true.

Examples:

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4, 5]})
df.select(
  [
    Polars.arg_where(Polars.col("a") % 2 == 0)
  ]
).to_series
# =>
# shape: (2,)
# Series: 'a' [u32]
# [
#         1
#         3
# ]


1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
# File 'lib/polars/lazy_functions.rb', line 1057

# Return indices where `condition` evaluates true.
#
# @param condition [Expr, Series] Boolean condition. Must be a Series when `eager` is true.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Expr, Series]
# @raise [ArgumentError] if `eager` is true and `condition` is not a Series.
def arg_where(condition, eager: false)
  unless eager
    parsed = Utils.expr_to_lit_or_expr(condition, str_to_lit: true)
    return Utils.wrap_expr(_arg_where(parsed._rbexpr))
  end

  unless condition.is_a?(Series)
    raise ArgumentError, "expected 'Series' in 'arg_where' if 'eager=True', got #{condition.class.name}"
  end

  # Run the lazy variant against a one-column frame built from the Series.
  frame = condition.to_frame
  frame.select(arg_where(Polars.col(condition.name))).to_series
end

#argsort_by(exprs, reverse: false) ⇒ Expr

Find the indexes that would sort the columns.

Argsort by multiple columns. The first column will be used for the ordering. If there are duplicates in the first column, the second column will be used to determine the ordering and so on.



671
672
673
674
675
676
677
678
679
680
# File 'lib/polars/lazy_functions.rb', line 671

# Find the indexes that would sort the columns.
#
# @param exprs [Object, Array] One column/expression or an array of them.
# @param reverse [Boolean, Array<Boolean>]
#   A single flag is repeated once per expression; any other value is
#   passed through unchanged.
# @return [Expr]
def argsort_by(exprs, reverse: false)
  exprs = [exprs] unless exprs.is_a?(Array)

  # Broadcast a scalar flag so there is one entry per sort key.
  reverse = Array.new(exprs.length, reverse) if reverse == true || reverse == false

  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.argsort_by(rb_exprs, reverse))
end

#avg(column) ⇒ Expr, Float

Get the mean value.



184
185
186
# File 'lib/polars/lazy_functions.rb', line 184

# Get the mean value. Delegates to `mean`.
#
# @param column [Object] Forwarded unchanged to `mean`.
# @return [Expr, Float]
def avg(column)
  average = mean(column)
  average
end

#coalesce(exprs) ⇒ Expr

Folds the expressions from left to right, keeping the first non-null value.

Examples:

df = Polars::DataFrame.new(
  [
    [nil, 1.0, 1.0],
    [nil, 2.0, 2.0],
    [nil, nil, 3.0],
    [nil, nil, nil]
  ],
  columns: [["a", :f64], ["b", :f64], ["c", :f64]]
)
df.with_column(Polars.coalesce(["a", "b", "c", 99.9]).alias("d"))
# =>
# shape: (4, 4)
# ┌──────┬──────┬──────┬──────┐
# │ a    ┆ b    ┆ c    ┆ d    │
# │ ---  ┆ ---  ┆ ---  ┆ ---  │
# │ f64  ┆ f64  ┆ f64  ┆ f64  │
# ╞══════╪══════╪══════╪══════╡
# │ null ┆ 1.0  ┆ 1.0  ┆ 1.0  │
# ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ null ┆ 2.0  ┆ 2.0  ┆ 2.0  │
# ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ null ┆ null ┆ 3.0  ┆ 3.0  │
# ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ null ┆ null ┆ null ┆ 99.9 │
# └──────┴──────┴──────┴──────┘


1102
1103
1104
1105
# File 'lib/polars/lazy_functions.rb', line 1102

# Fold the expressions from left to right, keeping the first non-null value.
#
# @param exprs [Object] Columns/expressions, converted with `Utils.selection_to_rbexpr_list`.
# @return [Expr]
def coalesce(exprs)
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  combined = _coalesce_exprs(rb_exprs)
  Utils.wrap_expr(combined)
end

#col(name) ⇒ Expr

Return an expression representing a column in a DataFrame.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/polars/lazy_functions.rb', line 6

# Return an expression representing a column in a DataFrame.
#
# Accepted inputs, as handled below:
# - a Series: converted to an array first;
# - a DataType subclass: wrapped in a one-element array;
# - a DataType instance: dtype-based selection;
# - an Array of names (String/Symbol) or of Polars dtypes;
# - anything else: a single column name (Symbols are stringified).
#
# @param name [String, Symbol, Array, Series, DataType, Class]
# @return [Expr]
# @raise [ArgumentError] if an array's first element is neither a name nor a Polars dtype.
def col(name)
  name = name.to_a if name.is_a?(Series)
  name = [name] if name.is_a?(Class) && name < DataType

  return Utils.wrap_expr(_dtype_cols([name])) if name.is_a?(DataType)

  unless name.is_a?(Array)
    name = name.to_s if name.is_a?(Symbol)
    return Utils.wrap_expr(RbExpr.col(name))
  end

  # Only the first element decides how the whole array is interpreted.
  if name.length == 0 || name[0].is_a?(String) || name[0].is_a?(Symbol)
    names = name.map { |v| v.is_a?(Symbol) ? v.to_s : v }
    Utils.wrap_expr(RbExpr.cols(names))
  elsif Utils.is_polars_dtype(name[0])
    Utils.wrap_expr(_dtype_cols(name))
  else
    raise ArgumentError, "Expected list values to be all `str` or all `DataType`"
  end
end

#collect_all(lazy_frames, type_coercion: true, predicate_pushdown: true, projection_pushdown: true, simplify_expression: true, string_cache: false, no_optimization: false, slice_pushdown: true, common_subplan_elimination: true, allow_streaming: false) ⇒ Array

Collect multiple LazyFrames at the same time.

This runs all the computation graphs in parallel on Polars threadpool.



900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
# File 'lib/polars/lazy_functions.rb', line 900

# Collect multiple LazyFrames at the same time.
#
# Each frame's optimization toggles are applied, then all plans are handed
# to `_collect_all` and the results are wrapped into DataFrames.
#
# @param lazy_frames [Array] LazyFrames to collect.
# @param type_coercion [Boolean]
# @param predicate_pushdown [Boolean]
# @param projection_pushdown [Boolean]
# @param simplify_expression [Boolean]
# @param string_cache [Boolean] Accepted but not read by this method.
# @param no_optimization [Boolean]
#   When true, turns off predicate, projection and slice pushdown and
#   common subplan elimination.
# @param slice_pushdown [Boolean]
# @param common_subplan_elimination [Boolean]
# @param allow_streaming [Boolean]
# @return [Array]
def collect_all(
  lazy_frames,
  type_coercion: true,
  predicate_pushdown: true,
  projection_pushdown: true,
  simplify_expression: true,
  string_cache: false,
  no_optimization: false,
  slice_pushdown: true,
  common_subplan_elimination: true,
  allow_streaming: false
)
  if no_optimization
    predicate_pushdown = projection_pushdown = false
    slice_pushdown = common_subplan_elimination = false
  end

  plans = lazy_frames.map do |frame|
    frame._ldf.optimization_toggle(
      type_coercion,
      predicate_pushdown,
      projection_pushdown,
      simplify_expression,
      slice_pushdown,
      common_subplan_elimination,
      allow_streaming
    )
  end

  # Wrap the collected native frames into DataFrames.
  _collect_all(plans).map { |rbdf| Utils.wrap_df(rbdf) }
end

#concat_list(exprs) ⇒ Expr

Concat the arrays in a Series dtype List in linear time.



869
870
871
872
# File 'lib/polars/lazy_functions.rb', line 869

# Concat the arrays in a Series dtype List in linear time.
#
# @param exprs [Object] Columns/expressions, converted with `Utils.selection_to_rbexpr_list`.
# @return [Expr]
def concat_list(exprs)
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.concat_lst(rb_exprs))
end

#concat_str(exprs, sep: "") ⇒ Expr

Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2, 3],
    "b" => ["dogs", "cats", nil],
    "c" => ["play", "swim", "walk"]
  }
)
df.with_columns(
  [
    Polars.concat_str(
      [
        Polars.col("a") * 2,
        Polars.col("b"),
        Polars.col("c")
      ],
      sep: " "
    ).alias("full_sentence")
  ]
)
# =>
# shape: (3, 4)
# ┌─────┬──────┬──────┬───────────────┐
# │ a   ┆ b    ┆ c    ┆ full_sentence │
# │ --- ┆ ---  ┆ ---  ┆ ---           │
# │ i64 ┆ str  ┆ str  ┆ str           │
# ╞═════╪══════╪══════╪═══════════════╡
# │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 3   ┆ null ┆ walk ┆ null          │
# └─────┴──────┴──────┴───────────────┘


806
807
808
809
# File 'lib/polars/lazy_functions.rb', line 806

# Horizontally concat string columns, joined by `sep`.
#
# @param exprs [Object] Columns/expressions, converted with `Utils.selection_to_rbexpr_list`.
# @param sep [String] Separator passed to `RbExpr.concat_str`.
# @return [Expr]
def concat_str(exprs, sep: "")
  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  joined = RbExpr.concat_str(rb_exprs, sep)
  Utils.wrap_expr(joined)
end

#count(column = nil) ⇒ Expr, Integer

Count the number of values in this column/context.



68
69
70
71
72
73
74
75
76
77
78
# File 'lib/polars/lazy_functions.rb', line 68

# Count the number of values in this column/context.
#
# - `nil`: returns the wrapped `RbExpr.count` expression.
# - Series: returns `column.len`.
# - anything else: returns `col(column).count`.
#
# @param column [String, Symbol, Series, nil]
# @return [Expr, Integer]
def count(column = nil)
  return Utils.wrap_expr(RbExpr.count) if column.nil?
  return column.len if column.is_a?(Series)

  col(column).count
end

#cov(a, b) ⇒ Expr

Compute the covariance between two columns/expressions.



420
421
422
423
424
425
426
427
428
# File 'lib/polars/lazy_functions.rb', line 420

# Compute the covariance between two columns/expressions.
#
# @param a [String, Symbol, Expr] Column name or expression.
# @param b [String, Symbol, Expr] Column name or expression.
# @return [Expr]
def cov(a, b)
  # Column names may be given as String or Symbol; `col` handles both.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.cov(a._rbexpr, b._rbexpr))
end

#cumfold(acc, f, exprs, include_init: false) ⇒ Object

Note:

If you simply want the first encountered expression as accumulator, consider using cumreduce.

Cumulatively accumulate over multiple columns horizontally/row wise with a left fold.

Every cumulative result is added as a separate field in a Struct column.



472
473
474
475
476
477
478
479
480
# File 'lib/polars/lazy_functions.rb', line 472

# Cumulatively accumulate over multiple columns horizontally with a left fold.
#
# @param acc [Object] Initial accumulator; converted with `Utils.expr_to_lit_or_expr`.
# @param f [Proc] Binary function combining the accumulator and the next column.
# @param exprs [Expr, Object] A single Expr (wrapped into an array) or a collection of columns/expressions.
# @param include_init [Boolean] Passed through to `RbExpr.cumfold`.
# @return [Object] The wrapped result of `RbExpr.cumfold`.
def cumfold(acc, f, exprs, include_init: false)
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  exprs = [exprs] if exprs.is_a?(Expr)

  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.cumfold(initial._rbexpr, f, rb_exprs, include_init))
end

#cumsum(column) ⇒ Object

Cumulatively sum values in a column/Series, or horizontally across list of columns/expressions.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [1, 2],
    "b" => [3, 4],
    "c" => [5, 6]
  }
)
# =>
# shape: (2, 3)
# ┌─────┬─────┬─────┐
# │ a   ┆ b   ┆ c   │
# │ --- ┆ --- ┆ --- │
# │ i64 ┆ i64 ┆ i64 │
# ╞═════╪═════╪═════╡
# │ 1   ┆ 3   ┆ 5   │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
# │ 2   ┆ 4   ┆ 6   │
# └─────┴─────┴─────┘

Cumulatively sum a column by name:

df.select(Polars.cumsum("a"))
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ i64 │
# ╞═════╡
# │ 1   │
# ├╌╌╌╌╌┤
# │ 3   │
# └─────┘

Cumulatively sum a list of columns/expressions horizontally:

df.with_column(Polars.cumsum(["a", "c"]))
# =>
# shape: (2, 4)
# ┌─────┬─────┬─────┬───────────┐
# │ a   ┆ b   ┆ c   ┆ cumsum    │
# │ --- ┆ --- ┆ --- ┆ ---       │
# │ i64 ┆ i64 ┆ i64 ┆ struct[2] │
# ╞═════╪═════╪═════╪═══════════╡
# │ 1   ┆ 3   ┆ 5   ┆ {1,6}     │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2   ┆ 4   ┆ 6   ┆ {2,8}     │
# └─────┴─────┴─────┴───────────┘


356
357
358
359
360
361
362
363
364
# File 'lib/polars/lazy_functions.rb', line 356

# Cumulatively sum values in a column/Series, or horizontally across a
# list of columns/expressions.
#
# - Series: returns `column.cumsum`.
# - String or Symbol: returns `col(column).cumsum`.
# - anything else: horizontal `cumfold` with `+`, aliased as "cumsum".
#
# @param column [String, Symbol, Series, Object]
# @return [Object]
def cumsum(column)
  case column
  when Series
    column.cumsum
  when String, Symbol
    # A Symbol is a column name too, not a collection to fold over.
    col(column).cumsum
  else
    cumfold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("cumsum")
  end
end

#duration(days: nil, seconds: nil, nanoseconds: nil, microseconds: nil, milliseconds: nil, minutes: nil, hours: nil, weeks: nil) ⇒ Expr

Create polars Duration from distinct time components.

Examples:

df = Polars::DataFrame.new(
  {
    "datetime" => [DateTime.new(2022, 1, 1), DateTime.new(2022, 1, 2)],
    "add" => [1, 2]
  }
)
df.select(
  [
    (Polars.col("datetime") + Polars.duration(weeks: "add")).alias("add_weeks"),
    (Polars.col("datetime") + Polars.duration(days: "add")).alias("add_days"),
    (Polars.col("datetime") + Polars.duration(seconds: "add")).alias("add_seconds"),
    (Polars.col("datetime") + Polars.duration(milliseconds: "add")).alias(
      "add_milliseconds"
    ),
    (Polars.col("datetime") + Polars.duration(hours: "add")).alias("add_hours")
  ]
)
# =>
# shape: (2, 5)
# ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐
# │ add_weeks           ┆ add_days            ┆ add_seconds         ┆ add_milliseconds        ┆ add_hours           │
# │ ---                 ┆ ---                 ┆ ---                 ┆ ---                     ┆ ---                 │
# │ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]        ┆ datetime[ns]            ┆ datetime[ns]        │
# ╞═════════════════════╪═════════════════════╪═════════════════════╪═════════════════════════╪═════════════════════╡
# │ 2022-01-08 00:00:00 ┆ 2022-01-02 00:00:00 ┆ 2022-01-01 00:00:01 ┆ 2022-01-01 00:00:00.001 ┆ 2022-01-01 01:00:00 │
# ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2022-01-16 00:00:00 ┆ 2022-01-04 00:00:00 ┆ 2022-01-02 00:00:02 ┆ 2022-01-02 00:00:00.002 ┆ 2022-01-02 02:00:00 │
# └─────────────────────┴─────────────────────┴─────────────────────┴─────────────────────────┴─────────────────────┘


715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
# File 'lib/polars/lazy_functions.rb', line 715

# Create a polars Duration from distinct time components.
#
# Every non-nil component is converted with `Utils.expr_to_lit_or_expr`
# (`str_to_lit: false`); nil components are passed to `_rb_duration` as nil.
#
# @param days [Object, nil]
# @param seconds [Object, nil]
# @param nanoseconds [Object, nil]
# @param microseconds [Object, nil]
# @param milliseconds [Object, nil]
# @param minutes [Object, nil]
# @param hours [Object, nil]
# @param weeks [Object, nil]
# @return [Expr]
def duration(
  days: nil,
  seconds: nil,
  nanoseconds: nil,
  microseconds: nil,
  milliseconds: nil,
  minutes: nil,
  hours: nil,
  weeks: nil
)
  # Shared conversion for every component; leaves nil untouched.
  to_rbexpr = lambda do |component|
    component.nil? ? nil : Utils.expr_to_lit_or_expr(component, str_to_lit: false)._rbexpr
  end

  hours = to_rbexpr.call(hours)
  minutes = to_rbexpr.call(minutes)
  seconds = to_rbexpr.call(seconds)
  milliseconds = to_rbexpr.call(milliseconds)
  microseconds = to_rbexpr.call(microseconds)
  nanoseconds = to_rbexpr.call(nanoseconds)
  days = to_rbexpr.call(days)
  weeks = to_rbexpr.call(weeks)

  rb_duration = _rb_duration(
    days,
    seconds,
    nanoseconds,
    microseconds,
    milliseconds,
    minutes,
    hours,
    weeks
  )
  Utils.wrap_expr(rb_duration)
end

#element ⇒ Expr

Alias for an element being evaluated in an eval expression.

Examples:

A horizontal rank computation by taking the elements of a list

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).arr.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f32]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘


54
55
56
# File 'lib/polars/lazy_functions.rb', line 54

# Alias for an element being evaluated in an eval expression.
#
# Implemented as the column with the empty name.
#
# @return [Expr]
def element
  unnamed = ""
  col(unnamed)
end

#exclude(columns) ⇒ Object

Exclude certain columns from a wildcard/regex selection.

Examples:

df = Polars::DataFrame.new(
  {
    "aa" => [1, 2, 3],
    "ba" => ["a", "b", nil],
    "cc" => [nil, 2.5, 1.5]
  }
)
# =>
# shape: (3, 3)
# ┌─────┬──────┬──────┐
# │ aa  ┆ ba   ┆ cc   │
# │ --- ┆ ---  ┆ ---  │
# │ i64 ┆ str  ┆ f64  │
# ╞═════╪══════╪══════╡
# │ 1   ┆ a    ┆ null │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ 2   ┆ b    ┆ 2.5  │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ 3   ┆ null ┆ 1.5  │
# └─────┴──────┴──────┘

Exclude by column name(s):

df.select(Polars.exclude("ba"))
# =>
# shape: (3, 2)
# ┌─────┬──────┐
# │ aa  ┆ cc   │
# │ --- ┆ ---  │
# │ i64 ┆ f64  │
# ╞═════╪══════╡
# │ 1   ┆ null │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ 2   ┆ 2.5  │
# ├╌╌╌╌╌┼╌╌╌╌╌╌┤
# │ 3   ┆ 1.5  │
# └─────┴──────┘

Exclude by regex, e.g. removing all columns whose names end with the letter "a":

df.select(Polars.exclude("^.*a$"))
# =>
# shape: (3, 1)
# ┌──────┐
# │ cc   │
# │ ---  │
# │ f64  │
# ╞══════╡
# │ null │
# ├╌╌╌╌╌╌┤
# │ 2.5  │
# ├╌╌╌╌╌╌┤
# │ 1.5  │
# └──────┘


561
562
563
# File 'lib/polars/lazy_functions.rb', line 561

# Exclude certain columns from a wildcard/regex selection.
#
# @param columns [Object] Forwarded to `exclude` on the wildcard expression `col("*")`.
# @return [Object]
def exclude(columns)
  everything = col("*")
  everything.exclude(columns)
end

#first(column = nil) ⇒ Object

Get the first value.



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/polars/lazy_functions.rb', line 213

# Get the first value.
#
# - `nil`: returns the wrapped `RbExpr.first` expression.
# - Series: returns its first element.
# - anything else: returns `col(column).first`.
#
# @param column [String, Symbol, Series, nil]
# @return [Object]
# @raise [IndexError] if `column` is an empty Series.
def first(column = nil)
  return Utils.wrap_expr(RbExpr.first) if column.nil?
  return col(column).first unless column.is_a?(Series)

  unless column.len > 0
    raise IndexError, "The series is empty, so no first value can be returned."
  end

  column[0]
end

#fold(acc, f, exprs) ⇒ Expr

Accumulate over multiple columns horizontally/row wise with a left fold.



439
440
441
442
443
444
445
446
447
# File 'lib/polars/lazy_functions.rb', line 439

# Accumulate over multiple columns horizontally with a left fold.
#
# @param acc [Object] Initial accumulator; converted with `Utils.expr_to_lit_or_expr`.
# @param f [Proc] Binary function combining the accumulator and the next column.
# @param exprs [Expr, Object] A single Expr (wrapped into an array) or a collection of columns/expressions.
# @return [Expr]
def fold(acc, f, exprs)
  initial = Utils.expr_to_lit_or_expr(acc, str_to_lit: true)
  exprs = [exprs] if exprs.is_a?(Expr)

  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(RbExpr.fold(initial._rbexpr, f, rb_exprs))
end

#format(fstring, *args) ⇒ Expr

Format expressions as a string.

Examples:

df = Polars::DataFrame.new(
  {
    "a": ["a", "b", "c"],
    "b": [1, 2, 3]
  }
)
df.select(
  [
    Polars.format("foo_{}_bar_{}", Polars.col("a"), "b").alias("fmt")
  ]
)
# =>
# shape: (3, 1)
# ┌─────────────┐
# │ fmt         │
# │ ---         │
# │ str         │
# ╞═════════════╡
# │ foo_a_bar_1 │
# ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ foo_b_bar_2 │
# ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ foo_c_bar_3 │
# └─────────────┘


846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
# File 'lib/polars/lazy_functions.rb', line 846

# Format expressions as a string.
#
# `fstring` is split on each `{}` placeholder. Placeholders are replaced by
# the matching argument (converted with `Utils.expr_to_lit_or_expr`), the
# non-empty text between them becomes literals, and the pieces are joined
# with `concat_str`.
#
# @param fstring [String] Template containing `{}` placeholders.
# @param args [Array<Object>] One value per placeholder, in order.
# @return [Expr]
# @raise [ArgumentError] if the placeholder count differs from the argument count.
def format(fstring, *args)
  unless fstring.scan("{}").length == args.length
    raise ArgumentError, "number of placeholders should equal the number of arguments"
  end

  # Consume arguments left to right as placeholders are encountered.
  pending = args.dup
  pieces = fstring.split(/(\{\})/).each_with_object([]) do |chunk, acc|
    if chunk == "{}"
      acc << Utils.expr_to_lit_or_expr(pending.shift, str_to_lit: false)
    elsif chunk.length > 0
      acc << lit(chunk)
    end
  end

  concat_str(pieces, sep: "")
end

#from_epoch(column, unit: "s", eager: false) ⇒ Object

Utility function that parses an epoch timestamp (or Unix time) to Polars Date(time).

Depending on the unit provided, this function will return a different dtype:

  • unit: "d" returns pl.Date
  • unit: "s" returns pl.Datetime["us"]
  • unit: "ms" returns pl.Datetime["ms"]
  • unit: "us" returns pl.Datetime["us"]
  • unit: "ns" returns pl.Datetime["ns"]

Examples:

df = Polars::DataFrame.new({"timestamp" => [1666683077, 1666683099]}).lazy
df.select(Polars.from_epoch(Polars.col("timestamp"), unit: "s")).collect
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ timestamp           │
# │ ---                 │
# │ datetime[μs]        │
# ╞═════════════════════╡
# │ 2022-10-25 07:31:17 │
# ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2022-10-25 07:31:39 │
# └─────────────────────┘


1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
# File 'lib/polars/lazy_functions.rb', line 1139

# Parse an epoch timestamp (or Unix time) to a Polars Date(time).
#
# Only `unit: "d"` is implemented here; "s" and the units listed in
# `Utils::DTYPE_TEMPORAL_UNITS` currently raise `Todo`.
#
# @param column [String, Series, Expr, Object]
#   A String is treated as a column name; anything that is neither a
#   Series nor an Expr is passed to `Series.new`.
# @param unit [String] One of "d", "s", or a unit in `Utils::DTYPE_TEMPORAL_UNITS`.
# @param eager [Boolean] When true, evaluate immediately and return a Series.
# @return [Object]
# @raise [ArgumentError] for an unknown unit, or when `eager` is true and
#   the input did not resolve to a Series.
# @raise [Todo] for units that are not implemented yet.
def from_epoch(column, unit: "s", eager: false)
  if column.is_a?(String)
    column = col(column)
  elsif !column.is_a?(Series) && !column.is_a?(Expr)
    column = Series.new(column)
  end

  if unit == "d"
    expr = column.cast(:date)
  elsif unit == "s"
    raise Todo
    # expr = (column.cast(:i64) * 1_000_000).cast(Datetime("us"))
  elsif Utils::DTYPE_TEMPORAL_UNITS.include?(unit)
    raise Todo
    # expr = column.cast(Datetime(unit))
  else
    # Single braces: Ruby prints `{{` literally, unlike a Python f-string.
    raise ArgumentError, "'unit' must be one of {'ns', 'us', 'ms', 's', 'd'}, got '#{unit}'."
  end

  return expr unless eager

  unless column.is_a?(Series)
    raise ArgumentError, "expected Series or Array if eager: true, got #{column.class.name}"
  end

  column.to_frame.select(expr).to_series
end

#groups(column) ⇒ Object

Syntactic sugar for Polars.col("foo").agg_groups.



602
603
604
# File 'lib/polars/lazy_functions.rb', line 602

# Syntactic sugar for `Polars.col("foo").agg_groups`.
#
# @param column [Object] Forwarded to `col`.
# @return [Object]
def groups(column)
  target = col(column)
  target.agg_groups
end

#head(column, n = 10) ⇒ Object

Get the first n rows.



261
262
263
264
265
266
267
# File 'lib/polars/lazy_functions.rb', line 261

# Get the first `n` rows.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @param n [Integer] Number of rows.
# @return [Object]
def head(column, n = 10)
  target = column.is_a?(Series) ? column : col(column)
  target.head(n)
end

#last(column = nil) ⇒ Object

Get the last value.

Depending on the input type this function does different things:

  • nil -> expression to take last column of a context.
  • String -> syntactic sugar for Polars.col(..).last
  • Series -> Take last value in Series


238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/polars/lazy_functions.rb', line 238

# Get the last value.
#
# - `nil`: returns the wrapped `_last` expression.
# - Series: returns its last element.
# - anything else: returns `col(column).last`.
#
# @param column [String, Symbol, Series, nil]
# @return [Object]
# @raise [IndexError] if `column` is an empty Series.
def last(column = nil)
  return Utils.wrap_expr(_last) if column.nil?
  return col(column).last unless column.is_a?(Series)

  unless column.len > 0
    raise IndexError, "The series is empty, so no last value can be returned"
  end

  column[-1]
end

#lit(value) ⇒ Expr

Return an expression representing a literal value.



288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/polars/lazy_functions.rb', line 288

# Return an expression representing a literal value.
#
# A `Polars::Series` is unwrapped to its `_s` value and, when it has a
# non-empty name, the resulting expression is aliased to that name.
#
# @param value [Object]
# @return [Expr]
def lit(value)
  return Utils.wrap_expr(RbExpr.lit(value)) unless value.is_a?(Polars::Series)

  series_name = value.name
  expr = Utils.wrap_expr(RbExpr.lit(value._s))
  series_name == "" ? expr : expr.alias(series_name)
end

#max(column) ⇒ Expr, Object

Get the maximum value.



119
120
121
122
123
124
125
126
127
128
129
# File 'lib/polars/lazy_functions.rb', line 119

# Get the maximum value.
#
# - Series: returns `column.max`.
# - String or Symbol: returns `col(column).max`.
# - anything else: converted to an expression list and passed to `_max_exprs`.
#
# @param column [String, Symbol, Series, Object]
# @return [Expr, Object]
def max(column)
  case column
  when Series
    column.max
  when String, Symbol
    col(column).max
  else
    rb_exprs = Utils.selection_to_rbexpr_list(column)
    # TODO
    Utils.wrap_expr(_max_exprs(rb_exprs))
  end
end

#mean(column) ⇒ Expr, Float

Get the mean value.



173
174
175
176
177
178
179
# File 'lib/polars/lazy_functions.rb', line 173

# Get the mean value.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @return [Expr, Float]
def mean(column)
  target = column.is_a?(Series) ? column : col(column)
  target.mean
end

#median(column) ⇒ Object

Get the median value.



191
192
193
194
195
196
197
# File 'lib/polars/lazy_functions.rb', line 191

# Get the median value.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @return [Object]
def median(column)
  target = column.is_a?(Series) ? column : col(column)
  target.median
end

#min(column) ⇒ Expr, Object

Get the minimum value.



141
142
143
144
145
146
147
148
149
150
151
# File 'lib/polars/lazy_functions.rb', line 141

# Get the minimum value.
#
# - Series: returns `column.min`.
# - String or Symbol: returns `col(column).min`.
# - anything else: converted to an expression list and passed to `_min_exprs`.
#
# @param column [String, Symbol, Series, Object]
# @return [Expr, Object]
def min(column)
  case column
  when Series
    column.min
  when String, Symbol
    col(column).min
  else
    rb_exprs = Utils.selection_to_rbexpr_list(column)
    # TODO
    Utils.wrap_expr(_min_exprs(rb_exprs))
  end
end

#n_unique(column) ⇒ Object

Count unique values.



202
203
204
205
206
207
208
# File 'lib/polars/lazy_functions.rb', line 202

# Count unique values.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @return [Object]
def n_unique(column)
  target = column.is_a?(Series) ? column : col(column)
  target.n_unique
end

#pearson_corr(a, b, ddof: 1) ⇒ Expr

Compute the Pearson correlation between two columns.



402
403
404
405
406
407
408
409
410
# File 'lib/polars/lazy_functions.rb', line 402

# Compute the Pearson correlation between two columns.
#
# @param a [String, Symbol, Expr] Column name or expression.
# @param b [String, Symbol, Expr] Column name or expression.
# @param ddof [Integer] Passed through to `RbExpr.pearson_corr`.
# @return [Expr]
def pearson_corr(a, b, ddof: 1)
  # Column names may be given as String or Symbol; `col` handles both.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.pearson_corr(a._rbexpr, b._rbexpr, ddof))
end

#quantile(column, quantile, interpolation: "nearest") ⇒ Expr

Syntactic sugar for Polars.col("foo").quantile(...).



616
617
618
# File 'lib/polars/lazy_functions.rb', line 616

# Syntactic sugar for `Polars.col("foo").quantile(...)`.
#
# @param column [Object] Forwarded to `col`.
# @param quantile [Object] Quantile value, forwarded unchanged.
# @param interpolation [String] Interpolation method, forwarded unchanged.
# @return [Expr]
def quantile(column, quantile, interpolation: "nearest")
  target = col(column)
  target.quantile(quantile, interpolation: interpolation)
end

#repeat(value, n, eager: false, name: nil) ⇒ Expr

Repeat a single value n times.



1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
# File 'lib/polars/lazy_functions.rb', line 1019

# Repeat a single value `n` times.
#
# The lazy expression is always built first. With `eager: true` it is
# evaluated through `Polars.select` and returned as a Series named `name`
# (or "" when no name is given).
#
# @param value [Object] Value to repeat.
# @param n [Integer, Expr] Repeat count; an Integer is wrapped with `lit`.
# @param eager [Boolean] When true, return a Series instead of an Expr.
# @param name [String, nil] Series name used in eager mode only.
# @return [Expr, Series]
def repeat(value, n, eager: false, name: nil)
  n = lit(n) if n.is_a?(Integer)
  expr = Utils.wrap_expr(RbExpr.repeat(value, n._rbexpr))
  return expr unless eager

  # The previous eager path called `py_type_to_dtype(type(value))`, which
  # is untranslated Python and raised NoMethodError. Evaluate the lazy
  # expression instead.
  Polars.select(expr.alias(name.nil? ? "" : name)).to_series
end

#select(exprs) ⇒ DataFrame

Run polars expressions without a context.



945
946
947
# File 'lib/polars/lazy_functions.rb', line 945

# Run polars expressions without a context.
#
# The expressions are evaluated against a DataFrame built from an empty array.
#
# @param exprs [Object] Expressions forwarded to `DataFrame#select`.
# @return [DataFrame]
def select(exprs)
  empty_frame = DataFrame.new([])
  empty_frame.select(exprs)
end

#spearman_rank_corr(a, b, ddof: 1, propagate_nans: false) ⇒ Expr

Compute the spearman rank correlation between two columns.

Missing data will be excluded from the computation.



382
383
384
385
386
387
388
389
390
# File 'lib/polars/lazy_functions.rb', line 382

# Compute the Spearman rank correlation between two columns.
#
# @param a [String, Symbol, Expr] Column name or expression.
# @param b [String, Symbol, Expr] Column name or expression.
# @param ddof [Integer] Passed through to `RbExpr.spearman_rank_corr`.
# @param propagate_nans [Boolean] Passed through to `RbExpr.spearman_rank_corr`.
# @return [Expr]
def spearman_rank_corr(a, b, ddof: 1, propagate_nans: false)
  # Column names may be given as String or Symbol; `col` handles both.
  a = col(a) if a.is_a?(String) || a.is_a?(Symbol)
  b = col(b) if b.is_a?(String) || b.is_a?(Symbol)
  Utils.wrap_expr(RbExpr.spearman_rank_corr(a._rbexpr, b._rbexpr, ddof, propagate_nans))
end

#std(column, ddof: 1) ⇒ Object

Get the standard deviation.



90
91
92
93
94
95
96
# File 'lib/polars/lazy_functions.rb', line 90

# Get the standard deviation.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @param ddof [Integer] Forwarded to the underlying `std` call.
# @return [Object]
def std(column, ddof: 1)
  target = column.is_a?(Series) ? column : col(column)
  target.std(ddof: ddof)
end

#struct(exprs, eager: false) ⇒ Object

Collect several columns into a Series of dtype Struct.

Examples:

Polars::DataFrame.new(
  {
    "int" => [1, 2],
    "str" => ["a", "b"],
    "bool" => [true, nil],
    "list" => [[1, 2], [3]],
  }
).select([Polars.struct(Polars.all).alias("my_struct")])
# =>
# shape: (2, 1)
# ┌─────────────────────┐
# │ my_struct           │
# │ ---                 │
# │ struct[4]           │
# ╞═════════════════════╡
# │ {1,"a",true,[1, 2]} │
# ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ {2,"b",null,[3]}    │
# └─────────────────────┘

Only collect specific columns as a struct:

df = Polars::DataFrame.new(
  {"a" => [1, 2, 3, 4], "b" => ["one", "two", "three", "four"], "c" => [9, 8, 7, 6]}
)
df.with_column(Polars.struct(Polars.col(["a", "b"])).alias("a_and_b"))
# =>
# shape: (4, 4)
# ┌─────┬───────┬─────┬─────────────┐
# │ a   ┆ b     ┆ c   ┆ a_and_b     │
# │ --- ┆ ---   ┆ --- ┆ ---         │
# │ i64 ┆ str   ┆ i64 ┆ struct[2]   │
# ╞═════╪═══════╪═════╪═════════════╡
# │ 1   ┆ one   ┆ 9   ┆ {1,"one"}   │
# ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 2   ┆ two   ┆ 8   ┆ {2,"two"}   │
# ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 3   ┆ three ┆ 7   ┆ {3,"three"} │
# ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
# │ 4   ┆ four  ┆ 6   ┆ {4,"four"}  │
# └─────┴───────┴─────┴─────────────┘


999
1000
1001
1002
1003
1004
1005
# File 'lib/polars/lazy_functions.rb', line 999

# Collect several columns into a Series of dtype Struct.
#
# @param exprs [Object] Columns/expressions, converted with `Utils.selection_to_rbexpr_list`.
# @param eager [Boolean]
#   When true, the lazy struct expression is evaluated through
#   `Polars.select` and returned as a Series.
# @return [Expr, Series]
def struct(exprs, eager: false)
  # Explicit return: without it the eager result was computed and then
  # discarded, so callers always received the lazy Expr.
  return Polars.select(struct(exprs, eager: false)).to_series if eager

  rb_exprs = Utils.selection_to_rbexpr_list(exprs)
  Utils.wrap_expr(_as_struct(rb_exprs))
end

#sum(column) ⇒ Object

Sum values in a column/Series, or horizontally across list of columns/expressions.



156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/polars/lazy_functions.rb', line 156

# Sum values in a column/Series, or horizontally across a list of
# columns/expressions.
#
# - Series: returns `column.sum`.
# - String or Symbol: returns `col(column.to_s).sum`.
# - Array: converted to an expression list and passed to `_sum_exprs`.
# - anything else: folded with `+`, aliased as "sum".
#
# @param column [String, Symbol, Series, Array, Object]
# @return [Object]
def sum(column)
  case column
  when Series
    column.sum
  when String, Symbol
    col(column.to_s).sum
  when Array
    rb_exprs = Utils.selection_to_rbexpr_list(column)
    # TODO
    Utils.wrap_expr(_sum_exprs(rb_exprs))
  else
    fold(lit(0).cast(:u32), ->(a, b) { a + b }, column).alias("sum")
  end
end

#tail(column, n = 10) ⇒ Object

Get the last n rows.



277
278
279
280
281
282
283
# File 'lib/polars/lazy_functions.rb', line 277

# Get the last `n` rows.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @param n [Integer] Number of rows.
# @return [Object]
def tail(column, n = 10)
  target = column.is_a?(Series) ? column : col(column)
  target.tail(n)
end

#to_list(name) ⇒ Expr

Aggregate to list.



83
84
85
# File 'lib/polars/lazy_functions.rb', line 83

# Aggregate to list. Shorthand for `col(name).list`.
#
# @param name [Object] Forwarded to `col`.
# @return [Expr]
def to_list(name)
  target = col(name)
  target.list
end

#var(column, ddof: 1) ⇒ Object

Get the variance.



101
102
103
104
105
106
107
# File 'lib/polars/lazy_functions.rb', line 101

# Get the variance.
#
# @param column [String, Symbol, Series] A Series is used directly; anything else goes through `col`.
# @param ddof [Integer] Forwarded to the underlying `var` call.
# @return [Object]
def var(column, ddof: 1)
  target = column.is_a?(Series) ? column : col(column)
  target.var(ddof: ddof)
end

#when(expr) ⇒ When

Start a "when, then, otherwise" expression.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 3, 4], "bar" => [3, 4, 0]})
df.with_column(Polars.when(Polars.col("foo") > 2).then(Polars.lit(1)).otherwise(Polars.lit(-1)))
# =>
# shape: (3, 3)
# ┌─────┬─────┬─────────┐
# │ foo ┆ bar ┆ literal │
# │ --- ┆ --- ┆ ---     │
# │ i64 ┆ i64 ┆ i32     │
# ╞═════╪═════╪═════════╡
# │ 1   ┆ 3   ┆ -1      │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 3   ┆ 4   ┆ 1       │
# ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
# │ 4   ┆ 0   ┆ 1       │
# └─────┴─────┴─────────┘


1189
1190
1191
1192
1193
# File 'lib/polars/lazy_functions.rb', line 1189

# Start a "when, then, otherwise" expression.
#
# @param expr [Object] Condition; converted with `Utils.expr_to_lit_or_expr`.
# @return [When]
def when(expr)
  predicate = Utils.expr_to_lit_or_expr(expr)
  When.new(RbExpr.when(predicate._rbexpr))
end