Class: Polars::ListExpr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/list_expr.rb

Overview

Namespace for list related expressions.

Instance Method Summary collapse

Instance Method Details

#[](item) ⇒ Expr

Get the value by index in the sublists.



265
266
267
# File 'lib/polars/list_expr.rb', line 265

# Get the value by index in the sublists.
#
# Operator form that forwards straight to {#get}.
#
# @param item index to look up; handed to {#get} unchanged
# @return [Expr]
def [](item)
  requested = item
  get(requested)
end

#arg_max ⇒ Expr

Retrieve the index of the maximum value in every sublist.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  }
)
df.select(Polars.col("a").list.arg_max)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 1   │
# │ 0   │
# └─────┘


433
434
435
# File 'lib/polars/list_expr.rb', line 433

# Retrieve the index of the maximum value in every sublist.
#
# @return [Expr]
def arg_max
  native = _rbexpr.list_arg_max
  Utils.wrap_expr(native)
end

#arg_min ⇒ Expr

Retrieve the index of the minimal value in every sublist.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 2], [2, 1]]
  }
)
df.select(Polars.col("a").list.arg_min)
# =>
# shape: (2, 1)
# ┌─────┐
# │ a   │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 0   │
# │ 1   │
# └─────┘


408
409
410
# File 'lib/polars/list_expr.rb', line 408

# Retrieve the index of the minimal value in every sublist.
#
# @return [Expr]
def arg_min
  native = _rbexpr.list_arg_min
  Utils.wrap_expr(native)
end

#concat(other) ⇒ Expr

Concatenate the arrays in a Series of dtype List in linear time.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [["a"], ["x"]],
    "b" => [["b", "c"], ["y", "z"]]
  }
)
df.select(Polars.col("a").list.concat("b"))
# =>
# shape: (2, 1)
# ┌─────────────────┐
# │ a               │
# │ ---             │
# │ list[str]       │
# ╞═════════════════╡
# │ ["a", "b", "c"] │
# │ ["x", "y", "z"] │
# └─────────────────┘


217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/polars/list_expr.rb', line 217

# Concat the arrays in a Series dtype List in linear time.
#
# @param other a single value, or an Array of values, to append to
#   this list column. An Array whose first element is not an Expr,
#   String or Series is treated as one literal list and wrapped in a
#   Series before concatenation.
# @return [Expr]
def concat(other)
  if other.is_a?(::Array)
    # Only the first element is inspected to decide whether the array
    # holds columns/expressions or is itself a literal list.
    literal_list = [Expr, String, Series].none? { |klass| other[0].is_a?(klass) }
    return concat(Series.new([other])) if literal_list

    # Copy so the caller's array is left untouched.
    operands = other.dup
  else
    operands = [other]
  end

  # This expression always comes first in the concatenation.
  operands.unshift(Utils.wrap_expr(_rbexpr))
  Polars.concat_list(operands)
end

#contains(item) ⇒ Expr

Check if sublists contain the given item.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.contains(1))
# =>
# shape: (3, 1)
# ┌───────┐
# │ foo   │
# │ ---   │
# │ bool  │
# ╞═══════╡
# │ true  │
# │ false │
# │ true  │
# └───────┘


356
357
358
# File 'lib/polars/list_expr.rb', line 356

# Check if sublists contain the given item.
#
# @param item value to look for; converted with
#   Utils.expr_to_lit_or_expr before being handed to the native call
# @return [Expr]
def contains(item)
  haystack = _rbexpr
  needle = Utils.expr_to_lit_or_expr(item)._rbexpr
  Utils.wrap_expr(haystack.list_contains(needle))
end

#count_matches(element) ⇒ Expr Also known as: count_match

Count how often the value produced by element occurs.

Examples:

df = Polars::DataFrame.new({"listcol" => [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]]})
df.select(Polars.col("listcol").list.count_match(2).alias("number_of_twos"))
# =>
# shape: (5, 1)
# ┌────────────────┐
# │ number_of_twos │
# │ ---            │
# │ u32            │
# ╞════════════════╡
# │ 0              │
# │ 0              │
# │ 2              │
# │ 1              │
# │ 0              │
# └────────────────┘


574
575
576
# File 'lib/polars/list_expr.rb', line 574

# Count how often the value produced by element occurs.
#
# @param element value (or expression) to count; converted with
#   Utils.expr_to_lit_or_expr before being handed to the native call
# @return [Expr]
def count_matches(element)
  target = _rbexpr
  wanted = Utils.expr_to_lit_or_expr(element)._rbexpr
  Utils.wrap_expr(target.list_count_matches(wanted))
end

#diff(n: 1, null_behavior: "ignore") ⇒ Expr

Calculate the n-th discrete difference of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.diff
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [null, 1, … 1]
#         [null, -8, -1]
# ]


456
457
458
# File 'lib/polars/list_expr.rb', line 456

# Calculate the n-th discrete difference of every sublist.
#
# @param n passed as the first argument to the native list_diff
#   call (defaults to 1)
# @param null_behavior passed as the second argument to the native
#   list_diff call (defaults to "ignore")
# @return [Expr]
def diff(n: 1, null_behavior: "ignore")
  native = _rbexpr.list_diff(n, null_behavior)
  Utils.wrap_expr(native)
end

#eval(expr, parallel: false) ⇒ Expr

Run any polars expression against the lists' elements.

Examples:

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
df.with_column(
  Polars.concat_list(["a", "b"]).list.eval(Polars.element.rank).alias("rank")
)
# =>
# shape: (3, 3)
# ┌─────┬─────┬────────────┐
# │ a   ┆ b   ┆ rank       │
# │ --- ┆ --- ┆ ---        │
# │ i64 ┆ i64 ┆ list[f64]  │
# ╞═════╪═════╪════════════╡
# │ 1   ┆ 4   ┆ [1.0, 2.0] │
# │ 8   ┆ 5   ┆ [2.0, 1.0] │
# │ 3   ┆ 2   ┆ [2.0, 1.0] │
# └─────┴─────┴────────────┘


637
638
639
# File 'lib/polars/list_expr.rb', line 637

# Run any polars expression against the lists' elements.
#
# @param expr expression to run; must respond to `_rbexpr`
# @param parallel flag forwarded unchanged to the native list_eval
#   call (defaults to false)
# @return [Expr]
def eval(expr, parallel: false)
  target = _rbexpr
  element_expr = expr._rbexpr
  Utils.wrap_expr(target.list_eval(element_expr, parallel))
end

#first ⇒ Expr

Get the first value of the sublists.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.first)
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3    │
# │ null │
# │ 1    │
# └──────┘


309
310
311
# File 'lib/polars/list_expr.rb', line 309

# Get the first value of the sublists.
#
# @return [Expr]
def first
  head_index = 0
  get(head_index)
end

#get(index) ⇒ Expr

Get the value by index in the sublists.

Index 0 returns the first item of every sublist and index -1 returns the last item of every sublist. If an index is out of bounds, the result for that sublist is null.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.get(0))
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 3    │
# │ null │
# │ 1    │
# └──────┘


257
258
259
260
# File 'lib/polars/list_expr.rb', line 257

# Get the value by index in the sublists.
#
# @param index position to fetch from each sublist; converted with
#   Utils.parse_as_expression before the native call
# @return [Expr]
def get(index)
  index_expr = Utils.parse_as_expression(index)
  native = _rbexpr.list_get(index_expr)
  Utils.wrap_expr(native)
end

#head(n = 5) ⇒ Expr

Slice the first n values of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.head(2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [1, 2]
#         [10, 2]
# ]


525
526
527
# File 'lib/polars/list_expr.rb', line 525

# Slice the first n values of every sublist.
#
# Implemented as a slice starting at offset 0.
#
# @param n number of values to keep (defaults to 5)
# @return [Expr]
def head(n = 5)
  start = 0
  slice(start, n)
end

#join(separator) ⇒ Expr

Join all string items in a sublist and place a separator between them.

This errors if inner type of list != :str.

Examples:

df = Polars::DataFrame.new({"s" => [["a", "b", "c"], ["x", "y"]]})
df.select(Polars.col("s").list.join(" "))
# =>
# shape: (2, 1)
# ┌───────┐
# │ s     │
# │ ---   │
# │ str   │
# ╞═══════╡
# │ a b c │
# │ x y   │
# └───────┘


382
383
384
385
# File 'lib/polars/list_expr.rb', line 382

# Join all string items in a sublist and place a separator between them.
#
# @param separator text placed between items; parsed with
#   Utils.parse_as_expression using str_as_lit: true
# @return [Expr]
def join(separator)
  sep_expr = Utils.parse_as_expression(separator, str_as_lit: true)
  native = _rbexpr.list_join(sep_expr)
  Utils.wrap_expr(native)
end

#last ⇒ Expr

Get the last value of the sublists.

Examples:

df = Polars::DataFrame.new({"foo" => [[3, 2, 1], [], [1, 2]]})
df.select(Polars.col("foo").list.last)
# =>
# shape: (3, 1)
# ┌──────┐
# │ foo  │
# │ ---  │
# │ i64  │
# ╞══════╡
# │ 1    │
# │ null │
# │ 2    │
# └──────┘


331
332
333
# File 'lib/polars/list_expr.rb', line 331

# Get the last value of the sublists.
#
# @return [Expr]
def last
  tail_index = -1
  get(tail_index)
end

#lengths ⇒ Expr Also known as: len

Get the length of the arrays as :u32.

Examples:

df = Polars::DataFrame.new({"foo" => [1, 2], "bar" => [["a", "b"], ["c"]]})
df.select(Polars.col("bar").list.lengths)
# =>
# shape: (2, 1)
# ┌─────┐
# │ bar │
# │ --- │
# │ u32 │
# ╞═════╡
# │ 2   │
# │ 1   │
# └─────┘


29
30
31
# File 'lib/polars/list_expr.rb', line 29

# Get the length of the arrays as :u32.
#
# @return [Expr]
def lengths
  native = _rbexpr.list_len
  Utils.wrap_expr(native)
end

#max ⇒ Expr

Compute the max value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.max)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 3      │
# └────────┘


72
73
74
# File 'lib/polars/list_expr.rb', line 72

# Compute the max value of the lists in the array.
#
# @return [Expr]
def max
  native = _rbexpr.list_max
  Utils.wrap_expr(native)
end

#mean ⇒ Expr

Compute the mean value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.mean)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ f64    │
# ╞════════╡
# │ 1.0    │
# │ 2.5    │
# └────────┘


114
115
116
# File 'lib/polars/list_expr.rb', line 114

# Compute the mean value of the lists in the array.
#
# @return [Expr]
def mean
  native = _rbexpr.list_mean
  Utils.wrap_expr(native)
end

#min ⇒ Expr

Compute the min value of the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.min)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 2      │
# └────────┘


93
94
95
# File 'lib/polars/list_expr.rb', line 93

# Compute the min value of the lists in the array.
#
# @return [Expr]
def min
  native = _rbexpr.list_min
  Utils.wrap_expr(native)
end

#reverse ⇒ Expr

Reverse the arrays in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]]
  }
)
df.select(Polars.col("a").list.reverse)
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# │ [2, 1, 9] │
# └───────────┘


164
165
166
# File 'lib/polars/list_expr.rb', line 164

# Reverse the arrays in the list.
#
# @return [Expr]
def reverse
  native = _rbexpr.list_reverse
  Utils.wrap_expr(native)
end

#shift(n = 1) ⇒ Expr

Shift values by the given period.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.shift
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [null, 1, … 3]
#         [null, 10, 2]
# ]


477
478
479
480
# File 'lib/polars/list_expr.rb', line 477

# Shift values by the given period.
#
# @param n shift amount (defaults to 1); converted with
#   Utils.parse_as_expression before the native call
# @return [Expr]
def shift(n = 1)
  periods = Utils.parse_as_expression(n)
  native = _rbexpr.list_shift(periods)
  Utils.wrap_expr(native)
end

#slice(offset, length = nil) ⇒ Expr

Slice every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.slice(1, 2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [2, 3]
#         [2, 1]
# ]


502
503
504
505
506
# File 'lib/polars/list_expr.rb', line 502

# Slice every sublist.
#
# @param offset start position of the slice
# @param length length of the slice (defaults to nil, which is still
#   converted and forwarded to the native call)
# @return [Expr]
def slice(offset, length = nil)
  # Both bounds go through the same conversion, with strings not
  # turned into literals (str_to_lit: false).
  rb_offset, rb_length = [offset, length].map do |bound|
    Utils.expr_to_lit_or_expr(bound, str_to_lit: false)._rbexpr
  end
  Utils.wrap_expr(_rbexpr.list_slice(rb_offset, rb_length))
end

#sort(reverse: false) ⇒ Expr

Sort the arrays in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[3, 2, 1], [9, 1, 2]]
  }
)
df.select(Polars.col("a").list.sort)
# =>
# shape: (2, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2, 3] │
# │ [1, 2, 9] │
# └───────────┘


139
140
141
# File 'lib/polars/list_expr.rb', line 139

# Sort the arrays in the list.
#
# @param reverse flag forwarded unchanged to the native list_sort
#   call (defaults to false)
# @return [Expr]
def sort(reverse: false)
  native = _rbexpr.list_sort(reverse)
  Utils.wrap_expr(native)
end

#sum ⇒ Expr

Sum all the lists in the array.

Examples:

df = Polars::DataFrame.new({"values" => [[1], [2, 3]]})
df.select(Polars.col("values").list.sum)
# =>
# shape: (2, 1)
# ┌────────┐
# │ values │
# │ ---    │
# │ i64    │
# ╞════════╡
# │ 1      │
# │ 5      │
# └────────┘


51
52
53
# File 'lib/polars/list_expr.rb', line 51

# Sum all the lists in the array.
#
# @return [Expr]
def sum
  native = _rbexpr.list_sum
  Utils.wrap_expr(native)
end

#tail(n = 5) ⇒ Expr

Slice the last n values of every sublist.

Examples:

s = Polars::Series.new("a", [[1, 2, 3, 4], [10, 2, 1]])
s.list.tail(2)
# =>
# shape: (2,)
# Series: 'a' [list[i64]]
# [
#         [3, 4]
#         [2, 1]
# ]


546
547
548
549
# File 'lib/polars/list_expr.rb', line 546

# Slice the last n values of every sublist.
#
# Implemented as a slice whose offset is the negation of n.
#
# @param n number of values to keep (defaults to 5)
# @return [Expr]
def tail(n = 5)
  count_expr = Utils.expr_to_lit_or_expr(n, str_to_lit: false)
  # Negate the converted value (unary minus on the expression) so the
  # slice starts n items from the end.
  slice(-count_expr, n)
end

#take(index, null_on_oob: false) ⇒ Expr

Take sublists by multiple indices.

The indices may be defined in a single column, or by sublists in another column of dtype List.



283
284
285
286
287
288
289
# File 'lib/polars/list_expr.rb', line 283

# Take sublists by multiple indices.
#
# The indices may be defined in a single column, or by sublists in
# another column of dtype List.
#
# @param index indices to take; a plain Array is first wrapped in a
#   Series, anything else is converted as-is
# @param null_on_oob flag forwarded unchanged to the native
#   list_take call (defaults to false)
# @return [Expr]
def take(index, null_on_oob: false)
  indices = index.is_a?(::Array) ? Series.new(index) : index
  rb_indices = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)._rbexpr
  Utils.wrap_expr(_rbexpr.list_take(rb_indices, null_on_oob))
end

#to_struct(n_field_strategy: "first_non_null", name_generator: nil) ⇒ Expr

Convert the series of type List to a series of type Struct.

Examples:

df = Polars::DataFrame.new({"a" => [[1, 2, 3], [1, 2]]})
df.select([Polars.col("a").list.to_struct])
# =>
# shape: (2, 1)
# ┌────────────┐
# │ a          │
# │ ---        │
# │ struct[3]  │
# ╞════════════╡
# │ {1,2,3}    │
# │ {1,2,null} │
# └────────────┘

Raises:

  • (Todo)


602
603
604
605
# File 'lib/polars/list_expr.rb', line 602

# Convert the series of type List to a series of type Struct.
#
# @param n_field_strategy forwarded unchanged to the native
#   list_to_struct call (defaults to "first_non_null")
# @param name_generator not supported yet; any truthy value raises
# @return [Expr]
# @raise [Todo] when name_generator is given
def to_struct(n_field_strategy: "first_non_null", name_generator: nil)
  raise Todo if name_generator

  # NOTE(review): the trailing 0 is a fixed third argument to the
  # native call; its meaning is not visible here — confirm against
  # the binding.
  native = _rbexpr.list_to_struct(n_field_strategy, name_generator, 0)
  Utils.wrap_expr(native)
end

#unique(maintain_order: false) ⇒ Expr

Get the unique/distinct values in the list.

Examples:

df = Polars::DataFrame.new(
  {
    "a" => [[1, 1, 2]]
  }
)
df.select(Polars.col("a").list.unique)
# =>
# shape: (1, 1)
# ┌───────────┐
# │ a         │
# │ ---       │
# │ list[i64] │
# ╞═══════════╡
# │ [1, 2]    │
# └───────────┘


188
189
190
# File 'lib/polars/list_expr.rb', line 188

# Get the unique/distinct values in the list.
#
# @param maintain_order flag forwarded unchanged to the native
#   list_unique call (defaults to false)
# @return [Expr]
def unique(maintain_order: false)
  native = _rbexpr.list_unique(maintain_order)
  Utils.wrap_expr(native)
end