Class: Polars::BinaryExpr

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/binary_expr.rb

Overview

Namespace for binary related expressions.

Instance Method Summary collapse

Instance Method Details

#contains(literal) ⇒ Expr

Check if binary values contain a binary substring.

Examples:

colors = Polars::DataFrame.new(
  {
    "name" => ["black", "yellow", "blue"],
    "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
    "lit" => ["\x00".b, "\xff\x00".b, "\xff\xff".b]
  }
)
colors.select(
  "name",
  Polars.col("code").bin.contains("\xff".b).alias("contains_with_lit"),
  Polars.col("code").bin.contains(Polars.col("lit")).alias("contains_with_expr"),
)
# =>
# shape: (3, 3)
# ┌────────┬───────────────────┬────────────────────┐
# │ name   ┆ contains_with_lit ┆ contains_with_expr │
# │ ---    ┆ ---               ┆ ---                │
# │ str    ┆ bool              ┆ bool               │
# ╞════════╪═══════════════════╪════════════════════╡
# │ black  ┆ false             ┆ true               │
# │ yellow ┆ true              ┆ true               │
# │ blue   ┆ true              ┆ false              │
# └────────┴───────────────────┴────────────────────┘

Parameters:

  • literal (String)

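    The binary substring to look for. An expression (e.g. `Polars.col("lit")`) may be passed instead of a literal, as in the example above.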

Returns:



43
44
45
46
# File 'lib/polars/binary_expr.rb', line 43

# Check whether binary values contain a binary substring.
#
# @param literal [String]
#   The binary substring to look for. It is handed to
#   `Utils.parse_into_expression` with `str_as_lit: true` before being
#   passed to the underlying expression.
#
# @return [Expr]
def contains(literal)
  needle = Utils.parse_into_expression(literal, str_as_lit: true)
  matched = _rbexpr.binary_contains(needle)
  Utils.wrap_expr(matched)
end

#decode(encoding, strict: true) ⇒ Expr

Decode a value using the provided encoding.

Examples:

colors = Polars::DataFrame.new(
  {
    "name" => ["black", "yellow", "blue"],
    "encoded" => ["000000".b, "ffff00".b, "0000ff".b]
  }
)
colors.with_columns(
  Polars.col("encoded").bin.decode("hex").alias("code")
)
# =>
# shape: (3, 3)
# ┌────────┬───────────┬─────────────────┐
# │ name   ┆ encoded   ┆ code            │
# │ ---    ┆ ---       ┆ ---             │
# │ str    ┆ binary    ┆ binary          │
# ╞════════╪═══════════╪═════════════════╡
# │ black  ┆ b"000000" ┆ b"\x00\x00\x00" │
# │ yellow ┆ b"ffff00" ┆ b"\xff\xff\x00" │
# │ blue   ┆ b"0000ff" ┆ b"\x00\x00\xff" │
# └────────┴───────────┴─────────────────┘

Parameters:

  • encoding ("hex", "base64")

    The encoding to use.

  • strict (Boolean) (defaults to: true)

    Raise an error if the underlying value cannot be decoded, otherwise mask out with a null value.

Returns:



153
154
155
156
157
158
159
160
161
# File 'lib/polars/binary_expr.rb', line 153

# Decode a value using the provided encoding.
#
# @param encoding ["hex", "base64"]
#   The encoding to use.
# @param strict [Boolean]
#   Forwarded unchanged to the underlying hex/base64 decoder.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If `encoding` is neither "hex" nor "base64".
def decode(encoding, strict: true)
  decoded =
    case encoding
    when "hex"
      _rbexpr.binary_hex_decode(strict)
    when "base64"
      _rbexpr.binary_base64_decode(strict)
    else
      # Single braces: the doubled `{{ }}` was a Python f-string escape that
      # Ruby would print literally. `inspect` keeps nil/empty values visible.
      raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding.inspect}"
    end

  Utils.wrap_expr(decoded)
end

#encode(encoding) ⇒ Expr

Encode a value using the provided encoding.

Examples:

colors = Polars::DataFrame.new(
  {
    "color" => ["black", "yellow", "blue"],
    "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b]
  }
)
colors.with_columns(
  Polars.col("code").bin.encode("hex").alias("encoded")
)
# =>
# shape: (3, 3)
# ┌────────┬─────────────────┬─────────┐
# │ color  ┆ code            ┆ encoded │
# │ ---    ┆ ---             ┆ ---     │
# │ str    ┆ binary          ┆ str     │
# ╞════════╪═════════════════╪═════════╡
# │ black  ┆ b"\x00\x00\x00" ┆ 000000  │
# │ yellow ┆ b"\xff\xff\x00" ┆ ffff00  │
# │ blue   ┆ b"\x00\x00\xff" ┆ 0000ff  │
# └────────┴─────────────────┴─────────┘

Parameters:

  • encoding ("hex", "base64")

    The encoding to use.

Returns:



191
192
193
194
195
196
197
198
199
# File 'lib/polars/binary_expr.rb', line 191

# Encode a value using the provided encoding.
#
# @param encoding ["hex", "base64"]
#   The encoding to use.
#
# @return [Expr]
#
# @raise [ArgumentError]
#   If `encoding` is neither "hex" nor "base64".
def encode(encoding)
  encoded =
    case encoding
    when "hex"
      _rbexpr.binary_hex_encode
    when "base64"
      _rbexpr.binary_base64_encode
    else
      # Single braces: the doubled `{{ }}` was a Python f-string escape that
      # Ruby would print literally. `inspect` keeps nil/empty values visible.
      raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding.inspect}"
    end

  Utils.wrap_expr(encoded)
end

#ends_with(suffix) ⇒ Expr

Check if values end with a binary substring.

Examples:

colors = Polars::DataFrame.new(
  {
    "name" => ["black", "yellow", "blue"],
    "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
    "suffix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
  }
)
colors.select(
  "name",
  Polars.col("code").bin.ends_with("\xff".b).alias("ends_with_lit"),
  Polars.col("code").bin.ends_with(Polars.col("suffix")).alias("ends_with_expr")
)
# =>
# shape: (3, 3)
# ┌────────┬───────────────┬────────────────┐
# │ name   ┆ ends_with_lit ┆ ends_with_expr │
# │ ---    ┆ ---           ┆ ---            │
# │ str    ┆ bool          ┆ bool           │
# ╞════════╪═══════════════╪════════════════╡
# │ black  ┆ false         ┆ true           │
# │ yellow ┆ false         ┆ true           │
# │ blue   ┆ true          ┆ false          │
# └────────┴───────────────┴────────────────┘

Parameters:

  • suffix (String)

    Suffix substring.

Returns:



79
80
81
82
# File 'lib/polars/binary_expr.rb', line 79

# Check whether binary values end with a binary substring.
#
# @param suffix [String]
#   Suffix substring. It is handed to `Utils.parse_into_expression` with
#   `str_as_lit: true` before being passed to the underlying expression.
#
# @return [Expr]
def ends_with(suffix)
  tail = Utils.parse_into_expression(suffix, str_as_lit: true)
  matched = _rbexpr.binary_ends_with(tail)
  Utils.wrap_expr(matched)
end

#reinterpret(dtype:, endianness: "little") ⇒ Expr

Interpret a buffer as a numerical Polars type.

Examples:

df = Polars::DataFrame.new({"data" => ["\x05\x00\x00\x00".b, "\x10\x00\x01\x00".b]})
df.with_columns(
  bin2int: Polars.col("data").bin.reinterpret(
   dtype: Polars::Int32, endianness: "little"
  )
)
# =>
# shape: (2, 2)
# ┌─────────────────────┬─────────┐
# │ data                ┆ bin2int │
# │ ---                 ┆ ---     │
# │ binary              ┆ i32     │
# ╞═════════════════════╪═════════╡
# │ b"\x05\x00\x00\x00" ┆ 5       │
# │ b"\x10\x00\x01\x00" ┆ 65552   │
# └─────────────────────┴─────────┘

Parameters:

  • dtype (Object)

    Which type to interpret binary column into.

  • endianness (defaults to: "little")

    Which endianness to use when interpreting bytes, either "big" or "little". Defaults to "little".

Returns:



257
258
259
260
261
262
263
264
265
266
# File 'lib/polars/binary_expr.rb', line 257

# Interpret a buffer as a numerical Polars type.
#
# @param dtype [Object]
#   Which type to interpret the binary column into. It is converted with
#   `Utils.parse_into_datatype_expr` before use.
# @param endianness ["big", "little"]
#   Which endianness to use when interpreting bytes; passed through
#   unchanged to the underlying expression.
#
# @return [Expr]
def reinterpret(dtype:, endianness: "little")
  datatype_expr = Utils.parse_into_datatype_expr(dtype)
  reinterpreted = _rbexpr.bin_reinterpret(datatype_expr._rbdatatype_expr, endianness)
  Utils.wrap_expr(reinterpreted)
end

#size(unit = "b") ⇒ Expr

Get the size of binary values in the given unit.

Examples:

df = Polars::DataFrame.new({"data" => [512, 256, 1024].map { |n| "\x00".b * n }})
df.with_columns(
  n_bytes: Polars.col("data").bin.size,
  n_kilobytes: Polars.col("data").bin.size("kb")
)
# =>
# shape: (3, 3)
# ┌─────────────────────────────────┬─────────┬─────────────┐
# │ data                            ┆ n_bytes ┆ n_kilobytes │
# │ ---                             ┆ ---     ┆ ---         │
# │ binary                          ┆ u32     ┆ f64         │
# ╞═════════════════════════════════╪═════════╪═════════════╡
# │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 512     ┆ 0.5         │
# │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 256     ┆ 0.25        │
# │ b"\x00\x00\x00\x00\x00\x00\x00… ┆ 1024    ┆ 1.0         │
# └─────────────────────────────────┴─────────┴─────────────┘

Parameters:

  • unit ('b', 'kb', 'mb', 'gb', 'tb') (defaults to: "b")

    Scale the returned size to the given unit.

Returns:



225
226
227
228
229
# File 'lib/polars/binary_expr.rb', line 225

# Get the size of binary values in the given unit.
#
# @param unit ["b", "kb", "mb", "gb", "tb"]
#   Unit passed as `to:` to `Utils.scale_bytes` to scale the byte count.
#
# @return [Expr]
def size(unit = "b")
  byte_count = Utils.wrap_expr(_rbexpr.bin_size_bytes)
  Utils.scale_bytes(byte_count, to: unit)
end

#starts_with(prefix) ⇒ Expr

Check if values start with a binary substring.

Examples:

colors = Polars::DataFrame.new(
  {
    "name" => ["black", "yellow", "blue"],
    "code" => ["\x00\x00\x00".b, "\xff\xff\x00".b, "\x00\x00\xff".b],
    "prefix" => ["\x00".b, "\xff\x00".b, "\x00\x00".b]
  }
)
colors.select(
  "name",
  Polars.col("code").bin.starts_with("\xff".b).alias("starts_with_lit"),
  Polars.col("code")
  .bin.starts_with(Polars.col("prefix"))
  .alias("starts_with_expr")
)
# =>
# shape: (3, 3)
# ┌────────┬─────────────────┬──────────────────┐
# │ name   ┆ starts_with_lit ┆ starts_with_expr │
# │ ---    ┆ ---             ┆ ---              │
# │ str    ┆ bool            ┆ bool             │
# ╞════════╪═════════════════╪══════════════════╡
# │ black  ┆ false           ┆ true             │
# │ yellow ┆ true            ┆ false            │
# │ blue   ┆ false           ┆ true             │
# └────────┴─────────────────┴──────────────────┘

Parameters:

  • prefix (String)

    Prefix substring.

Returns:



117
118
119
120
# File 'lib/polars/binary_expr.rb', line 117

# Check whether binary values start with a binary substring.
#
# @param prefix [String]
#   Prefix substring. It is handed to `Utils.parse_into_expression` with
#   `str_as_lit: true` before being passed to the underlying expression.
#
# @return [Expr]
def starts_with(prefix)
  head = Utils.parse_into_expression(prefix, str_as_lit: true)
  matched = _rbexpr.binary_starts_with(head)
  Utils.wrap_expr(matched)
end