Class: Polars::StringNameSpace

Inherits:
Object
  • Object
show all
Defined in:
lib/polars/string_name_space.rb

Overview

Series.str namespace.

Instance Method Summary collapse

Dynamic Method Handling

This class handles dynamic methods through the method_missing method in the class Polars::ExprDispatch

Instance Method Details

#contains(pattern, literal: false) ⇒ Series

Check if strings in Series contain a substring that matches a regex.

Examples:

s = Polars::Series.new(["Crab", "cat and dog", "rab$bit", nil])
s.str.contains("cat|bit")
# =>
# shape: (4,)
# Series: '' [bool]
# [
#         false
#         true
#         true
#         null
# ]
s.str.contains("rab$", literal: true)
# =>
# shape: (4,)
# Series: '' [bool]
# [
#         false
#         false
#         true
#         null
# ]


296
297
298
# File 'lib/polars/string_name_space.rb', line 296

# Tests every string for a match of +pattern+. The examples on this page show
# +pattern+ used as a regex by default and as plain text with `literal: true`.
# Both arguments are handed to the parent implementation as received.
#
# @return [Series]
def contains(pattern, literal: false)
  super(pattern, literal: literal)
end

#contains_any(patterns, ascii_case_insensitive: false) ⇒ Series

Note:

This method supports matching on string literals only, and does not support regular expression matching.

Use the Aho-Corasick algorithm to find matches.

Determines if any of the patterns are contained in the string.

Examples:

s = Polars::Series.new(
  "lyrics",
  [
    "Everybody wants to rule the world",
    "Tell me what you want, what you really really want",
    "Can you feel the love tonight"
  ]
)
s.str.contains_any(["you", "me"])
# =>
# shape: (3,)
# Series: 'lyrics' [bool]
# [
#         false
#         true
#         true
# ]


1295
1296
1297
1298
1299
1300
# File 'lib/polars/string_name_space.rb', line 1295

# Reports, per string, whether any of the literal +patterns+ occurs in it.
# The arguments are passed straight on to the parent implementation.
#
# @return [Series]
def contains_any(patterns, ascii_case_insensitive: false)
  super(patterns, ascii_case_insensitive: ascii_case_insensitive)
end

#count_matches(pattern) ⇒ Series Also known as: count_match

Count all successive non-overlapping regex matches.

Examples:

s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
s.str.count_matches('\d')
# =>
# shape: (2,)
# Series: 'foo' [u32]
# [
#         5
#         6
# ]


623
624
625
# File 'lib/polars/string_name_space.rb', line 623

# Counts the successive non-overlapping regex matches of +pattern+ in each
# string by delegating to the parent implementation.
#
# @return [Series]
def count_matches(pattern)
  super(pattern)
end

#decode(encoding, strict: false) ⇒ Series

Decode a value using the provided encoding.

Examples:

s = Polars::Series.new(["666f6f", "626172", nil])
s.str.decode("hex")
# =>
# shape: (3,)
# Series: '' [binary]
# [
#         b"foo"
#         b"bar"
#         null
# ]


426
427
428
# File 'lib/polars/string_name_space.rb', line 426

# Decodes each value with the named +encoding+ (the example on this page uses
# "hex"). +strict+ is forwarded unchanged to the parent implementation.
#
# @return [Series]
def decode(encoding, strict: false)
  super(encoding, strict: strict)
end

#encode(encoding) ⇒ Series

Encode a value using the provided encoding.

Examples:

s = Polars::Series.new(["foo", "bar", nil])
s.str.encode("hex")
# =>
# shape: (3,)
# Series: '' [str]
# [
#         "666f6f"
#         "626172"
#         null
# ]


448
449
450
# File 'lib/polars/string_name_space.rb', line 448

# Encodes each value with the named +encoding+ (the example on this page uses
# "hex") via the parent implementation.
#
# @return [Series]
def encode(encoding)
  super(encoding)
end

#ends_with(sub) ⇒ Series

Check if string values end with a substring.

Examples:

s = Polars::Series.new("fruits", ["apple", "mango", nil])
s.str.ends_with("go")
# =>
# shape: (3,)
# Series: 'fruits' [bool]
# [
#         false
#         true
#         null
# ]


377
378
379
# File 'lib/polars/string_name_space.rb', line 377

# Checks whether each string value ends with the substring +sub+; the check
# itself is performed by the parent implementation.
#
# @return [Series]
def ends_with(sub)
  super(sub)
end

#escape_regex ⇒ Series

Returns string values with all regular expression meta characters escaped.

Examples:

Polars::Series.new(["abc", "def", nil, "abc(\\w+)"]).str.escape_regex
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "abc"
#         "def"
#         null
#         "abc\(\\w\+\)"
# ]


1541
1542
1543
# File 'lib/polars/string_name_space.rb', line 1541

# Returns the string values with regular-expression meta characters escaped.
# Takes no arguments and defers entirely to the parent implementation.
#
# @return [Series]
def escape_regex
  super()
end

#extract(pattern, group_index: 1) ⇒ Series

Extract the target capture group from provided patterns.

Examples:

df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
df.select([Polars.col("foo").str.extract('(\d+)')])
# =>
# shape: (2, 1)
# ┌─────┐
# │ foo │
# │ --- │
# │ str │
# ╞═════╡
# │ 123 │
# │ 678 │
# └─────┘


545
546
547
# File 'lib/polars/string_name_space.rb', line 545

# Extracts the capture group numbered +group_index+ from the match of
# +pattern+ in each string. Both values go to the parent implementation.
#
# @return [Series]
def extract(pattern, group_index: 1)
  super(pattern, group_index: group_index)
end

#extract_all(pattern) ⇒ Series

Extracts all matches for the given regex pattern.

Extract each successive non-overlapping regex match in an individual string as an array.

Examples:

s = Polars::Series.new("foo", ["123 bla 45 asd", "xyz 678 910t"])
s.str.extract_all('(\d+)')
# =>
# shape: (2,)
# Series: 'foo' [list[str]]
# [
#         ["123", "45"]
#         ["678", "910"]
# ]


569
570
571
# File 'lib/polars/string_name_space.rb', line 569

# Collects every successive non-overlapping match of the regex +pattern+ in
# each string; the work is done by the parent implementation.
#
# @return [Series]
def extract_all(pattern)
  super(pattern)
end

#extract_groups(pattern) ⇒ Series

Note:

All group names are strings.

Extract all capture groups for the given regex pattern.

Examples:

s = Polars::Series.new(
  "url",
  [
    "http://vote.com/ballon_dor?candidate=messi&ref=python",
    "http://vote.com/ballon_dor?candidate=weghorst&ref=polars",
    "http://vote.com/ballon_dor?error=404&ref=rust"
  ]
)
s.str.extract_groups("candidate=(?<candidate>\\w+)&ref=(?<ref>\\w+)")
# =>
# shape: (3,)
# Series: 'url' [struct[2]]
# [
#         {"messi","python"}
#         {"weghorst","polars"}
#         {null,null}
# ]


602
603
604
# File 'lib/polars/string_name_space.rb', line 602

# Extracts all capture groups of the regex +pattern+ from each string by
# delegating to the parent implementation.
#
# @return [Series]
def extract_groups(pattern)
  super(pattern)
end

#extract_many(patterns, ascii_case_insensitive: false, overlapping: false) ⇒ Series

Note:

This method supports matching on string literals only, and does not support regular expression matching.

Use the Aho-Corasick algorithm to extract many matches.

Examples:

s = Polars::Series.new("values", ["discontent"])
patterns = ["winter", "disco", "onte", "discontent"]
s.str.extract_many(patterns, overlapping: true)
# =>
# shape: (1,)
# Series: 'values' [list[str]]
# [
#         ["disco", "onte", "discontent"]
# ]


1415
1416
1417
1418
1419
1420
1421
# File 'lib/polars/string_name_space.rb', line 1415

# Extracts the occurrences of the literal +patterns+ found in each string.
# All three arguments are forwarded as-is to the parent implementation.
#
# @return [Series]
def extract_many(patterns, ascii_case_insensitive: false, overlapping: false)
  super(patterns, ascii_case_insensitive: ascii_case_insensitive, overlapping: overlapping)
end

#find(pattern, literal: false, strict: true) ⇒ Series

Note:

To modify regular expression behaviour (such as case-sensitivity) with flags, use the inline (?iLmsuxU) syntax.

Return the byte offset of the first substring matching a pattern.

If the pattern is not found, returns nil.

Examples:

Find the index of the first substring matching a regex pattern:

s = Polars::Series.new("txt", ["Crab", "Lobster", nil, "Crustacean"])
s.str.find("a|e").rename("idx_rx")
# =>
# shape: (4,)
# Series: 'idx_rx' [u32]
# [
#         2
#         5
#         null
#         5
# ]

Find the index of the first substring matching a literal pattern:

s.str.find("e", literal: true).rename("idx_lit")
# =>
# shape: (4,)
# Series: 'idx_lit' [u32]
# [
#         null
#         5
#         null
#         7
# ]

Match against a pattern found in another column (or expression):

p = Polars::Series.new("pat", ["a[bc]", "b.t", "[aeiuo]", "(?i)A[BC]"])
s.str.find(p).rename("idx")
# =>
# shape: (4,)
# Series: 'idx' [u32]
# [
#         2
#         2
#         null
#         5
# ]


355
356
357
# File 'lib/polars/string_name_space.rb', line 355

# Locates the first substring matching +pattern+ in each string. +literal+
# and +strict+ are passed through to the parent implementation untouched.
#
# @return [Series]
def find(pattern, literal: false, strict: true)
  super(pattern, literal: literal, strict: strict)
end

#find_many(patterns, ascii_case_insensitive: false, overlapping: false) ⇒ Series

Note:

This method supports matching on string literals only, and does not support regular expression matching.

Use the Aho-Corasick algorithm to find all matches.

The function returns the byte offset of the start of each match. The return type will be List<UInt32>.

Examples:

df = Polars::DataFrame.new({"values" => ["discontent"]})
patterns = ["winter", "disco", "onte", "discontent"]
df.with_columns(
  Polars.col("values")
  .str.extract_many(patterns, overlapping: false)
  .alias("matches"),
  Polars.col("values")
  .str.extract_many(patterns, overlapping: true)
  .alias("matches_overlapping")
)
# =>
# shape: (1, 3)
# ┌────────────┬───────────┬─────────────────────────────────┐
# │ values     ┆ matches   ┆ matches_overlapping             │
# │ ---        ┆ ---       ┆ ---                             │
# │ str        ┆ list[str] ┆ list[str]                       │
# ╞════════════╪═══════════╪═════════════════════════════════╡
# │ discontent ┆ ["disco"] ┆ ["disco", "onte", "discontent"… │
# └────────────┴───────────┴─────────────────────────────────┘
df = Polars::DataFrame.new(
  {
    "values" => ["discontent", "rhapsody"],
    "patterns" => [
      ["winter", "disco", "onte", "discontent"],
      ["rhap", "ody", "coalesce"]
    ]
  }
)
df.select(Polars.col("values").str.find_many("patterns"))
# =>
# shape: (2, 1)
# ┌───────────┐
# │ values    │
# │ ---       │
# │ list[u32] │
# ╞═══════════╡
# │ [0]       │
# │ [0, 5]    │
# └───────────┘


1485
1486
1487
1488
1489
1490
1491
# File 'lib/polars/string_name_space.rb', line 1485

# Finds the start offsets of the literal +patterns+ in each string.
# All three arguments are forwarded as-is to the parent implementation.
#
# @return [Series]
def find_many(patterns, ascii_case_insensitive: false, overlapping: false)
  super(patterns, ascii_case_insensitive: ascii_case_insensitive, overlapping: overlapping)
end

#head(n) ⇒ Series

Return the first n characters of each string in a String Series.

Examples:

Return up to the first 5 characters.

s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
s.str.head(5)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "pear"
#         null
#         "papay"
#         "drago"
# ]

Return up to the 3rd character from the end.

s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
s.str.head(-3)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "p"
#         null
#         "pap"
#         "dragonfr"
# ]


1171
1172
1173
# File 'lib/polars/string_name_space.rb', line 1171

# Returns the first +n+ characters of each string; a negative +n+ is also
# accepted (see the examples on this page). Delegates to the parent.
#
# @return [Series]
def head(n)
  super(n)
end

#join(delimiter = "-", ignore_nulls: true) ⇒ Series Also known as: concat

Vertically concat the values in the Series to a single string value.

Examples:

Polars::Series.new([1, nil, 2]).str.join("-")
# =>
# shape: (1,)
# Series: '' [str]
# [
#         "1-2"
# ]
Polars::Series.new([1, nil, 2]).str.join("-", ignore_nulls: false)
# =>
# shape: (1,)
# Series: '' [str]
# [
#         null
# ]


1521
1522
1523
# File 'lib/polars/string_name_space.rb', line 1521

# Concatenates the values of the Series vertically into one string value,
# separated by +delimiter+. Both arguments go to the parent implementation.
#
# @return [Series]
def join(delimiter = "-", ignore_nulls: true)
  super(delimiter, ignore_nulls: ignore_nulls)
end

#json_decode(dtype = nil, infer_schema_length: 100) ⇒ Series

Parse string values as JSON.

Throws an error if invalid JSON strings are encountered.

Examples:

s = Polars::Series.new("json", ['{"a":1, "b": true}', nil, '{"a":2, "b": false}'])
s.str.json_decode
# =>
# shape: (3,)
# Series: 'json' [struct[2]]
# [
#         {1,true}
#         null
#         {2,false}
# ]


476
477
478
479
480
481
482
483
484
485
486
487
# File 'lib/polars/string_name_space.rb', line 476

# Parses string values as JSON.
#
# Without a +dtype+ the call goes to `_s.str_json_decode(infer_schema_length)`
# and the result is wrapped. With a +dtype+ the series is wrapped, turned into
# a frame, and decoded through a column expression; +infer_schema_length+ is
# not used on that path.
#
# @return [Series]
def json_decode(dtype = nil, infer_schema_length: 100)
  if dtype.nil?
    inferred = _s.str_json_decode(infer_schema_length)
    return Utils.wrap_s(inferred)
  end

  series = Utils.wrap_s(_s)
  frame = series.to_frame
  decode_expr = F.col(series.name).str.json_decode(dtype)
  frame.select_seq(decode_expr).to_series
end

#json_path_match(json_path) ⇒ Series

Extract the first match of json string with provided JSONPath expression.

Throws an error if invalid JSON strings are encountered. All return values will be cast to Utf8 regardless of the original value.

Documentation on JSONPath standard can be found here.

Examples:

df = Polars::DataFrame.new(
  {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
)
df.select(Polars.col("json_val").str.json_path_match("$.a"))[0.., 0]
# =>
# shape: (5,)
# Series: 'json_val' [str]
# [
#         "1"
#         null
#         "2"
#         "2.1"
#         "true"
# ]


517
518
519
# File 'lib/polars/string_name_space.rb', line 517

# Extracts the first match of the JSONPath expression +json_path+ from each
# JSON string by delegating to the parent implementation.
#
# @return [Series]
def json_path_match(json_path)
  super(json_path)
end

#len_bytes ⇒ Series Also known as: lengths

Return the length of each string as the number of bytes.

Examples:

s = Polars::Series.new(["Café", "345", "東京", nil])
s.str.len_bytes
# =>
# shape: (4,)
# Series: '' [u32]
# [
#         5
#         3
#         6
#         null
# ]


237
238
239
# File 'lib/polars/string_name_space.rb', line 237

# Returns the length of each string measured in bytes. Takes no arguments
# and defers entirely to the parent implementation.
#
# @return [Series]
def len_bytes
  super()
end

#len_chars ⇒ Series Also known as: n_chars

Return the length of each string as the number of characters.

Examples:

s = Polars::Series.new(["Café", "345", "東京", nil])
s.str.len_chars
# =>
# shape: (4,)
# Series: '' [u32]
# [
#         4
#         3
#         2
#         null
# ]


258
259
260
# File 'lib/polars/string_name_space.rb', line 258

# Returns the length of each string measured in characters. Takes no
# arguments and defers entirely to the parent implementation.
#
# @return [Series]
def len_chars
  super()
end

#ljust(width, fillchar = " ") ⇒ Series

Return the string left justified in a string of length width.

Padding is done using the specified fillchar. The original string is returned if width is less than or equal to s.length.

Examples:

s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
s.str.ljust(8, "*")
# =>
# shape: (4,)
# Series: 'a' [str]
# [
#         "cow*****"
#         "monkey**"
#         null
#         "hippopotamus"
# ]


1012
1013
1014
# File 'lib/polars/string_name_space.rb', line 1012

# Left-justifies each string in a field of +width+, padding with +fillchar+.
# Both arguments are handed to the parent implementation.
#
# @return [Series]
def ljust(width, fillchar = " ")
  super(width, fillchar)
end

#normalize(form = "NFC") ⇒ Series

Returns the Unicode normal form of the string values.

This uses the forms described in Unicode Standard Annex 15: https://www.unicode.org/reports/tr15/.

Examples:

s = Polars::Series.new(["01²", "KADOKAWA"])
s.str.normalize("NFC")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "01²"
#         "KADOKAWA"
# ]
s.str.normalize("NFKC")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "012"
#         "KADOKAWA"
# ]


1574
1575
1576
# File 'lib/polars/string_name_space.rb', line 1574

# Returns the Unicode normal form of each string value; +form+ names the
# form (the examples on this page use "NFC" and "NFKC"). Delegates to the
# parent.
#
# @return [Series]
def normalize(form = "NFC")
  super(form)
end

#pad_end(length, fill_char = " ") ⇒ Series

Pad the end of the string until it reaches the given length.

Examples:

s = Polars::Series.new(["cow", "monkey", "hippopotamus", nil])
s.str.pad_end(8, "*")
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "cow*****"
#         "monkey**"
#         "hippopotamus"
#         null
# ]


954
955
956
# File 'lib/polars/string_name_space.rb', line 954

# Pads the end of each string with +fill_char+ until it reaches +length+.
# Both arguments are handed to the parent implementation.
#
# @return [Series]
def pad_end(length, fill_char = " ")
  super(length, fill_char)
end

#pad_start(length, fill_char = " ") ⇒ Series

Pad the start of the string until it reaches the given length.

Examples:

s = Polars::Series.new("a", ["cow", "monkey", "hippopotamus", nil])
s.str.pad_start(8, "*")
# =>
# shape: (4,)
# Series: 'a' [str]
# [
#         "*****cow"
#         "**monkey"
#         "hippopotamus"
#         null
# ]


928
929
930
# File 'lib/polars/string_name_space.rb', line 928

# Pads the start of each string with +fill_char+ until it reaches +length+.
# Both arguments are handed to the parent implementation.
#
# @return [Series]
def pad_start(length, fill_char = " ")
  super(length, fill_char)
end

#replace(pattern, value, literal: false) ⇒ Series

Replace first matching regex/literal substring with a new string value.

Examples:

s = Polars::Series.new(["123abc", "abc456"])
s.str.replace('abc\b', "ABC")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "123ABC"
#         "abc456"
# ]


762
763
764
# File 'lib/polars/string_name_space.rb', line 762

# Replaces the first regex/literal match of +pattern+ in each string with
# +value+. All arguments are forwarded to the parent implementation.
#
# @return [Series]
def replace(pattern, value, literal: false)
  super(pattern, value, literal: literal)
end

#replace_all(pattern, value, literal: false) ⇒ Series

Replace all matching regex/literal substrings with a new string value.

Examples:

df = Polars::Series.new(["abcabc", "123a123"])
df.str.replace_all("a", "-")
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "-bc-bc"
#         "123-123"
# ]


787
788
789
# File 'lib/polars/string_name_space.rb', line 787

# Replaces every regex/literal match of +pattern+ in each string with
# +value+. All arguments are forwarded to the parent implementation.
#
# @return [Series]
def replace_all(pattern, value, literal: false)
  super(pattern, value, literal: literal)
end

#replace_many(patterns, replace_with = Expr::NO_DEFAULT, ascii_case_insensitive: false) ⇒ Series

Note:

This method supports matching on string literals only, and does not support regular expression matching.

Use the Aho-Corasick algorithm to replace many matches.

Examples:

Replace many patterns by passing lists of equal length to the patterns and replace_with parameters.

s = Polars::Series.new(
  "lyrics",
  [
    "Everybody wants to rule the world",
    "Tell me what you want, what you really really want",
    "Can you feel the love tonight"
  ]
)
s.str.replace_many(["you", "me"], ["me", "you"])
# =>
# shape: (3,)
# Series: 'lyrics' [str]
# [
#         "Everybody wants to rule the wo…
#         "Tell you what me want, what me…
#         "Can me feel the love tonight"
# ]

Broadcast a replacement for many patterns by passing a sequence of length 1 to the replace_with parameter.

s = Polars::Series.new(
  "lyrics",
  [
    "Everybody wants to rule the world",
    "Tell me what you want, what you really really want",
    "Can you feel the love tonight",
  ]
)
s.str.replace_many(["me", "you", "they"], [""])
# =>
# shape: (3,)
# Series: 'lyrics' [str]
# [
#         "Everybody wants to rule the wo…
#         "Tell  what  want, what  really…
#         "Can  feel the love tonight"
# ]

Passing a mapping with patterns and replacements is also supported as syntactic sugar.

s = Polars::Series.new(
  "lyrics",
  [
    "Everybody wants to rule the world",
    "Tell me what you want, what you really really want",
    "Can you feel the love tonight"
  ]
)
mapping = {"me" => "you", "you" => "me", "want" => "need"}
s.str.replace_many(mapping)
# =>
# shape: (3,)
# Series: 'lyrics' [str]
# [
#         "Everybody needs to rule the wo…
#         "Tell you what me need, what me…
#         "Can me feel the love tonight"
# ]


1380
1381
1382
1383
1384
1385
1386
# File 'lib/polars/string_name_space.rb', line 1380

# Replaces occurrences of the literal +patterns+ in each string. When
# +replace_with+ is omitted, the sentinel Expr::NO_DEFAULT is forwarded in its
# place; interpreting it is left to the parent implementation.
#
# @return [Series]
def replace_many(patterns, replace_with = Expr::NO_DEFAULT, ascii_case_insensitive: false)
  super(patterns, replace_with, ascii_case_insensitive: ascii_case_insensitive)
end

#reverse ⇒ Series

Returns string values in reversed order.

Examples:

s = Polars::Series.new("text", ["foo", "bar", "man\u0303ana"])
s.str.reverse
# =>
# shape: (3,)
# Series: 'text' [str]
# [
#         "oof"
#         "rab"
#         "anañam"
# ]


1095
1096
1097
# File 'lib/polars/string_name_space.rb', line 1095

# Returns the string values reversed. Takes no arguments and defers entirely
# to the parent implementation.
#
# @return [Series]
def reverse
  super()
end

#rjust(width, fillchar = " ") ⇒ Series

Return the string right justified in a string of length width.

Padding is done using the specified fillchar. The original string is returned if width is less than or equal to s.length.

Examples:

s = Polars::Series.new("a", ["cow", "monkey", nil, "hippopotamus"])
s.str.rjust(8, "*")
# =>
# shape: (4,)
# Series: 'a' [str]
# [
#         "*****cow"
#         "**monkey"
#         null
#         "hippopotamus"
# ]


1040
1041
1042
# File 'lib/polars/string_name_space.rb', line 1040

# Right-justifies each string in a field of +width+, padding with +fillchar+.
# Both arguments are handed to the parent implementation.
#
# @return [Series]
def rjust(width, fillchar = " ")
  super(width, fillchar)
end

#slice(offset, length = nil) ⇒ Series

Create subslices of the string values of a Utf8 Series.

Examples:

s = Polars::Series.new("s", ["pear", nil, "papaya", "dragonfruit"])
s.str.slice(-3)
# =>
# shape: (4,)
# Series: 's' [str]
# [
#         "ear"
#         null
#         "aya"
#         "uit"
# ]

Using the optional length parameter

s.str.slice(4, 3)
# =>
# shape: (4,)
# Series: 's' [str]
# [
#         ""
#         null
#         "ya"
#         "onf"
# ]


1133
1134
1135
1136
# File 'lib/polars/string_name_space.rb', line 1133

# Creates subslices of the string values.
#
# The underlying series is wrapped, converted to a frame, and sliced through
# a column expression built from the series' own name; the single resulting
# column is returned as a Series.
#
# @return [Series]
def slice(offset, length = nil)
  series = Utils.wrap_s(_s)
  frame = series.to_frame
  slice_expr = Polars.col(series.name).str.slice(offset, length)
  frame.select(slice_expr).to_series
end

#split(by, inclusive: false) ⇒ Series

Split the string by a substring.



636
637
638
# File 'lib/polars/string_name_space.rb', line 636

# Splits each string by the substring +by+. +inclusive+ is forwarded
# unchanged to the parent implementation.
#
# @return [Series]
def split(by, inclusive: false)
  super(by, inclusive: inclusive)
end

#split_exact(by, n, inclusive: false) ⇒ Series

Split the string by a substring using n splits.

Results in a struct of n+1 fields.

If it cannot make n splits, the remaining field elements will be null.

Examples:

df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
df["x"].str.split_exact("_", 1).alias("fields")
# =>
# shape: (4,)
# Series: 'fields' [struct[2]]
# [
#         {"a","1"}
#         {null,null}
#         {"c",null}
#         {"d","4"}
# ]

Split string values in column x in exactly 2 parts and assign each part to a new column.

df["x"]
  .str.split_exact("_", 1)
  .struct.rename_fields(["first_part", "second_part"])
  .alias("fields")
  .to_frame
  .unnest("fields")
# =>
# shape: (4, 2)
# ┌────────────┬─────────────┐
# │ first_part ┆ second_part │
# │ ---        ┆ ---         │
# │ str        ┆ str         │
# ╞════════════╪═════════════╡
# │ a          ┆ 1           │
# │ null       ┆ null        │
# │ c          ┆ null        │
# │ d          ┆ 4           │
# └────────────┴─────────────┘


687
688
689
# File 'lib/polars/string_name_space.rb', line 687

# Splits each string by the substring +by+ using +n+ splits, which this page
# describes as producing a struct of n+1 fields. Delegates to the parent.
#
# @return [Series]
def split_exact(by, n, inclusive: false)
  super(by, n, inclusive: inclusive)
end

#splitn(by, n) ⇒ Series

Split the string by a substring, restricted to returning at most n items.

If the number of possible splits is less than n-1, the remaining field elements will be null. If the number of possible splits is n-1 or greater, the last (nth) substring will contain the remainder of the string.

Examples:

df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
df["s"].str.splitn(" ", 2).alias("fields")
# =>
# shape: (4,)
# Series: 'fields' [struct[2]]
# [
#         {"foo","bar"}
#         {null,null}
#         {"foo-bar",null}
#         {"foo","bar baz"}
# ]

Split string values in column s in exactly 2 parts and assign each part to a new column.

df["s"]
  .str.splitn(" ", 2)
  .struct.rename_fields(["first_part", "second_part"])
  .alias("fields")
  .to_frame
  .unnest("fields")
# =>
# shape: (4, 2)
# ┌────────────┬─────────────┐
# │ first_part ┆ second_part │
# │ ---        ┆ ---         │
# │ str        ┆ str         │
# ╞════════════╪═════════════╡
# │ foo        ┆ bar         │
# │ null       ┆ null        │
# │ foo-bar    ┆ null        │
# │ foo        ┆ bar baz     │
# └────────────┴─────────────┘


736
737
738
739
# File 'lib/polars/string_name_space.rb', line 736

# Splits each string by the substring +by+, returning at most +n+ items.
#
# The underlying series is wrapped, converted to a frame, and split through
# a column expression built from the series' own name; the single resulting
# column is returned as a Series.
#
# @return [Series]
def splitn(by, n)
  series = Utils.wrap_s(_s)
  frame = series.to_frame
  split_expr = Polars.col(series.name).str.splitn(by, n)
  frame.select(split_expr).to_series
end

#starts_with(sub) ⇒ Series

Check if string values start with a substring.

Examples:

s = Polars::Series.new("fruits", ["apple", "mango", nil])
s.str.starts_with("app")
# =>
# shape: (3,)
# Series: 'fruits' [bool]
# [
#         true
#         false
#         null
# ]


399
400
401
# File 'lib/polars/string_name_space.rb', line 399

# Checks whether each string value starts with the substring +sub+; the
# check itself is performed by the parent implementation.
#
# @return [Series]
def starts_with(sub)
  super(sub)
end

#strip_chars(matches = nil) ⇒ Series

Remove leading and trailing whitespace.

Examples:

s = Polars::Series.new([" hello ", "\tworld"])
s.str.strip_chars
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "hello"
#         "world"
# ]


808
809
810
# File 'lib/polars/string_name_space.rb', line 808

# Strips leading and trailing whitespace from each string when called without
# arguments, as in the example on this page. +matches+ (nil by default) is
# forwarded unchanged to the parent implementation.
#
# @return [Series]
def strip_chars(matches = nil)
  super(matches)
end

#strip_chars_end(matches = nil) ⇒ Series Also known as: rstrip

Remove trailing whitespace.

Examples:

s = Polars::Series.new([" hello ", "world\t"])
s.str.strip_chars_end
# =>
# shape: (2,)
# Series: '' [str]
# [
#         " hello"
#         "world"
# ]


851
852
853
# File 'lib/polars/string_name_space.rb', line 851

# Strips trailing whitespace from each string when called without arguments,
# as in the example on this page. +matches+ (nil by default) is forwarded
# unchanged to the parent implementation.
#
# @return [Series]
def strip_chars_end(matches = nil)
  super(matches)
end

#strip_chars_start(matches = nil) ⇒ Series Also known as: lstrip

Remove leading whitespace.

Examples:

s = Polars::Series.new([" hello ", "\tworld"])
s.str.strip_chars_start
# =>
# shape: (2,)
# Series: '' [str]
# [
#         "hello "
#         "world"
# ]


829
830
831
# File 'lib/polars/string_name_space.rb', line 829

# Strips leading whitespace from each string when called without arguments,
# as in the example on this page. +matches+ (nil by default) is forwarded
# unchanged to the parent implementation.
#
# @return [Series]
def strip_chars_start(matches = nil)
  super(matches)
end

#strip_prefix(prefix) ⇒ Series

Remove prefix.

The prefix will be removed from the string exactly once, if found.

Examples:

s = Polars::Series.new(["foobar", "foofoobar", "foo", "bar"])
s.str.strip_prefix("foo")
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "bar"
#         "foobar"
#         ""
#         "bar"
# ]


877
878
879
# File 'lib/polars/string_name_space.rb', line 877

# Removes +prefix+ from the start of each string, once, when present.
# Delegates to the parent implementation.
#
# @return [Series]
def strip_prefix(prefix)
  super(prefix)
end

#strip_suffix(suffix) ⇒ Series

Remove suffix.

The suffix will be removed from the string exactly once, if found.

Examples:

s = Polars::Series.new(["foobar", "foobarbar", "foo", "bar"])
s.str.strip_suffix("bar")
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "foo"
#         "foobar"
#         "foo"
#         ""
# ]


902
903
904
# File 'lib/polars/string_name_space.rb', line 902

# Removes +suffix+ from the end of each string, once, when present.
# Delegates to the parent implementation.
#
# @return [Series]
def strip_suffix(suffix)
  super(suffix)
end

#strptime(datatype, fmt = nil, strict: true, exact: true, cache: true) ⇒ Series

Parse a Series of dtype Utf8 to a Date/Datetime Series.

Examples:

Dealing with a consistent format:

s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
# =>
# shape: (2,)
# Series: '' [datetime[μs, UTC]]
# [
#         2020-01-01 01:00:00 UTC
#         2020-01-01 02:00:00 UTC
# ]

Dealing with different formats.

s = Polars::Series.new(
  "date",
  [
    "2021-04-22",
    "2022-01-04 00:00:00",
    "01/31/22",
    "Sun Jul  8 00:34:60 2001"
  ]
)
s.to_frame.select(
  Polars.coalesce(
    Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
    Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
  )
).to_series
# =>
# shape: (4,)
# Series: 'date' [date]
# [
#         2021-04-22
#         2022-01-04
#         2022-01-31
#         2001-07-08
# ]


183
184
185
# File 'lib/polars/string_name_space.rb', line 183

# Parses the string values into the temporal +datatype+ (the examples on this
# page use Polars::Date and Polars::Datetime), optionally with the format
# string +fmt+. Every argument is forwarded unchanged to the parent.
#
# @return [Series]
def strptime(datatype, fmt = nil, strict: true, exact: true, cache: true)
  super(datatype, fmt, strict: strict, exact: exact, cache: cache)
end

#tail(n) ⇒ Series

Return the last n characters of each string in a String Series.

Examples:

Return up to the last 5 characters:

s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
s.str.tail(5)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "pear"
#         null
#         "apaya"
#         "fruit"
# ]

Return from the 3rd character to the end:

s = Polars::Series.new(["pear", nil, "papaya", "dragonfruit"])
s.str.tail(-3)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "r"
#         null
#         "aya"
#         "gonfruit"
# ]


1208
1209
1210
# File 'lib/polars/string_name_space.rb', line 1208

# Returns the last +n+ characters of each string; a negative +n+ is also
# accepted (see the examples on this page). Delegates to the parent.
#
# @return [Series]
def tail(n)
  super(n)
end

#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Series

Convert a Utf8 column into a Date column.

Examples:

s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
s.str.to_date
# =>
# shape: (3,)
# Series: '' [date]
# [
#         2020-01-01
#         2020-02-01
#         2020-03-01
# ]


41
42
43
# File 'lib/polars/string_name_space.rb', line 41

# Converts the string values into a Date column, optionally using the format
# string +format+. Every argument is forwarded unchanged to the parent.
#
# @return [Series]
def to_date(format = nil, strict: true, exact: true, cache: true)
  super(format, strict: strict, exact: exact, cache: cache)
end

#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true, ambiguous: "raise") ⇒ Series

Convert a Utf8 column into a Datetime column.

Examples:

s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
s.str.to_datetime("%Y-%m-%d %H:%M%#z")
# =>
# shape: (2,)
# Series: '' [datetime[μs, UTC]]
# [
#         2020-01-01 01:00:00 UTC
#         2020-01-01 02:00:00 UTC
# ]


86
87
88
89
90
91
92
93
94
95
96
# File 'lib/polars/string_name_space.rb', line 86

# Converts the string values into a Datetime column, optionally using the
# format string +format+. Every option is forwarded unchanged to the parent
# implementation, which decides how each one is applied.
#
# @return [Series]
def to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true, ambiguous: "raise")
  super(
    format,
    time_unit: time_unit,
    time_zone: time_zone,
    strict: strict,
    exact: exact,
    cache: cache,
    ambiguous: ambiguous
  )
end

#to_decimal(inference_length = 100, scale: nil) ⇒ Series

Convert a String column into a Decimal column.

This method infers the needed parameters precision and scale.

Examples:

s = Polars::Series.new(
  ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]
)
s.str.to_decimal
# =>
# shape: (7,)
# Series: '' [decimal[8,2]]
# [
#         40.12
#         3420.13
#         120134.19
#         3212.98
#         12.90
#         143.09
#         143.90
# ]


213
214
215
216
217
218
219
# File 'lib/polars/string_name_space.rb', line 213

# Converts the string values into a Decimal column.
#
# Only the inferring path is implemented here: any non-nil +scale+ raises
# Todo. Otherwise +inference_length+ is passed to `_s.str_to_decimal_infer`
# and the result is wrapped.
#
# @raise [Todo] when +scale+ is given
# @return [Series]
def to_decimal(inference_length = 100, scale: nil)
  raise Todo unless scale.nil?

  inferred = _s.str_to_decimal_infer(inference_length)
  Utils.wrap_s(inferred)
end

#to_integer(base: 10, dtype: Int64, strict: true) ⇒ Series

Convert a String column into a column of dtype with base radix.

Examples:

s = Polars::Series.new("bin", ["110", "101", "010", "invalid"])
s.str.to_integer(base: 2, dtype: Polars::Int32, strict: false)
# =>
# shape: (4,)
# Series: 'bin' [i32]
# [
#         6
#         5
#         2
#         null
# ]
s = Polars::Series.new("hex", ["fa1e", "ff00", "cafe", nil])
s.str.to_integer(base: 16)
# =>
# shape: (4,)
# Series: 'hex' [i64]
# [
#         64030
#         65280
#         51966
#         null
# ]


1252
1253
1254
1255
1256
1257
1258
# File 'lib/polars/string_name_space.rb', line 1252

# Converts the string values into an integer column of +dtype+, reading the
# digits in radix +base+. All options are forwarded unchanged to the parent.
#
# @return [Series]
def to_integer(base: 10, dtype: Int64, strict: true)
  super(base: base, dtype: dtype, strict: strict)
end

#to_lowercase ⇒ Series

Modify the strings to their lowercase equivalent.

Examples:

s = Polars::Series.new("foo", ["CAT", "DOG"])
s.str.to_lowercase
# =>
# shape: (2,)
# Series: 'foo' [str]
# [
#         "cat"
#         "dog"
# ]


1058
1059
1060
# File 'lib/polars/string_name_space.rb', line 1058

# Converts the strings to their lowercase equivalent. Takes no arguments and
# defers entirely to the parent implementation.
#
# @return [Series]
def to_lowercase
  super()
end

#to_time(format = nil, strict: true, cache: true) ⇒ Series

Convert a Utf8 column into a Time column.

Examples:

s = Polars::Series.new(["01:00", "02:00", "03:00"])
s.str.to_time("%H:%M")
# =>
# shape: (3,)
# Series: '' [time]
# [
#         01:00:00
#         02:00:00
#         03:00:00
# ]


123
124
125
# File 'lib/polars/string_name_space.rb', line 123

# Converts the string values into a Time column, optionally using the format
# string +format+. Every argument is forwarded unchanged to the parent.
#
# @return [Series]
def to_time(format = nil, strict: true, cache: true)
  super(format, strict: strict, cache: cache)
end

#to_uppercase ⇒ Series

Modify the strings to their uppercase equivalent.

Examples:

s = Polars::Series.new("foo", ["cat", "dog"])
s.str.to_uppercase
# =>
# shape: (2,)
# Series: 'foo' [str]
# [
#         "CAT"
#         "DOG"
# ]


1076
1077
1078
# File 'lib/polars/string_name_space.rb', line 1076

# Converts the strings to their uppercase equivalent. Takes no arguments and
# defers entirely to the parent implementation.
#
# @return [Series]
def to_uppercase
  super()
end

#zfill(length) ⇒ Series

Fills the string with zeroes.

Return a copy of the string left filled with ASCII '0' digits to make a string of the given length.

A leading sign prefix ('+'/'-') is handled by inserting the padding after the sign character rather than before. The original string is returned if length is less than or equal to s.length.

Examples:

s = Polars::Series.new([-1, 123, 999999, nil])
s.cast(Polars::String).str.zfill(4)
# =>
# shape: (4,)
# Series: '' [str]
# [
#         "-001"
#         "0123"
#         "999999"
#         null
# ]


984
985
986
# File 'lib/polars/string_name_space.rb', line 984

# Left-pads each string with ASCII '0' digits up to +length+ by delegating to
# the parent implementation.
#
# @return [Series]
def zfill(length)
  super(length)
end