Class: Polars::StringExpr
- Inherits:
-
Object
- Object
- Polars::StringExpr
- Defined in:
- lib/polars/string_expr.rb
Overview
Namespace for string related expressions.
Instance Method Summary collapse
-
#concat(delimiter = "-", ignore_nulls: true) ⇒ Expr
Vertically concat the values in the Series to a single string value.
-
#contains(pattern, literal: false, strict: true) ⇒ Expr
Check if string contains a substring that matches a regex.
-
#count_matches(pattern, literal: false) ⇒ Expr
(also: #count_match)
Count all successive non-overlapping regex matches.
-
#decode(encoding, strict: true) ⇒ Expr
Decode a value using the provided encoding.
-
#encode(encoding) ⇒ Expr
Encode a value using the provided encoding.
-
#ends_with(sub) ⇒ Expr
Check if string values end with a substring.
-
#explode ⇒ Expr
Returns a column with a separate row for every string character.
-
#extract(pattern, group_index: 1) ⇒ Expr
Extract the target capture group from provided patterns.
-
#extract_all(pattern) ⇒ Expr
Extracts all matches for the given regex pattern.
-
#json_extract(dtype = nil, infer_schema_length: 100) ⇒ Expr
Parse string values as JSON.
-
#json_path_match(json_path) ⇒ Expr
Extract the first match of json string with provided JSONPath expression.
-
#lengths ⇒ Expr
Get length of the strings as :u32 (as number of bytes).
-
#ljust(length, fillchar = " ") ⇒ Expr
(also: #pad_end)
Return the string left justified in a string of length length.
-
#n_chars ⇒ Expr
Get length of the strings as :u32 (as number of chars).
-
#parse_int(radix = 2, strict: true) ⇒ Expr
Parse integers with base radix from strings.
-
#replace(pattern, value, literal: false, n: 1) ⇒ Expr
Replace first matching regex/literal substring with a new string value.
-
#replace_all(pattern, value, literal: false) ⇒ Expr
Replace all matching regex/literal substrings with a new string value.
-
#rjust(length, fillchar = " ") ⇒ Expr
(also: #pad_start)
Return the string right justified in a string of length length.
-
#slice(offset, length = nil) ⇒ Expr
Create subslices of the string values of a Utf8 Series.
-
#split(by, inclusive: false) ⇒ Expr
Split the string by a substring.
-
#split_exact(by, n, inclusive: false) ⇒ Expr
Split the string by a substring using n splits.
-
#splitn(by, n) ⇒ Expr
Split the string by a substring, restricted to returning at most n items.
-
#starts_with(sub) ⇒ Expr
Check if string values start with a substring.
-
#strip_chars(characters = nil) ⇒ Expr
(also: #strip)
Remove leading and trailing whitespace.
-
#strip_chars_end(characters = nil) ⇒ Expr
(also: #rstrip)
Remove trailing whitespace.
-
#strip_chars_start(characters = nil) ⇒ Expr
(also: #lstrip)
Remove leading whitespace.
-
#strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false) ⇒ Expr
Parse a Utf8 expression to a Date/Datetime/Time type.
-
#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Date column.
-
#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true, use_earliest: nil, ambiguous: "raise") ⇒ Expr
Convert a Utf8 column into a Datetime column.
-
#to_integer(base: 10, strict: true) ⇒ Expr
Convert a Utf8 column into an Int64 column with base radix.
-
#to_lowercase ⇒ Expr
Transform to lowercase variant.
-
#to_time(format = nil, strict: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Time column.
-
#to_uppercase ⇒ Expr
Transform to uppercase variant.
-
#zfill(alignment) ⇒ Expr
Fills the string with zeroes.
Instance Method Details
#concat(delimiter = "-", ignore_nulls: true) ⇒ Expr
Vertically concat the values in the Series to a single string value.
312 313 314 |
# File 'lib/polars/string_expr.rb', line 312
# Vertically concat the values in the Series to a single string value.
#
# @param delimiter [String] forwarded to the native `str_concat` call
#   (defaults to "-")
# @param ignore_nulls [Boolean] forwarded to the native `str_concat` call
# @return [Expr] the native result wrapped by `Utils.wrap_expr`
def concat(delimiter = "-", ignore_nulls: true)
  joined = _rbexpr.str_concat(delimiter, ignore_nulls)
  Utils.wrap_expr(joined)
end
#contains(pattern, literal: false, strict: true) ⇒ Expr
Check if string contains a substring that matches a regex.
577 578 579 580 |
# File 'lib/polars/string_expr.rb', line 577
# Check if string contains a substring that matches a regex.
#
# @param pattern [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`) before being handed to the native call
# @param literal [Boolean] forwarded to the native `str_contains` call
# @param strict [Boolean] forwarded to the native `str_contains` call
# @return [Expr]
def contains(pattern, literal: false, strict: true)
  rb_pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)._rbexpr
  matched = _rbexpr.str_contains(rb_pattern, literal, strict)
  Utils.wrap_expr(matched)
end
#count_matches(pattern, literal: false) ⇒ Expr Also known as: count_match
Count all successive non-overlapping regex matches.
886 887 888 889 |
# File 'lib/polars/string_expr.rb', line 886
# Count all successive non-overlapping regex matches.
#
# @param pattern [Object] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) before being handed to the native call
# @param literal [Boolean] forwarded to the native `str_count_matches` call
# @return [Expr]
def count_matches(pattern, literal: false)
  parsed_pattern = Utils.parse_as_expression(pattern, str_as_lit: true)
  counted = _rbexpr.str_count_matches(parsed_pattern, literal)
  Utils.wrap_expr(counted)
end
#decode(encoding, strict: true) ⇒ Expr
Decode a value using the provided encoding.
757 758 759 760 761 762 763 764 765 |
# File 'lib/polars/string_expr.rb', line 757
# Decode a value using the provided encoding.
#
# @param encoding [String] either "hex" or "base64"
# @param strict [Boolean] forwarded to the native decode call
# @return [Expr]
# @raise [ArgumentError] if `encoding` is neither "hex" nor "base64"
def decode(encoding, strict: true)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
  else
    # Single braces: the doubled "{{ }}" form is a Python f-string escape and
    # would be printed literally by Ruby.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
#encode(encoding) ⇒ Expr
Encode a value using the provided encoding.
788 789 790 791 792 793 794 795 796 |
# File 'lib/polars/string_expr.rb', line 788
# Encode a value using the provided encoding.
#
# @param encoding [String] either "hex" or "base64"
# @return [Expr]
# @raise [ArgumentError] if `encoding` is neither "hex" nor "base64"
def encode(encoding)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_encode)
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_encode)
  else
    # Single braces: the doubled "{{ }}" form is a Python f-string escape and
    # would be printed literally by Ruby.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
#ends_with(sub) ⇒ Expr
Check if string values end with a substring.
617 618 619 620 |
# File 'lib/polars/string_expr.rb', line 617
# Check if string values end with a substring.
#
# @param sub [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`) before being handed to the native call
# @return [Expr]
def ends_with(sub)
  rb_suffix = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)._rbexpr
  checked = _rbexpr.str_ends_with(rb_suffix)
  Utils.wrap_expr(checked)
end
#explode ⇒ Expr
Returns a column with a separate row for every string character.
1114 1115 1116 |
# File 'lib/polars/string_expr.rb', line 1114
# Returns a column with a separate row for every string character.
#
# @return [Expr] the native `str_explode` result wrapped by `Utils.wrap_expr`
def explode
  exploded = _rbexpr.str_explode
  Utils.wrap_expr(exploded)
end
#extract(pattern, group_index: 1) ⇒ Expr
Extract the target capture group from provided patterns.
826 827 828 |
# File 'lib/polars/string_expr.rb', line 826
# Extract the target capture group from provided patterns.
#
# @param pattern [Object] passed unchanged to the native `str_extract` call
# @param group_index [Integer] forwarded to the native `str_extract` call
#   (defaults to 1)
# @return [Expr]
def extract(pattern, group_index: 1)
  extracted = _rbexpr.str_extract(pattern, group_index)
  Utils.wrap_expr(extracted)
end
#extract_all(pattern) ⇒ Expr
Extracts all matches for the given regex pattern.
Extracts each successive non-overlapping regex match in an individual string as an array.
857 858 859 860 |
# File 'lib/polars/string_expr.rb', line 857
# Extracts all matches for the given regex pattern.
#
# @param pattern [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`); its `_rbexpr` is handed to the native call
# @return [Expr]
def extract_all(pattern)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  all_matches = _rbexpr.str_extract_all(pattern_expr._rbexpr)
  Utils.wrap_expr(all_matches)
end
#json_extract(dtype = nil, infer_schema_length: 100) ⇒ Expr
Parse string values as JSON.
Raises an error if it encounters invalid JSON strings.
689 690 691 692 693 694 |
# File 'lib/polars/string_expr.rb', line 689
# Parse string values as JSON.
#
# @param dtype [Object, nil] when not nil, converted with
#   `Utils.rb_type_to_dtype` before the native call; nil is passed through
# @param infer_schema_length [Integer] forwarded to the native
#   `str_json_extract` call (defaults to 100)
# @return [Expr]
def json_extract(dtype = nil, infer_schema_length: 100)
  dtype = Utils.rb_type_to_dtype(dtype) unless dtype.nil?

  parsed = _rbexpr.str_json_extract(dtype, infer_schema_length)
  Utils.wrap_expr(parsed)
end
#json_path_match(json_path) ⇒ Expr
Extract the first match of json string with provided JSONPath expression.
Raises an error if it encounters invalid JSON strings. All return values will be cast to Utf8 regardless of the original value.
Documentation on JSONPath standard can be found here.
727 728 729 |
# File 'lib/polars/string_expr.rb', line 727
# Extract the first match of json string with provided JSONPath expression.
#
# @param json_path [Object] passed unchanged to the native
#   `str_json_path_match` call
# @return [Expr]
def json_path_match(json_path)
  first_match = _rbexpr.str_json_path_match(json_path)
  Utils.wrap_expr(first_match)
end
#lengths ⇒ Expr
The returned lengths are equal to the number of bytes in the UTF8 string. If you
need the length in terms of the number of characters, use n_chars instead.
Get length of the strings as :u32 (as number of bytes).
242 243 244 |
# File 'lib/polars/string_expr.rb', line 242
# Get length of the strings as :u32 (as number of bytes).
#
# @return [Expr] the native `str_len_bytes` result wrapped by `Utils.wrap_expr`
def lengths
  byte_lengths = _rbexpr.str_len_bytes
  Utils.wrap_expr(byte_lengths)
end
#ljust(length, fillchar = " ") ⇒ Expr Also known as: pad_end
Return the string left justified in a string of length length.
Padding is done using the specified fillchar.
The original string is returned if length is less than or equal to
s.length.
509 510 511 |
# File 'lib/polars/string_expr.rb', line 509
# Return the string left justified in a string of length `length`.
# Delegates to the native `str_pad_end` call.
#
# @param length [Integer] forwarded to the native `str_pad_end` call
# @param fillchar [String] forwarded to the native `str_pad_end` call
#   (defaults to a single space)
# @return [Expr]
def ljust(length, fillchar = " ")
  padded = _rbexpr.str_pad_end(length, fillchar)
  Utils.wrap_expr(padded)
end
#n_chars ⇒ Expr
If you know that you are working with ASCII text, lengths will be
equivalent, and faster (returns length in terms of the number of bytes).
Get length of the strings as :u32 (as number of chars).
274 275 276 |
# File 'lib/polars/string_expr.rb', line 274
# Get length of the strings as :u32 (as number of chars).
#
# @return [Expr] the native `str_len_chars` result wrapped by `Utils.wrap_expr`
def n_chars
  char_lengths = _rbexpr.str_len_chars
  Utils.wrap_expr(char_lengths)
end
#parse_int(radix = 2, strict: true) ⇒ Expr
Parse integers with base radix from strings.
By default base 2. ParseError/Overflows become Nulls.
1192 1193 1194 |
# File 'lib/polars/string_expr.rb', line 1192
# Parse integers with base radix from strings.
#
# Delegates to `to_integer` and then casts the result to Int32.
#
# @param radix [Integer] base used for parsing (defaults to 2)
# @param strict [Boolean] forwarded to both `to_integer` and `cast`
# @return [Expr]
def parse_int(radix = 2, strict: true)
  # Pass the caller's radix through; it was previously hard-coded to 2, which
  # silently ignored any other base.
  to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
end
#replace(pattern, value, literal: false, n: 1) ⇒ Expr
Replace first matching regex/literal substring with a new string value.
1026 1027 1028 1029 1030 |
# File 'lib/polars/string_expr.rb', line 1026
# Replace first matching regex/literal substring with a new string value.
#
# @param pattern [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`)
# @param value [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`)
# @param literal [Boolean] forwarded to the native `str_replace_n` call
# @param n [Integer] forwarded to the native `str_replace_n` call
#   (defaults to 1)
# @return [Expr]
def replace(pattern, value, literal: false, n: 1)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)

  replaced = _rbexpr.str_replace_n(
    pattern_expr._rbexpr,
    value_expr._rbexpr,
    literal,
    n
  )
  Utils.wrap_expr(replaced)
end
#replace_all(pattern, value, literal: false) ⇒ Expr
Replace all matching regex/literal substrings with a new string value.
1056 1057 1058 1059 1060 |
# File 'lib/polars/string_expr.rb', line 1056
# Replace all matching regex/literal substrings with a new string value.
#
# @param pattern [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`)
# @param value [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`)
# @param literal [Boolean] forwarded to the native `str_replace_all` call
# @return [Expr]
def replace_all(pattern, value, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)

  replaced = _rbexpr.str_replace_all(
    pattern_expr._rbexpr,
    value_expr._rbexpr,
    literal
  )
  Utils.wrap_expr(replaced)
end
#rjust(length, fillchar = " ") ⇒ Expr Also known as: pad_start
Return the string right justified in a string of length length.
Padding is done using the specified fillchar.
The original string is returned if length is less than or equal to
s.length.
542 543 544 |
# File 'lib/polars/string_expr.rb', line 542
# Return the string right justified in a string of length `length`.
# Delegates to the native `str_pad_start` call.
#
# @param length [Integer] forwarded to the native `str_pad_start` call
# @param fillchar [String] forwarded to the native `str_pad_start` call
#   (defaults to a single space)
# @return [Expr]
def rjust(length, fillchar = " ")
  padded = _rbexpr.str_pad_start(length, fillchar)
  Utils.wrap_expr(padded)
end
#slice(offset, length = nil) ⇒ Expr
Create subslices of the string values of a Utf8 Series.
1089 1090 1091 |
# File 'lib/polars/string_expr.rb', line 1089
# Create subslices of the string values of a Utf8 Series.
#
# @param offset [Integer] forwarded to the native `str_slice` call
# @param length [Integer, nil] forwarded to the native `str_slice` call
#   (defaults to nil)
# @return [Expr]
def slice(offset, length = nil)
  sliced = _rbexpr.str_slice(offset, length)
  Utils.wrap_expr(sliced)
end
#split(by, inclusive: false) ⇒ Expr
Split the string by a substring.
915 916 917 918 919 920 921 922 |
# File 'lib/polars/string_expr.rb', line 915
# Split the string by a substring.
#
# @param by [Object] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) before being handed to the native call
# @param inclusive [Boolean] when truthy the native `str_split_inclusive`
#   call is used, otherwise `str_split`
# @return [Expr]
def split(by, inclusive: false)
  separator = Utils.parse_as_expression(by, str_as_lit: true)

  pieces =
    if inclusive
      _rbexpr.str_split_inclusive(separator)
    else
      _rbexpr.str_split(separator)
    end
  Utils.wrap_expr(pieces)
end
#split_exact(by, n, inclusive: false) ⇒ Expr
Split the string by a substring using n splits.
Results in a struct of n+1 fields.
If it cannot make n splits, the remaining field elements will be null.
958 959 960 961 962 963 964 965 |
# File 'lib/polars/string_expr.rb', line 958
# Split the string by a substring using n splits.
#
# @param by [Object] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) before being handed to the native call
# @param n [Integer] forwarded to the native call
# @param inclusive [Boolean] when truthy the native
#   `str_split_exact_inclusive` call is used, otherwise `str_split_exact`
# @return [Expr]
def split_exact(by, n, inclusive: false)
  separator = Utils.parse_as_expression(by, str_as_lit: true)

  pieces =
    if inclusive
      _rbexpr.str_split_exact_inclusive(separator, n)
    else
      _rbexpr.str_split_exact(separator, n)
    end
  Utils.wrap_expr(pieces)
end
#splitn(by, n) ⇒ Expr
Split the string by a substring, restricted to returning at most n items.
If the number of possible splits is less than n-1, the remaining field
elements will be null. If the number of possible splits is n-1 or greater,
the last (nth) substring will contain the remainder of the string.
995 996 997 998 |
# File 'lib/polars/string_expr.rb', line 995
# Split the string by a substring, restricted to returning at most n items.
#
# @param by [Object] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) before being handed to the native call
# @param n [Integer] forwarded to the native `str_splitn` call
# @return [Expr]
def splitn(by, n)
  separator = Utils.parse_as_expression(by, str_as_lit: true)
  pieces = _rbexpr.str_splitn(separator, n)
  Utils.wrap_expr(pieces)
end
#starts_with(sub) ⇒ Expr
Check if string values start with a substring.
657 658 659 660 |
# File 'lib/polars/string_expr.rb', line 657
# Check if string values start with a substring.
#
# @param sub [Object] converted with `Utils.expr_to_lit_or_expr`
#   (`str_to_lit: true`) before being handed to the native call
# @return [Expr]
def starts_with(sub)
  rb_prefix = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)._rbexpr
  checked = _rbexpr.str_starts_with(rb_prefix)
  Utils.wrap_expr(checked)
end
#strip_chars(characters = nil) ⇒ Expr Also known as: strip
Remove leading and trailing whitespace.
379 380 381 382 |
# File 'lib/polars/string_expr.rb', line 379
# Remove leading and trailing whitespace.
#
# @param characters [Object, nil] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) and handed to the native `str_strip_chars` call;
#   defaults to nil
# @return [Expr]
def strip_chars(characters = nil)
  parsed_chars = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars(parsed_chars)
  Utils.wrap_expr(stripped)
end
#strip_chars_end(characters = nil) ⇒ Expr Also known as: rstrip
Remove trailing whitespace.
433 434 435 436 |
# File 'lib/polars/string_expr.rb', line 433
# Remove trailing whitespace.
#
# @param characters [Object, nil] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) and handed to the native `str_strip_chars_end` call;
#   defaults to nil
# @return [Expr]
def strip_chars_end(characters = nil)
  parsed_chars = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_end(parsed_chars)
  Utils.wrap_expr(stripped)
end
#strip_chars_start(characters = nil) ⇒ Expr Also known as: lstrip
Remove leading whitespace.
406 407 408 409 |
# File 'lib/polars/string_expr.rb', line 406
# Remove leading whitespace.
#
# @param characters [Object, nil] parsed with `Utils.parse_as_expression`
#   (`str_as_lit: true`) and handed to the native `str_strip_chars_start`
#   call; defaults to nil
# @return [Expr]
def strip_chars_start(characters = nil)
  parsed_chars = Utils.parse_as_expression(characters, str_as_lit: true)
  stripped = _rbexpr.str_strip_chars_start(parsed_chars)
  Utils.wrap_expr(stripped)
end
#strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false) ⇒ Expr
When parsing a Datetime the column precision will be inferred from the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If no fractional second component is found then the default is "us".
Parse a Utf8 expression to a Date/Datetime/Time type.
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/polars/string_expr.rb', line 197
# Parse a Utf8 expression to a Date/Datetime/Time type.
#
# Validates `format`, then dispatches on `dtype` to `to_date`, `to_datetime`
# or `to_time`. A Datetime instance contributes its `time_unit` and
# `time_zone`; the bare Datetime class is first instantiated with
# `Datetime.new`.
#
# @param dtype [Object] the Date, Datetime or Time constant (as resolved in
#   this file's scope), or a Datetime instance
# @param format [String, nil] forwarded to the selected conversion method
# @param strict [Boolean] forwarded to the selected conversion method
# @param exact [Boolean] forwarded to `to_date` / `to_datetime`
# @param cache [Boolean] forwarded to the selected conversion method
# @param utc [Boolean] accepted but not referenced by this method body
# @return [Expr]
# @raise [ArgumentError] if `dtype` is none of the supported types
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
  _validate_format_argument(format)

  return to_date(format, strict: strict, exact: exact, cache: cache) if dtype == Date

  if dtype == Datetime || dtype.is_a?(Datetime)
    # The bare class carries no unit/zone, so build an instance to read them.
    datetime_type = dtype == Datetime ? Datetime.new : dtype
    return to_datetime(
      format,
      time_unit: datetime_type.time_unit,
      time_zone: datetime_type.time_zone,
      strict: strict,
      exact: exact,
      cache: cache
    )
  end

  return to_time(format, strict: strict, cache: cache) if dtype == Time

  raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
end
#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Date column.
40 41 42 43 |
# File 'lib/polars/string_expr.rb', line 40
# Convert a Utf8 column into a Date column.
#
# @param format [String, nil] checked by `_validate_format_argument`, then
#   forwarded to the native `str_to_date` call
# @param strict [Boolean] forwarded to the native `str_to_date` call
# @param exact [Boolean] forwarded to the native `str_to_date` call
# @param cache [Boolean] forwarded to the native `str_to_date` call
# @return [Expr]
def to_date(format = nil, strict: true, exact: true, cache: true)
  _validate_format_argument(format)

  as_date = _rbexpr.str_to_date(format, strict, exact, cache)
  Utils.wrap_expr(as_date)
end
#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true, use_earliest: nil, ambiguous: "raise") ⇒ Expr
Convert a Utf8 column into a Datetime column.
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/polars/string_expr.rb', line 79
# Convert a Utf8 column into a Datetime column.
#
# @param format [String, nil] checked by `_validate_format_argument`, then
#   forwarded to the native `str_to_datetime` call
# @param time_unit [Object, nil] forwarded to the native call
# @param time_zone [Object, nil] forwarded to the native call
# @param strict [Boolean] forwarded to the native call
# @param exact [Boolean] forwarded to the native call
# @param cache [Boolean] forwarded to the native call
# @param use_earliest [Object, nil] combined with `ambiguous` through
#   `Utils.rename_use_earliest_to_ambiguous`
# @param ambiguous [Object] wrapped with `Polars.lit` unless it is already
#   an Expr after the rename step (defaults to "raise")
# @return [Expr]
def to_datetime(
  format = nil,
  time_unit: nil,
  time_zone: nil,
  strict: true,
  exact: true,
  cache: true,
  use_earliest: nil,
  ambiguous: "raise"
)
  _validate_format_argument(format)

  resolved = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
  ambiguous_expr = resolved.is_a?(Expr) ? resolved : Polars.lit(resolved)

  as_datetime = _rbexpr.str_to_datetime(
    format, time_unit, time_zone, strict, exact, cache, ambiguous_expr._rbexpr
  )
  Utils.wrap_expr(as_datetime)
end
#to_integer(base: 10, strict: true) ⇒ Expr
Convert a Utf8 column into an Int64 column with base radix.
1160 1161 1162 |
# File 'lib/polars/string_expr.rb', line 1160
# Convert a Utf8 column into an Int64 column with base radix.
#
# @param base [Integer] forwarded to the native `str_to_integer` call
#   (defaults to 10)
# @param strict [Boolean] forwarded to the native `str_to_integer` call
# @return [Expr]
def to_integer(base: 10, strict: true)
  as_integer = _rbexpr.str_to_integer(base, strict)
  Utils.wrap_expr(as_integer)
end
#to_lowercase ⇒ Expr
Transform to lowercase variant.
354 355 356 |
# File 'lib/polars/string_expr.rb', line 354
# Transform to lowercase variant.
#
# @return [Expr] the native `str_to_lowercase` result wrapped by
#   `Utils.wrap_expr`
def to_lowercase
  lowered = _rbexpr.str_to_lowercase
  Utils.wrap_expr(lowered)
end
#to_time(format = nil, strict: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Time column.
130 131 132 133 |
# File 'lib/polars/string_expr.rb', line 130
# Convert a Utf8 column into a Time column.
#
# @param format [String, nil] checked by `_validate_format_argument`, then
#   forwarded to the native `str_to_time` call
# @param strict [Boolean] forwarded to the native `str_to_time` call
# @param cache [Boolean] forwarded to the native `str_to_time` call
# @return [Expr]
def to_time(format = nil, strict: true, cache: true)
  _validate_format_argument(format)

  as_time = _rbexpr.str_to_time(format, strict, cache)
  Utils.wrap_expr(as_time)
end
#to_uppercase ⇒ Expr
Transform to uppercase variant.
333 334 335 |
# File 'lib/polars/string_expr.rb', line 333
# Transform to uppercase variant.
#
# @return [Expr] the native `str_to_uppercase` result wrapped by
#   `Utils.wrap_expr`
def to_uppercase
  uppered = _rbexpr.str_to_uppercase
  Utils.wrap_expr(uppered)
end
#zfill(alignment) ⇒ Expr
Fills the string with zeroes.
Return a copy of the string left filled with ASCII '0' digits to make a string of length alignment.
A leading sign prefix ('+'/'-') is handled by inserting the padding after the
sign character rather than before. The original string is returned if alignment is
less than or equal to s.length.
477 478 479 |
# File 'lib/polars/string_expr.rb', line 477
# Fills the string with zeroes.
#
# @param alignment [Integer] forwarded to the native `str_zfill` call
# @return [Expr]
def zfill(alignment)
  zero_filled = _rbexpr.str_zfill(alignment)
  Utils.wrap_expr(zero_filled)
end