Class: Polars::StringExpr
- Inherits:
-
Object
- Object
- Polars::StringExpr
- Defined in:
- lib/polars/string_expr.rb
Overview
Namespace for string related expressions.
Instance Method Summary collapse
-
#concat(delimiter = "-") ⇒ Expr
Vertically concat the values in the Series to a single string value.
-
#contains(pattern, literal: false, strict: true) ⇒ Expr
Check if string contains a substring that matches a regex.
-
#count_match(pattern) ⇒ Expr
Count all successive non-overlapping regex matches.
-
#decode(encoding, strict: true) ⇒ Expr
Decode a value using the provided encoding.
-
#encode(encoding) ⇒ Expr
Encode a value using the provided encoding.
-
#ends_with(sub) ⇒ Expr
Check if string values end with a substring.
-
#explode ⇒ Expr
Returns a column with a separate row for every string character.
-
#extract(pattern, group_index: 1) ⇒ Expr
Extract the target capture group from provided patterns.
-
#extract_all(pattern) ⇒ Expr
Extracts all matches for the given regex pattern.
-
#json_extract(dtype = nil, infer_schema_length: 100) ⇒ Expr
Parse string values as JSON.
-
#json_path_match(json_path) ⇒ Expr
Extract the first match of json string with provided JSONPath expression.
-
#lengths ⇒ Expr
Get length of the strings as :u32 (as number of bytes).
-
#ljust(width, fillchar = " ") ⇒ Expr
Return the string left justified in a string of length width.
-
#lstrip(matches = nil) ⇒ Expr
Remove leading whitespace.
-
#n_chars ⇒ Expr
Get length of the strings as :u32 (as number of chars).
-
#parse_int(radix = 2, strict: true) ⇒ Expr
Parse integers with base radix from strings.
-
#replace(pattern, value, literal: false, n: 1) ⇒ Expr
Replace first matching regex/literal substring with a new string value.
-
#replace_all(pattern, value, literal: false) ⇒ Expr
Replace all matching regex/literal substrings with a new string value.
-
#rjust(width, fillchar = " ") ⇒ Expr
Return the string right justified in a string of length width.
-
#rstrip(matches = nil) ⇒ Expr
Remove trailing whitespace.
-
#slice(offset, length = nil) ⇒ Expr
Create subslices of the string values of a Utf8 Series.
-
#split(by, inclusive: false) ⇒ Expr
Split the string by a substring.
-
#split_exact(by, n, inclusive: false) ⇒ Expr
Split the string by a substring using n splits.
-
#splitn(by, n) ⇒ Expr
Split the string by a substring, restricted to returning at most n items.
-
#starts_with(sub) ⇒ Expr
Check if string values start with a substring.
-
#strip(matches = nil) ⇒ Expr
Remove leading and trailing whitespace.
-
#strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false) ⇒ Expr
Parse a Utf8 expression to a Date/Datetime/Time type.
-
#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Date column.
-
#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Datetime column.
-
#to_lowercase ⇒ Expr
Transform to lowercase variant.
-
#to_time(format = nil, strict: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Time column.
-
#to_uppercase ⇒ Expr
Transform to uppercase variant.
-
#zfill(alignment) ⇒ Expr
Fills the string with zeroes.
Instance Method Details
#concat(delimiter = "-") ⇒ Expr
Vertically concat the values in the Series to a single string value.
292 293 294 |
# File 'lib/polars/string_expr.rb', line 292

# Vertically concat the values in the Series to a single string value.
#
# @param delimiter forwarded unchanged to the native `str_concat` call
#   (defaults to "-").
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def concat(delimiter = "-")
  joined = _rbexpr.str_concat(delimiter)
  Utils.wrap_expr(joined)
end
#contains(pattern, literal: false, strict: true) ⇒ Expr
Check if string contains a substring that matches a regex.
558 559 560 561 |
# File 'lib/polars/string_expr.rb', line 558

# Check if string contains a substring that matches a regex.
#
# @param pattern converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`
#   before being handed to the native call.
# @param literal forwarded positionally to `str_contains` (defaults to false).
# @param strict forwarded positionally to `str_contains` (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def contains(pattern, literal: false, strict: true)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  matched = _rbexpr.str_contains(pattern_expr._rbexpr, literal, strict)
  Utils.wrap_expr(matched)
end
#count_match(pattern) ⇒ Expr
Count all successive non-overlapping regex matches.
867 868 869 |
# File 'lib/polars/string_expr.rb', line 867

# Count all successive non-overlapping regex matches.
#
# @param pattern passed as-is to the native `count_match` call
#   (no literal/expression conversion is applied here).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def count_match(pattern)
  counted = _rbexpr.count_match(pattern)
  Utils.wrap_expr(counted)
end
#decode(encoding, strict: true) ⇒ Expr
Decode a value using the provided encoding.
738 739 740 741 742 743 744 745 746 |
# File 'lib/polars/string_expr.rb', line 738

# Decode a value using the provided encoding.
#
# @param encoding ["hex", "base64"] selects the native decoder to call.
# @param strict forwarded to the selected native decoder (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
# @raise [ArgumentError] if `encoding` is neither "hex" nor "base64".
def decode(encoding, strict: true)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
  else
    # Single braces: the doubled "{{ }}" form is a Python f-string escape and
    # would be printed literally by Ruby.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
#encode(encoding) ⇒ Expr
Encode a value using the provided encoding.
769 770 771 772 773 774 775 776 777 |
# File 'lib/polars/string_expr.rb', line 769

# Encode a value using the provided encoding.
#
# @param encoding ["hex", "base64"] selects the native encoder to call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
# @raise [ArgumentError] if `encoding` is neither "hex" nor "base64".
def encode(encoding)
  case encoding
  when "hex"
    Utils.wrap_expr(_rbexpr.str_hex_encode)
  when "base64"
    Utils.wrap_expr(_rbexpr.str_base64_encode)
  else
    # Single braces: the doubled "{{ }}" form is a Python f-string escape and
    # would be printed literally by Ruby.
    raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
  end
end
#ends_with(sub) ⇒ Expr
Check if string values end with a substring.
598 599 600 601 |
# File 'lib/polars/string_expr.rb', line 598

# Check if string values end with a substring.
#
# @param sub converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`
#   before being handed to the native `str_ends_with` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def ends_with(sub)
  suffix_expr = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_ends_with(suffix_expr._rbexpr))
end
#explode ⇒ Expr
Returns a column with a separate row for every string character.
1090 1091 1092 |
# File 'lib/polars/string_expr.rb', line 1090

# Returns a column with a separate row for every string character.
#
# @return [Expr] the native `str_explode` result wrapped by `Utils.wrap_expr`.
def explode
  exploded = _rbexpr.str_explode
  Utils.wrap_expr(exploded)
end
#extract(pattern, group_index: 1) ⇒ Expr
Extract the target capture group from provided patterns.
807 808 809 |
# File 'lib/polars/string_expr.rb', line 807

# Extract the target capture group from provided patterns.
#
# @param pattern passed as-is to the native `str_extract` call.
# @param group_index forwarded positionally to `str_extract` (defaults to 1).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def extract(pattern, group_index: 1)
  captured = _rbexpr.str_extract(pattern, group_index)
  Utils.wrap_expr(captured)
end
#extract_all(pattern) ⇒ Expr
Extracts all matches for the given regex pattern.
Extracts each successive non-overlapping regex match in an individual string as an array.
838 839 840 841 |
# File 'lib/polars/string_expr.rb', line 838

# Extracts all matches for the given regex pattern.
#
# @param pattern converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`
#   before being handed to the native `str_extract_all` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def extract_all(pattern)
  pattern_rb = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)._rbexpr
  Utils.wrap_expr(_rbexpr.str_extract_all(pattern_rb))
end
#json_extract(dtype = nil, infer_schema_length: 100) ⇒ Expr
Parse string values as JSON.
Throws an error if invalid JSON strings are encountered.
670 671 672 673 674 675 |
# File 'lib/polars/string_expr.rb', line 670

# Parse string values as JSON.
#
# @param dtype when not nil, converted with `Utils.rb_type_to_dtype` before
#   being forwarded; nil is forwarded unchanged.
# @param infer_schema_length forwarded positionally to `str_json_extract`
#   (defaults to 100).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def json_extract(dtype = nil, infer_schema_length: 100)
  dtype = Utils.rb_type_to_dtype(dtype) unless dtype.nil?
  parsed = _rbexpr.str_json_extract(dtype, infer_schema_length)
  Utils.wrap_expr(parsed)
end
#json_path_match(json_path) ⇒ Expr
Extract the first match of json string with provided JSONPath expression.
Throws an error if invalid JSON strings are encountered. All return values will be cast to Utf8 regardless of the original value.
Documentation on JSONPath standard can be found here.
708 709 710 |
# File 'lib/polars/string_expr.rb', line 708

# Extract the first match of json string with provided JSONPath expression.
#
# @param json_path passed as-is to the native `str_json_path_match` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def json_path_match(json_path)
  matched = _rbexpr.str_json_path_match(json_path)
  Utils.wrap_expr(matched)
end
#lengths ⇒ Expr
The returned lengths are equal to the number of bytes in the UTF8 string. If you
need the length in terms of the number of characters, use n_chars instead.
Get length of the strings as :u32 (as number of bytes).
237 238 239 |
# File 'lib/polars/string_expr.rb', line 237

# Get length of the strings as :u32 (as number of bytes).
#
# @return [Expr] the native `str_lengths` result wrapped by `Utils.wrap_expr`.
def lengths
  byte_lengths = _rbexpr.str_lengths
  Utils.wrap_expr(byte_lengths)
end
#ljust(width, fillchar = " ") ⇒ Expr
Return the string left justified in a string of length width.
Padding is done using the specified fillchar.
The original string is returned if width is less than or equal to
s.length.
492 493 494 |
# File 'lib/polars/string_expr.rb', line 492

# Return the string left justified in a string of length width.
#
# @param width forwarded positionally to the native `str_ljust` call.
# @param fillchar forwarded positionally to `str_ljust` (defaults to " ").
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def ljust(width, fillchar = " ")
  padded = _rbexpr.str_ljust(width, fillchar)
  Utils.wrap_expr(padded)
end
#lstrip(matches = nil) ⇒ Expr
Remove leading whitespace.
387 388 389 390 391 392 |
# File 'lib/polars/string_expr.rb', line 387

# Remove leading whitespace.
#
# @param matches nil, or a value whose `length` is at most 1; forwarded to
#   the native `str_lstrip` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
# @raise [ArgumentError] if `matches` is not nil and its length exceeds 1.
def lstrip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  Utils.wrap_expr(_rbexpr.str_lstrip(matches))
end
#n_chars ⇒ Expr
If you know that you are working with ASCII text, lengths will be
equivalent, and faster (returns length in terms of the number of bytes).
Get length of the strings as :u32 (as number of chars).
269 270 271 |
# File 'lib/polars/string_expr.rb', line 269

# Get length of the strings as :u32 (as number of chars).
#
# @return [Expr] the native `str_n_chars` result wrapped by `Utils.wrap_expr`.
def n_chars
  char_counts = _rbexpr.str_n_chars
  Utils.wrap_expr(char_counts)
end
#parse_int(radix = 2, strict: true) ⇒ Expr
Parse integers with base radix from strings.
By default base 2. ParseError/Overflows become Nulls.
1138 1139 1140 |
# File 'lib/polars/string_expr.rb', line 1138

# Parse integers with base radix from strings.
#
# @param radix forwarded positionally to the native `str_parse_int` call
#   (defaults to 2).
# @param strict forwarded positionally to `str_parse_int` (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def parse_int(radix = 2, strict: true)
  parsed = _rbexpr.str_parse_int(radix, strict)
  Utils.wrap_expr(parsed)
end
#replace(pattern, value, literal: false, n: 1) ⇒ Expr
Replace first matching regex/literal substring with a new string value.
1002 1003 1004 1005 1006 |
# File 'lib/polars/string_expr.rb', line 1002

# Replace first matching regex/literal substring with a new string value.
#
# @param pattern converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param value converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param literal forwarded positionally to `str_replace_n` (defaults to false).
# @param n forwarded positionally to `str_replace_n` (defaults to 1).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def replace(pattern, value, literal: false, n: 1)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  replaced = _rbexpr.str_replace_n(pattern_expr._rbexpr, value_expr._rbexpr, literal, n)
  Utils.wrap_expr(replaced)
end
#replace_all(pattern, value, literal: false) ⇒ Expr
Replace all matching regex/literal substrings with a new string value.
1032 1033 1034 1035 1036 |
# File 'lib/polars/string_expr.rb', line 1032

# Replace all matching regex/literal substrings with a new string value.
#
# @param pattern converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param value converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`.
# @param literal forwarded positionally to `str_replace_all` (defaults to false).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def replace_all(pattern, value, literal: false)
  pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
  value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
  replaced = _rbexpr.str_replace_all(pattern_expr._rbexpr, value_expr._rbexpr, literal)
  Utils.wrap_expr(replaced)
end
#rjust(width, fillchar = " ") ⇒ Expr
Return the string right justified in a string of length width.
Padding is done using the specified fillchar.
The original string is returned if width is less than or equal to
s.length.
524 525 526 |
# File 'lib/polars/string_expr.rb', line 524

# Return the string right justified in a string of length width.
#
# @param width forwarded positionally to the native `str_rjust` call.
# @param fillchar forwarded positionally to `str_rjust` (defaults to " ").
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def rjust(width, fillchar = " ")
  padded = _rbexpr.str_rjust(width, fillchar)
  Utils.wrap_expr(padded)
end
#rstrip(matches = nil) ⇒ Expr
Remove trailing whitespace.
415 416 417 418 419 420 |
# File 'lib/polars/string_expr.rb', line 415

# Remove trailing whitespace.
#
# @param matches nil, or a value whose `length` is at most 1; forwarded to
#   the native `str_rstrip` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
# @raise [ArgumentError] if `matches` is not nil and its length exceeds 1.
def rstrip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  Utils.wrap_expr(_rbexpr.str_rstrip(matches))
end
#slice(offset, length = nil) ⇒ Expr
Create subslices of the string values of a Utf8 Series.
1065 1066 1067 |
# File 'lib/polars/string_expr.rb', line 1065

# Create subslices of the string values of a Utf8 Series.
#
# @param offset forwarded positionally to the native `str_slice` call.
# @param length forwarded positionally to `str_slice` (defaults to nil).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def slice(offset, length = nil)
  sliced = _rbexpr.str_slice(offset, length)
  Utils.wrap_expr(sliced)
end
#split(by, inclusive: false) ⇒ Expr
Split the string by a substring.
894 895 896 897 898 899 900 |
# File 'lib/polars/string_expr.rb', line 894

# Split the string by a substring.
#
# @param by passed as-is to the selected native split call.
# @param inclusive when truthy, `str_split_inclusive` is used instead of
#   `str_split` (defaults to false).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def split(by, inclusive: false)
  pieces = inclusive ? _rbexpr.str_split_inclusive(by) : _rbexpr.str_split(by)
  Utils.wrap_expr(pieces)
end
#split_exact(by, n, inclusive: false) ⇒ Expr
Split the string by a substring using n splits.
Results in a struct of n+1 fields.
If it cannot make n splits, the remaining field elements will be null.
936 937 938 939 940 941 942 |
# File 'lib/polars/string_expr.rb', line 936

# Split the string by a substring using n splits.
#
# @param by passed as-is to the selected native split call.
# @param n passed as-is to the selected native split call.
# @param inclusive when truthy, `str_split_exact_inclusive` is used instead
#   of `str_split_exact` (defaults to false).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def split_exact(by, n, inclusive: false)
  pieces =
    if inclusive
      _rbexpr.str_split_exact_inclusive(by, n)
    else
      _rbexpr.str_split_exact(by, n)
    end
  Utils.wrap_expr(pieces)
end
#splitn(by, n) ⇒ Expr
Split the string by a substring, restricted to returning at most n items.
If the number of possible splits is less than n-1, the remaining field
elements will be null. If the number of possible splits is n-1 or greater,
the last (nth) substring will contain the remainder of the string.
972 973 974 |
# File 'lib/polars/string_expr.rb', line 972

# Split the string by a substring, restricted to returning at most n items.
#
# @param by passed as-is to the native `str_splitn` call.
# @param n passed as-is to the native `str_splitn` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def splitn(by, n)
  pieces = _rbexpr.str_splitn(by, n)
  Utils.wrap_expr(pieces)
end
#starts_with(sub) ⇒ Expr
Check if string values start with a substring.
638 639 640 641 |
# File 'lib/polars/string_expr.rb', line 638

# Check if string values start with a substring.
#
# @param sub converted with `Utils.expr_to_lit_or_expr(..., str_to_lit: true)`
#   before being handed to the native `str_starts_with` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def starts_with(sub)
  prefix_expr = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)
  Utils.wrap_expr(_rbexpr.str_starts_with(prefix_expr._rbexpr))
end
#strip(matches = nil) ⇒ Expr
Remove leading and trailing whitespace.
359 360 361 362 363 364 |
# File 'lib/polars/string_expr.rb', line 359

# Remove leading and trailing whitespace.
#
# @param matches nil, or a value whose `length` is at most 1; forwarded to
#   the native `str_strip` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
# @raise [ArgumentError] if `matches` is not nil and its length exceeds 1.
def strip(matches = nil)
  too_long = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if too_long

  Utils.wrap_expr(_rbexpr.str_strip(matches))
end
#strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false) ⇒ Expr
When parsing a Datetime the column precision will be inferred from the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If no fractional second component is found then the default is "us".
Parse a Utf8 expression to a Date/Datetime/Time type.
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
# File 'lib/polars/string_expr.rb', line 192

# Parse a Utf8 expression to a Date/Datetime/Time type.
#
# Dispatches to `to_date`, `to_datetime` or `to_time` depending on `dtype`.
#
# @param dtype the `Date`, `Datetime` or `Time` class, or a `Datetime`
#   instance (whose `time_unit` and `time_zone` are forwarded).
# @param format checked with `_validate_format_argument`, then forwarded.
# @param strict forwarded to the selected conversion method.
# @param exact forwarded to `to_date` / `to_datetime` (not used for `Time`).
# @param cache forwarded to the selected conversion method.
# @param utc accepted for interface compatibility.
#   NOTE(review): this value is never read in the method body - confirm
#   whether that is intended.
# @return [Expr] whatever the selected conversion method returns.
# @raise [ArgumentError] if `dtype` matches none of the supported types.
def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
  _validate_format_argument(format)

  return to_date(format, strict: strict, exact: exact, cache: cache) if dtype == Date

  if dtype == Datetime || dtype.is_a?(Datetime)
    # A bare class carries no unit/zone, so fall back to a default instance.
    resolved = dtype == Datetime ? Datetime.new : dtype
    unit = resolved.time_unit
    zone = resolved.time_zone
    return to_datetime(format, time_unit: unit, time_zone: zone, strict: strict, exact: exact, cache: cache)
  end

  return to_time(format, strict: strict, cache: cache) if dtype == Time

  raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
end
#to_date(format = nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Date column.
40 41 42 43 |
# File 'lib/polars/string_expr.rb', line 40

# Convert a Utf8 column into a Date column.
#
# @param format checked with `_validate_format_argument`, then forwarded
#   to the native `str_to_date` call (defaults to nil).
# @param strict forwarded positionally to `str_to_date` (defaults to true).
# @param exact forwarded positionally to `str_to_date` (defaults to true).
# @param cache forwarded positionally to `str_to_date` (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def to_date(format = nil, strict: true, exact: true, cache: true)
  _validate_format_argument(format)
  converted = _rbexpr.str_to_date(format, strict, exact, cache)
  Utils.wrap_expr(converted)
end
#to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Datetime column.
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/polars/string_expr.rb', line 79

# Convert a Utf8 column into a Datetime column.
#
# @param format checked with `_validate_format_argument`, then forwarded
#   to the native `str_to_datetime` call (defaults to nil).
# @param time_unit forwarded positionally (defaults to nil).
# @param time_zone forwarded positionally (defaults to nil).
# @param strict forwarded positionally (defaults to true).
# @param exact forwarded positionally (defaults to true).
# @param cache forwarded positionally (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def to_datetime(format = nil, time_unit: nil, time_zone: nil, strict: true, exact: true, cache: true)
  _validate_format_argument(format)
  converted = _rbexpr.str_to_datetime(format, time_unit, time_zone, strict, exact, cache)
  Utils.wrap_expr(converted)
end
#to_lowercase ⇒ Expr
Transform to lowercase variant.
334 335 336 |
# File 'lib/polars/string_expr.rb', line 334

# Transform to lowercase variant.
#
# @return [Expr] the native `str_to_lowercase` result wrapped by `Utils.wrap_expr`.
def to_lowercase
  lowered = _rbexpr.str_to_lowercase
  Utils.wrap_expr(lowered)
end
#to_time(format = nil, strict: true, cache: true) ⇒ Expr
Convert a Utf8 column into a Time column.
125 126 127 128 |
# File 'lib/polars/string_expr.rb', line 125

# Convert a Utf8 column into a Time column.
#
# @param format checked with `_validate_format_argument`, then forwarded
#   to the native `str_to_time` call (defaults to nil).
# @param strict forwarded positionally to `str_to_time` (defaults to true).
# @param cache forwarded positionally to `str_to_time` (defaults to true).
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def to_time(format = nil, strict: true, cache: true)
  _validate_format_argument(format)
  converted = _rbexpr.str_to_time(format, strict, cache)
  Utils.wrap_expr(converted)
end
#to_uppercase ⇒ Expr
Transform to uppercase variant.
313 314 315 |
# File 'lib/polars/string_expr.rb', line 313

# Transform to uppercase variant.
#
# @return [Expr] the native `str_to_uppercase` result wrapped by `Utils.wrap_expr`.
def to_uppercase
  uppered = _rbexpr.str_to_uppercase
  Utils.wrap_expr(uppered)
end
#zfill(alignment) ⇒ Expr
Fills the string with zeroes.
Return a copy of the string left filled with ASCII '0' digits to make a string of length alignment.
A leading sign prefix ('+'/'-') is handled by inserting the padding after the
sign character rather than before. The original string is returned if alignment is
less than or equal to s.length.
460 461 462 |
# File 'lib/polars/string_expr.rb', line 460

# Fills the string with zeroes.
#
# @param alignment passed as-is to the native `str_zfill` call.
# @return [Expr] the native result wrapped by `Utils.wrap_expr`.
def zfill(alignment)
  filled = _rbexpr.str_zfill(alignment)
  Utils.wrap_expr(filled)
end