String Functions¶

Functions for casting, searching, transforming, and formatting string values.

Functions marked Vectorized accept either a single value or an array: pass a single string to get a single result, or pass an array to have the function applied to each element.

from everysk.sdk.engines import Expression
expression_engine = Expression()

Casting & Cleaning¶

cast_string¶

cast_string(value, default=None)

Casts value to a string. Returns default if the result would be None.

expression_engine.solve('cast_string(v)', {'v': 123})
# '123'

expression_engine.solve('cast_string(v)', {'v': 123.45})
# '123.45'

expression_engine.solve('cast_string(v)', {'v': True})
# 'True'

expression_engine.solve('cast_string(v)', {'v': None})
# None

expression_engine.solve('cast_string(v, d)', {'v': None, 'd': 'N/A'})
# 'N/A'

drop_nas¶

drop_nas(arr)

Returns a new array containing only the elements of arr that are already string objects. All non-string values (numbers, None, booleans, dates) are discarded.

import numpy as np

expression_engine.solve('drop_nas(arr)', {'arr': np.array([1, 'AAPL', None, 'MSFT', False])})
# ['AAPL', 'MSFT']

expression_engine.solve('drop_nas(arr)', {'arr': np.array(['abc', '123'])})
# ['abc', '123']

Splitting & Joining¶

split¶

split(source_string, separator=None, max_split=-1)

Splits source_string on separator and returns an array of substrings. When separator is None, splits on any whitespace and discards empty strings. max_split limits the number of splits performed.

expression_engine.solve('split(text)', {'text': 'hello world'})
# ['hello', 'world']

expression_engine.solve('split(text, sep)', {'text': 'hello world', 'sep': ' '})
# ['hello', 'world']

expression_engine.solve('split(text, sep)', {'text': 'hello-world', 'sep': '-'})
# ['hello', 'world']

expression_engine.solve('split(text, sep, n)', {'text': '1 2 3', 'sep': ' ', 'n': 1})
# ['1', '2 3']

join¶

join(arr, delimiter=' ')

Joins the elements of arr into a single string, inserting delimiter between each element.

expression_engine.solve('join(words)', {'words': np.array(['hello', 'world'])})
# 'hello world'

expression_engine.solve('join(words, sep)', {'words': np.array(['a', 'b', 'c']), 'sep': ','})
# 'a,b,c'

expression_engine.solve('join(words, sep)', {'words': np.array(['hello', 'world']), 'sep': ''})
# 'helloworld'

str_concat¶

str_concat(*args, delimiter='', sanitize_whitespace=False)

Concatenates multiple values or arrays into a single string (or array of strings). When any argument is an array, concatenation is performed element-wise across all array arguments (all must have the same length). None values are treated as empty strings. Consecutive delimiter occurrences caused by None values are collapsed into one.

expression_engine.solve('str_concat(a, b)', {'a': 'hello', 'b': 'world'})
# 'helloworld'

expression_engine.solve('str_concat(a, b, delimiter=d)', {'a': 'hello', 'b': 'world', 'd': '-'})
# 'hello-world'

expression_engine.solve('str_concat(a, b, delimiter=d)', {'a': 'hello', 'b': None, 'd': '-'})
# 'hello'

# Element-wise on arrays
expression_engine.solve(
    'str_concat(first, last, delimiter=d)',
    {'first': np.array(['Mary', 'John']), 'last': np.array(['Smith', 'Doe']), 'd': ' '}
)
# ['Mary Smith', 'John Doe']

Search & Indexing¶

find¶

find(source_string, target_string, start=0, end=None)

Vectorized. Returns the index of the first occurrence of target_string in source_string. Returns -1 if not found. Returns None if source_string is None.

expression_engine.solve('find(text, sub)', {'text': 'hello world', 'sub': 'world'})
# 6

expression_engine.solve('find(text, sub)', {'text': 'hello world', 'sub': 'hello'})
# 0

expression_engine.solve('find(text, sub)', {'text': 'hello world', 'sub': 'earth'})
# -1

expression_engine.solve('find(text, sub)', {'text': None, 'sub': 'world'})
# None

rfind¶

rfind(source_string, target_string, start=0, end=None)

Vectorized. Returns the index of the last occurrence of target_string. Returns -1 if not found. Returns None if source_string is None.

expression_engine.solve('rfind(text, sub)', {'text': 'hello world world', 'sub': 'world'})
# 12

expression_engine.solve('rfind(text, sub)', {'text': 'hello world', 'sub': 'earth'})
# -1

index¶

index(source_string, target_string, start=0, end=None)

Vectorized. Like find, but raises ValueError instead of returning -1 when target_string is not found. Returns None if source_string is None.

expression_engine.solve('index(text, sub)', {'text': 'hello world', 'sub': 'world'})
# 6

expression_engine.solve('index(text, sub)', {'text': None, 'sub': 'world'})
# None

rindex¶

rindex(source_string, target_string, start=0, end=None)

Vectorized. Like rfind, but raises ValueError instead of returning -1 when target_string is not found. Returns None if source_string is None.

expression_engine.solve('rindex(text, sub)', {'text': 'Hello World', 'sub': 'o'})
# 7

str_slice¶

str_slice(source_string, start_index=None, end_index=None, step=None)

Vectorized. Returns the slice source_string[start_index:end_index:step]. Returns None if source_string is None.

expression_engine.solve('str_slice(text, s, e)', {'text': 'hello world', 's': 0, 'e': 5})
# 'hello'

expression_engine.solve('str_slice(text, s, e)', {'text': 'hello world', 's': 6, 'e': 11})
# 'world'

# Every other character
expression_engine.solve('str_slice(text, s, e, step)', {'text': 'abcdef', 's': 0, 'e': 6, 'step': 2})
# 'ace'

# Reverse a string
expression_engine.solve('str_slice(text, s, e, step)', {'text': 'abcdef', 's': None, 'e': None, 'step': -1})
# 'fedcba'

str_len¶

str_len(source_string)

Vectorized. Returns the character length of source_string. Returns None if source_string is None.

expression_engine.solve('str_len(text)', {'text': 'hello'})
# 5

expression_engine.solve('str_len(text)', {'text': ' hello '})
# 7

expression_engine.solve('str_len(names)', {'names': np.array(['Mary Smith', 'John Doe'])})
# [10, 8]

match¶

match(source_string, regex_pattern)

Vectorized. Returns True if the regular expression regex_pattern matches at the start of source_string; the pattern does not have to match the whole string. Returns False if source_string is None.

expression_engine.solve('match(text, pat)', {'text': 'hello', 'pat': r'h'})
# True

expression_engine.solve('match(text, pat)', {'text': 'hello', 'pat': r'H'})
# False

expression_engine.solve('match(text, pat)', {'text': '123hello', 'pat': r'\d'})
# True

expression_engine.solve('match(texts, pat)', {'texts': np.array(['hello', 'HELLO', ' hello']), 'pat': r'h'})
# [True, False, False]

is_substring¶

is_substring(source_string, target_string)

Returns True if source_string is contained in target_string (i.e. source_string in target_string). Returns False if either argument is None. Raises TypeError if both arguments are arrays.

expression_engine.solve('is_substring(sub, text)', {'sub': 'hello', 'text': 'hello world'})
# True

expression_engine.solve('is_substring(sub, text)', {'sub': 'earth', 'text': 'hello world'})
# False

expression_engine.solve('is_substring(sub, text)', {'sub': None, 'text': 'hello'})
# False

has_substring¶

has_substring(source_string, target_string)

Returns True if target_string is contained in source_string (i.e. target_string in source_string). This is the inverse argument order of is_substring.

expression_engine.solve('has_substring(text, sub)', {'text': 'hello world', 'sub': 'hello'})
# True

expression_engine.solve('has_substring(text, sub)', {'text': 'hello world', 'sub': 'earth'})
# False

# Applied to an array of securities
expression_engine.solve('has_substring(tickers, prefix)', {'tickers': np.array(['AAPL', 'MSFT', 'AMZN']), 'prefix': 'AA'})
# [True, False, False]

Manipulation¶

replace¶

replace(source_string, old_value, new_value)

Vectorized. Replaces all occurrences of old_value with new_value. Returns None if source_string is None.

expression_engine.solve('replace(text, old, new)', {'text': 'Hello, World!', 'old': 'World', 'new': 'Universe'})
# 'Hello, Universe!'

expression_engine.solve(
    'replace(texts, old, new)',
    {'texts': np.array(['Hello, World!', 'World', 'World, Hello']), 'old': 'World', 'new': 'Everysk'}
)
# ['Hello, Everysk!', 'Everysk', 'Everysk, Hello']

strip¶

strip(source_string)

Vectorized. Removes leading and trailing whitespace. Returns None if source_string is None.

expression_engine.solve('strip(text)', {'text': '  hello  '})
# 'hello'

expression_engine.solve('strip(text)', {'text': '\nhello\n'})
# 'hello'

rstrip¶

rstrip(source_string)

Vectorized. Removes trailing whitespace only. Returns None if source_string is None.

expression_engine.solve('rstrip(text)', {'text': '  hello  '})
# '  hello'

lstrip¶

lstrip(source_string)

Vectorized. Removes leading whitespace only. Returns None if source_string is None.

expression_engine.solve('lstrip(text)', {'text': '  hello  '})
# 'hello  '

expand_tabs¶

expand_tabs(source_string, tab_size=4)

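Vectorized. Replaces each tab character (\t) with spaces up to the next tab stop, where tab stops fall every tab_size columns; the number of spaces inserted therefore depends on the tab's position in the string. Returns None if source_string is None.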

expression_engine.solve('expand_tabs(text)', {'text': 'hello\tworld'})
# 'hello   world'

expression_engine.solve('expand_tabs(text, size)', {'text': 'hello\tworld', 'size': 8})
# 'hello   world'

expression_engine.solve('expand_tabs(text, size)', {'text': '01\t012\t0123', 'size': 4})
# '01  012 0123'

str_normalize¶

str_normalize(source_string, form='NFKD')

Vectorized. Normalizes a Unicode string to the specified form and removes all diacritical marks (accents). The Angstrom sign (Å) is normalized to 'A'. Returns None if source_string is None or the normalization form is invalid.

expression_engine.solve('str_normalize(text)', {'text': 'café'})
# 'cafe'

expression_engine.solve('str_normalize(text)', {'text': 'mañana'})
# 'manana'

expression_engine.solve('str_normalize(text)', {'text': 'Ångström'})
# 'Angstrom'

expression_engine.solve('str_normalize(text)', {'text': 'Crème Brûlée'})
# 'Creme Brulee'

expression_engine.solve(
    'str_normalize(names)',
    {'names': np.array(['café', 'mañana', 'Água'])}
)
# ['cafe', 'manana', 'Agua']

str_random¶

str_random(length=<default>, characters=<alphanumeric>)

Generates a random string of the given length. Characters are drawn from the alphanumeric set by default; pass a custom characters string to draw from a different set.

expression_engine.solve('str_random()')
# 'aK3mPqL9'  (random, length set by system default)

expression_engine.solve('str_random(n)', {'n': 16})
# 'xR7tYm2nQw4vZp8k'  (random, 16 characters)

expression_engine.solve('str_random(n, chars)', {'n': 8, 'chars': '0123456789'})
# '04829371'  (random digits only)

Alignment & Padding¶

center¶

center(source_string, width, fillchar=' ')

Vectorized. Centers source_string within a field of width characters, padding both sides with fillchar. Returns None if source_string is None.

expression_engine.solve('center(text, w, c)', {'text': 'hello', 'w': 11, 'c': '-'})
# '---hello---'

expression_engine.solve('center(text, w)', {'text': 'hello', 'w': 10})
# '  hello   '

rjust¶

rjust(source_string, width, fillchar=' ')

Vectorized. Right-justifies source_string by padding on the left with fillchar to reach width characters. Returns None if source_string is None.

expression_engine.solve('rjust(text, w, c)', {'text': 'hello', 'w': 10, 'c': '*'})
# '*****hello'

expression_engine.solve('rjust(text, w, c)', {'text': 'coding', 'w': 10, 'c': '-'})
# '----coding'

expression_engine.solve(
    'rjust(texts, w, c)',
    {'texts': np.array(['hello', 'world']), 'w': 10, 'c': '-'}
)
# ['-----hello', '-----world']

ljust¶

ljust(source_string, width, fillchar=' ')

Vectorized. Left-justifies source_string by padding on the right with fillchar to reach width characters. Returns None if source_string is None.

expression_engine.solve('ljust(text, w, c)', {'text': 'hello', 'w': 10, 'c': '-'})
# 'hello-----'

zfill¶

zfill(source_string, width)

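Vectorized. Pads source_string on the left with '0' characters until it reaches width characters. A string that is already width characters long is returned unchanged. Returns None if source_string is None.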

expression_engine.solve('zfill(text, w)', {'text': '42', 'w': 5})
# '00042'

expression_engine.solve('zfill(text, w)', {'text': '12345', 'w': 5})
# '12345'

expression_engine.solve('zfill(text, w)', {'text': None, 'w': 7})
# None

Case¶

lower¶

lower(source_string)

Vectorized. Converts all characters to lowercase. Returns None if source_string is None.

expression_engine.solve('lower(text)', {'text': 'HELLO'})
# 'hello'

expression_engine.solve('lower(texts)', {'texts': np.array(['ABC', 'DEF', 'GHI'])})
# ['abc', 'def', 'ghi']

upper¶

upper(source_string)

Vectorized. Converts all characters to uppercase. Returns None if source_string is None.

expression_engine.solve('upper(text)', {'text': 'hello'})
# 'HELLO'

expression_engine.solve('upper(texts)', {'texts': np.array(['hello', 'world'])})
# ['HELLO', 'WORLD']

capitalize¶

capitalize(source_string)

Vectorized. Uppercases the first character and lowercases all others. Returns None if source_string is None.

expression_engine.solve('capitalize(text)', {'text': 'hello'})
# 'Hello'

expression_engine.solve('capitalize(text)', {'text': 'HELLO'})
# 'Hello'

expression_engine.solve('capitalize(text)', {'text': '123hello'})
# '123hello'

title¶

title(source_string)

Vectorized. Uppercases the first character of each word and lowercases the rest. Returns None if source_string is None.

expression_engine.solve('title(text)', {'text': 'hello world'})
# 'Hello World'

expression_engine.solve('title(text)', {'text': 'HELLO WORLD'})
# 'Hello World'

expression_engine.solve('title(text)', {'text': '123hello 123world'})
# '123Hello 123World'

swapcase¶

swapcase(source_string)

Vectorized. Inverts the case of every character — uppercase becomes lowercase and vice versa. Returns None if source_string is None.

expression_engine.solve('swapcase(text)', {'text': 'Hello World'})
# 'hELLO wORLD'

expression_engine.solve('swapcase(text)', {'text': 'TeStE'})
# 'tEsTe'

Prefix & Suffix Checks¶

startswith¶

startswith(source_string, search_string)

Vectorized. Returns True if source_string starts with search_string. Returns None if source_string is None.

expression_engine.solve('startswith(text, prefix)', {'text': 'Hello World', 'prefix': 'H'})
# True

expression_engine.solve('startswith(text, prefix)', {'text': 'Hello World', 'prefix': 'W'})
# False

expression_engine.solve('startswith(texts, prefix)', {'texts': np.array(['AAPL', 'MSFT', 'AMZN']), 'prefix': 'A'})
# [True, False, True]

endswith¶

endswith(source_string, search_string)

Vectorized. Returns True if source_string ends with search_string. Returns None if source_string is None.

expression_engine.solve('endswith(text, suffix)', {'text': 'Hello World', 'suffix': 'd'})
# True

expression_engine.solve('endswith(text, suffix)', {'text': 'Hello World', 'suffix': 'H'})
# False

String Inspection¶

is_alpha¶

is_alpha(source_string)

Vectorized. Returns True if all characters in source_string are alphabetic. Returns None if source_string is None.

expression_engine.solve('is_alpha(text)', {'text': 'hello'})
# True

expression_engine.solve('is_alpha(text)', {'text': 'hello123'})
# False

expression_engine.solve('is_alpha(text)', {'text': ' hello '})
# False

is_alnum¶

is_alnum(source_string)

Vectorized. Returns True if all characters are alphanumeric (letters or digits). Returns None if source_string is None.

expression_engine.solve('is_alnum(text)', {'text': 'abc123'})
# True

expression_engine.solve('is_alnum(text)', {'text': 'abc 123'})
# False

is_decimal¶

is_decimal(source_string)

Vectorized. Returns True if all characters are decimal digits (0–9). Returns None if source_string is None.

expression_engine.solve('is_decimal(text)', {'text': '12345'})
# True

expression_engine.solve('is_decimal(text)', {'text': '123.45'})
# False

is_digit¶

is_digit(source_string)

Vectorized. Returns True if all characters are digit characters (includes some Unicode superscript digits). Returns None if source_string is None.

expression_engine.solve('is_digit(text)', {'text': '12345'})
# True

expression_engine.solve('is_digit(text)', {'text': '123abc'})
# False

is_numeric¶

is_numeric(source_string)

Vectorized. Returns True if all characters are numeric (broader than is_digit — includes fractions and numeric Unicode). Returns None if source_string is None.

expression_engine.solve('is_numeric(text)', {'text': '12345'})
# True

expression_engine.solve('is_numeric(text)', {'text': '123.45'})
# False

is_lower¶

is_lower(source_string)

Vectorized. Returns True if all cased characters are lowercase (digits and spaces are ignored). Returns None if source_string is None.

expression_engine.solve('is_lower(text)', {'text': 'hello'})
# True

expression_engine.solve('is_lower(text)', {'text': '123hello'})
# True

expression_engine.solve('is_lower(text)', {'text': 'Hello'})
# False

is_upper¶

is_upper(source_string)

Vectorized. Returns True if all cased characters are uppercase. Returns None if source_string is None.

expression_engine.solve('is_upper(text)', {'text': 'HELLO'})
# True

expression_engine.solve('is_upper(text)', {'text': 'HELLO WORLD!'})
# True

expression_engine.solve('is_upper(text)', {'text': 'Hello'})
# False

is_title¶

is_title(source_string)

Vectorized. Returns True if the string is title-cased (first letter of each word is uppercase, the rest lowercase). Returns None if source_string is None.

expression_engine.solve('is_title(text)', {'text': 'Hello World'})
# True

expression_engine.solve('is_title(text)', {'text': 'hello world'})
# False

expression_engine.solve('is_title(text)', {'text': 'Hello world'})
# False

is_space¶

is_space(source_string)

Vectorized. Returns True if the string contains only whitespace characters (spaces, tabs, newlines). Returns None if source_string is None.

expression_engine.solve('is_space(text)', {'text': ' '})
# True

expression_engine.solve('is_space(text)', {'text': '\t\n'})
# True

expression_engine.solve('is_space(text)', {'text': ' hello '})
# False