The C standard library has some notable gaps in its string functions; this library fills in some of them. The functions below assume 7-bit ASCII where it matters; UTF-8 versions can be found
here.
Coverage is sparse at the moment, like the documentation.
/*
Naming Convention
Mostly derived from string.h
str n? r? case? {operation} _inplace?
str: consistent prefix
n: limited by bytes
r: reverse; starts at the end of the string
case: case-insensitive
operation: see below
_inplace: modifies the source buffer
Operations:
cat: append onto existing string; strcat
chr: search for a character; strchr
cmp: compare; strcmp
colwsp: collapse whitespace. All sequences of whitespace are converted into a single copy of the provided character
cpy: copy; strcpy
cspn: return length of inverse prefix substring (postfix for 'r' version)
dup: duplicate into newly allocated memory; strdup
dupa: duplicate onto the stack using alloca
len: calculate length; strlen
pbrk: search for the first of any of a set of characters
rev: reverse the string
spn: return length of prefix substring (postfix for 'r' version)
skip: search for the first character not in a set of characters (strspn, but returns a pointer)
str: search for a substring; strstr
tolower: convert the string to lowercase
toupper: convert the string to uppercase
ltrim: remove a prefix of characters in a set from the beginning of the string
rtrim: remove a suffix of characters in a set from the end of the string
trim: remove a sequence of characters in a set from the beginning and end of the string
Arguments:
int c; a single ASCII character
size_t len; operation limit, in bytes
*/
// Regular ASCII functions.
// General rule: spn functions return the number of characters spanned.
//
// strchr limited to n bytes: search s for the character c.
// Returns a pointer to the found char, or NULL.
char* strnchr(const char* s, int c, size_t n);
// Reverse the string; returns s.
char* strrev(char* s);
// Variant limited to n bytes.
// NOTE(review): takes a const pointer but returns a mutable one, unlike strrev;
// confirm against the implementation whether s is modified in place.
char* strnrev(const char* s, size_t n);
// pbrk family: search s for any of the characters in "accept".
// Returns a pointer to the first matching character.
// "n" variants are limited to n bytes; "r" variants start at the end of the string.
// NOTE(review): the result when nothing matches is not documented here;
// presumably NULL, like strpbrk. Confirm.
char* strnpbrk(const char* s, const char* accept, size_t n);
char* strrpbrk(const char* s, const char* accept);
char* strnrpbrk(const char* s, const char* accept, size_t n);
// cspn family: the length of the initial part of "s" not containing any of the
// characters that are part of "reject".
// "n" variants are limited to n bytes; "r" variants start at the end of the
// string, so they measure a postfix rather than a prefix.
size_t strncspn(const char* s, const char* reject, size_t n);
size_t strrcspn(const char* s, const char* reject);
size_t strnrcspn(const char* s, const char* reject, size_t n);
// Reverse strspn limited to n bytes: returns the number of characters spanned.
size_t strnrspn(const char* s, const char* accept, size_t n);
// Remove a prefix of characters in "charset" from the beginning of s,
// limited to n bytes. Moves the remaining chars to the left; returns s.
char* strnltrim(char* s, const char* charset, size_t n);
// Collapse whitespace: all sequences of whitespace are converted into a
// single copy of c. Does not trim; returns s.
char* strcolwsp(char* s, int c);
// Same as strcolwsp, limited to n bytes.
char* strncolwsp(char* s, int c, size_t n);
// Collapses whitespace like strcolwsp and also trims; returns s.
char* strcolwsptrim(char* s, int c);
// Capitalize the first letter following whitespace, and the beginning of the
// string. Modifies s in place and returns s.
char* strcapwords(char* s);
// Same as strcapwords, limited to n bytes.
char* strncapwords(char* s, size_t n);
// Capitalize the first letter following terminal punctuation, and the
// beginning of the string. Modifies s in place and returns s.
char* strcapsentences(char* s);
// Same as strcapsentences, limited to n bytes.
char* strncapsentences(char* s, size_t n);
// strspn limited to "count" bytes.
// NOTE(review): the limit comes before the character set here, whereas
// strncspn and strnrspn in this header take it last. Mind the argument order.
size_t strnspn(const char* s, size_t count, const char* accept);
// Reverse strspn: spans from the end of the string (a postfix rather than a prefix).
size_t strrspn(const char* s, const char* accept);
// Reverse strstr: searches for needle starting at the end of haystack.
// NOTE(review): presumably returns the last occurrence, or NULL if there is none. Confirm.
const char* strrstr(const char* haystack, const char* needle);
// Length of the line, or length of the string if no \n is found.
// NOTE(review): whether the \n itself is counted is not stated here. Confirm.
size_t strlnlen(const char* s);
// strdup a line.
// NOTE(review): presumably the copy is heap-allocated and owned by the caller. Confirm.
char* strlndup(const char* s);
// Disabled declaration: duplicate onto the stack using alloca.
//char* strdupa(const char* s);
// TODO: string-reverse
// NOTE(review): strrev/strnrev are declared earlier in this header, so this TODO may be stale.
// Line count.
size_t strlinecnt(const char* s);
// Allocates a new buffer and calls sprintf with it; fmt and the variadic
// arguments are the sprintf format and its arguments.
// NOTE(review): presumably the caller owns the returned buffer and must free it. Confirm.
// NOTE(review): fmt is not const-qualified; a const format string needs a cast here.
char* sprintfdup(char* fmt, ...);
// Concatenate all argument strings together in a new buffer.
// The macro forwards its arguments to strcatdup_, prefixed with
// PP_NARG(__VA_ARGS__) as nargs (PP_NARG is defined elsewhere; presumably it
// expands to the number of arguments. Confirm).
#define strcatdup(...) strcatdup_(PP_NARG(__VA_ARGS__), __VA_ARGS__)
// Implementation behind strcatdup; nargs precedes the variadic strings.
char* strcatdup_(size_t nargs, ...);
// Concatenate all argument strings together in a new buffer,
// with the given joining string between them.
// The macro forwards j as the joiner, then PP_NARG(__VA_ARGS__) as nargs,
// then the strings themselves (PP_NARG is defined elsewhere; presumably it
// expands to the number of arguments. Confirm).
#define strjoin(j, ...) strjoin_(j, PP_NARG(__VA_ARGS__), __VA_ARGS__)
// Implementation behind strjoin; nargs precedes the variadic strings.
char* strjoin_(char* joiner, size_t nargs, ...);
// Returns a null-terminated list of pointers to each line.
// Mutates the source (replaces newlines with nulls).
// NOTE(review): the wording above speaks of lines/newlines, but the separator
// is the "delim" parameter; presumably src is split on delim. Confirm.
// NOTE(review): outLen presumably receives the number of pieces. Confirm.
char** strsplit_inplace(char* src, char delim, size_t* outLen);
// Variant that allocates individual memory chunks for each split pointer.
// NOTE(review): presumably src is left unmodified and every chunk, plus the
// list itself, must be freed by the caller. Confirm.
char** strsplit(char* src, char delim, size_t* outLen);
// Trim family: "trim" is the set of characters to remove.
// NOTE(review): these names do not follow the ltrim/rtrim spelling described
// in the naming convention at the top of this file.
// NOTE(review): the meaning of the size_t return value is not documented here
// (characters removed vs. resulting length). Confirm.
//
// Trim left: remove characters in the set from the beginning of s.
size_t strtriml(char* s, const char* trim);
// Trim right: remove characters in the set from the end of s.
size_t strtrimr(char* s, const char* trim);
// Trim both left and right.
size_t strtrim(char* s, const char* trim);
// Handy shortcut: strspn, but returns a pointer instead of a length.
// Skips the leading run of characters in s that belong to the set "skip" and
// returns a pointer to the first character of s that is not in the set. If
// every character of s is in the set, the result points at the terminating NUL.
// "skip" is only read, so it is const-qualified: const sets can be passed
// without a cast, and existing callers passing char* are unaffected.
static inline char* strskip(char* s, const char* skip) {
	return s + strspn(s, skip);
}
// Decodes one character of a string according to the string literal rules in C.
// *s is advanced to the next char.
// Gleefully advances the pointer through nulls like any other character, so
// the caller is responsible for stopping at the end of the input.
// Returns 1 if the character was escaped.
// Returns an error code on invalid escape sequences.
// NOTE(review): presumably the decoded character is stored in *c_out and an
// unescaped character returns 0; the error code values are not specified here. Confirm.
int decode_c_string_char(char** s, int* c_out);
// Describes a parsed number: its value plus type, base and suffix information.
// Passed by pointer to read_c_number.
typedef struct number_parse_info {
// The value, as an anonymous union.
// NOTE(review): presumably "type" tells which member is meaningful
// ('f' -> f, 'i' -> n). Confirm.
union {
long double f;
unsigned long long int n;
};
char type; // 'f', 'i'
// NOTE(review): presumably the numeric base of the literal. Confirm.
char base;
// suffixes
// NOTE(review): presumably these record the long and unsigned suffixes seen
// on the literal; the exact encoding of non-zero values is not shown here.
char longs; // 0 for unspecified
char not_signed; // 0 for unspecified
} number_parse_info;
// NOTE(review): undocumented in this header. Presumably parses a C-style
// numeric literal at *s into *info and advances *s past it; the meaning of
// the int result is not stated. Confirm against the implementation.
int read_c_number(char** s, number_parse_info* info);
// Format n into buf in an arbitrary base, with digits taken from charset.
// No buffer size is passed, so buf must be large enough for the result.
// NOTE(review): presumably charset needs at least "base" characters and the
// return value is the number of characters written. Confirm.
int sprintlongb(char* buf, int base, int64_t n, char* charset);
// Returns the numerical value of a single hex digit.
// NOTE(review): the result for a non-hex character is not documented here. Confirm.
unsigned int decodeHexDigit(char c);
// Returns rgba, with r in the most significant bits and a in the least.
// NOTE(review): the accepted input syntax (leading '#', digit count) is not
// documented here. Confirm.
uint32_t decodeHexColor(char* s);
// Writes rgba to out, with r in out[0] and a in out[3], normalized so that
// 0xFF = 1.0. out must have room for at least 4 floats.
void decodeHexColorNorm(char* s, float* out);