This file implements common functionality for lexical analysis, such as string comparison, tokenization (splitting strings by whitespace), and parsing arithmetic types from strings.

Classes, functions, and variables in this file
bool compare_strings (const array< char > & first, const char * second)
bool compare_strings (const string & first, const char * second, unsigned int second_length)
bool tokenize (const char * str, unsigned int length, array< unsigned int > & tokens, hash_map< string, unsigned int > & names)
bool parse_float (const CharArray & token, double & value)
bool parse_uint (const CharArray & token, unsigned int & value, unsigned int base = 0)
bool parse_ulonglong (const CharArray & token, unsigned long long & value)
bool parse_int (const CharArray & token, int & value)
bool parse_long (const CharArray & token, long & value)
bool parse_long_long (const CharArray & token, long long & value)
bool parse_uint (const char (&token)[N], unsigned int & value, unsigned int base = 0)
struct position
struct lexical_token
bool print (const lexical_token< TokenType > & token, Stream & stream, Printer & printer)
void read_error (const char * error, const position & pos)
bool emit_token (array< lexical_token< TokenType > > & tokens, const position & start, const position & end, TokenType type)
bool emit_token (array< lexical_token< TokenType > > & tokens, array< char > & token, const position & start, const position & end, TokenType type)
void free_tokens (array< lexical_token< TokenType > > & tokens)
bool expect_token (const array< lexical_token< TokenType > > & tokens, const unsigned int & index, TokenType type, const char * name)
bool append_to_token (array< char > & token, char32_t next, mbstate_t & shift)
bool compare_strings(
const array< char > &first,
const char *second)
Compares the strings given by the array<char> structure first and the null-terminated C string second.
Returns

true if the strings are equivalent, and false otherwise.

bool compare_strings(
const string &first,
const char *second,
unsigned int second_length)
Compares the strings given by the core::string first and the native char array second whose length is given by second_length.
Returns

true if the strings are equivalent, and false otherwise.

bool tokenize(
const char *str,
unsigned int length,
array< unsigned int > &tokens,
hash_map< string, unsigned int > &names)

Tokenizes the given native char array str with length length, assigning to each unique token an unsigned int identifier. These identifiers are stored in the core::hash_map names. The tokenized identifiers are added to the core::array tokens.

template<typename CharArray>
bool parse_float(
const CharArray &token,
double &value)
Attempts to parse the string given by token as a double.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a floating-point number.

template<typename CharArray>
bool parse_uint(
const CharArray &token,
unsigned int &value,
unsigned int base = 0)
Attempts to parse the string given by token as an unsigned int.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an unsigned integer.

template<typename CharArray>
bool parse_ulonglong(
const CharArray &token,
unsigned long long &value)
Attempts to parse the string given by token as an unsigned long long.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an unsigned long long.

template<typename CharArray>
bool parse_int(
const CharArray &token,
int &value)
Attempts to parse the string given by token as an int.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an integer.

template<typename CharArray>
bool parse_long(
const CharArray &token,
long &value)
Attempts to parse the string given by token as a long.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a long.

template<typename CharArray>
bool parse_long_long(
const CharArray &token,
long long &value)
Attempts to parse the string given by token as a long long.
CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a long long.

template<size_t N>
bool parse_uint(
const char (&token)[N],
unsigned int &value,
unsigned int base = 0)
Attempts to parse the string given by token as an unsigned int.
base

if 0, the numeric base of the integer is detected automatically in the same way as strtoul. Otherwise, the numeric base can be specified explicitly.

Returns

true if successful, or false if token is not an appropriate string representation of an unsigned integer.

struct position

Represents a position in a file. This structure is typically used to provide informative errors during lexical analysis of data from a file.

Public members
unsigned int line
unsigned int column
position (unsigned int line, unsigned int column)
position (const position & p)
position operator + (unsigned int i) const
position operator - (unsigned int i) const
static bool copy (const position & src, position & dst)
unsigned int position::line

The line number of the position in the file.

unsigned int position::column

The column number of the position in the file.

position::position(
unsigned int line,
unsigned int column)

Constructs the position structure with the given line and column.

position::position(
const position &p)

Constructs the position structure by copying from p.

position position::operator + (
unsigned int i) const

Returns a position with the column number increased by i.

position position::operator - (
unsigned int i) const

Returns a position with the column number decreased by i.

static bool position::copy(
const position &src,
position &dst)

Copies the position structure from src into dst.

struct lexical_token

template<typename TokenType>

A structure representing a single token during lexical analysis. This structure is generic, intended for use across multiple lexical analyzers.

Public members
TokenType type
position start
position end
string text
TokenType lexical_token::type

The generic type of this token.

position lexical_token::start

The start position (inclusive) of the token in the source file.

position lexical_token::end

The end position (exclusive) of the token in the source file.

string lexical_token::text

An (optional) string representing the contents of the token.

template<typename TokenType, typename Stream, typename Printer>
bool print(
const lexical_token< TokenType > &token,
Stream &stream,
Printer &printer)
Prints the given lexical_token token to the output stream.
Printer

a scribe type for which the functions print(const TokenType&, Stream&, Printer&) and print(const core::string& s, Stream&, Printer&) are defined.

void read_error(
const char *error,
const position &pos)

Reports an error with the given message error as a null-terminated C string at the given source file position pos to stderr.

template<typename TokenType>
bool emit_token(
array< lexical_token< TokenType > > &tokens,
const position &start,
const position &end,
TokenType type)

Constructs a lexical_token with the given start and end positions, and TokenType type, with an empty lexical_token::text message and appends it to the tokens array.

template<typename TokenType>
bool emit_token(
array< lexical_token< TokenType > > &tokens,
array< char > &token,
const position &start,
const position &end,
TokenType type)

Constructs a lexical_token with the given start and end positions, and TokenType type, with lexical_token::text copied from token and appends it to the tokens array.

template<typename TokenType>
void free_tokens(
array< lexical_token< TokenType > > &tokens)

Frees every element in the given tokens array. This function does not free the array itself.

template<typename TokenType>
bool expect_token(
const array< lexical_token< TokenType > > &tokens,
const unsigned int &index,
TokenType type,
const char *name)

Inspects the element at the given index in the tokens array. If index is not out of bounds, and the token at that index has a type that matches the given type, the function returns true. Otherwise, an error message is printed to stderr indicating that the expected token was missing, with its name as part of the error message, and false is returned.

bool append_to_token(
array< char > &token,
char32_t next,
mbstate_t &shift)

Appends the given wide character next to the char array token which represents a multi-byte string.