core/lex.h Documentation

lex.h

This file implements common functionality for lexical analysis, such as string comparison, tokenization (splitting strings by whitespace), and parsing arithmetic types from strings.

Classes, functions, and variables in this file
bool	compare_strings (const array< char > & first, const char * second)
bool	compare_strings (const string & first, const char * second, unsigned int second_length)
bool	tokenize (const char * str, unsigned int length, array< unsigned int > & tokens, hash_map< string, unsigned int > & names)
bool	parse_float (const CharArray & token, double & value)
bool	parse_uint (const CharArray & token, unsigned int & value, unsigned int base = 0)
bool	parse_ulonglong (const CharArray & token, unsigned long long & value)
bool	parse_int (const CharArray & token, int & value)
bool	parse_long (const CharArray & token, long & value)
bool	parse_long_long (const CharArray & token, long long & value)
bool	parse_uint (const char (&token)[N], unsigned int & value, unsigned int base = 0)
struct	position
struct	lexical_token
bool	print (const lexical_token< TokenType > & token, Stream & stream, Printer & printer)
void	read_error (const char * error, const position & pos)
bool	emit_token (array< lexical_token< TokenType > > & tokens, const position & start, const position & end, TokenType type)
bool	emit_token (array< lexical_token< TokenType > > & tokens, array< char > & token, const position & start, const position & end, TokenType type)
void	free_tokens (array< lexical_token< TokenType > > & tokens)
bool	expect_token (const array< lexical_token< TokenType > > & tokens, const unsigned int & index, TokenType type, const char * name)
bool	append_to_token (array< char > & token, char32_t next, mbstate_t & shift)

[view source]

bool compare_strings(

const array< char > &	first,
const char *	second	)

Compares the strings given by the array<char> structure first and the null-terminated C string second.

Returns

true if the strings are equivalent, and false otherwise.

[view source]

bool compare_strings(

const string &	first,
const char *	second,
unsigned int	second_length	)

Compares the strings given by the core::string first and the native char array second whose length is given by second_length.

Returns

true if the strings are equivalent, and false otherwise.

[view source]

bool tokenize(

const char *	str,
unsigned int	length,
array< unsigned int > &	tokens,
hash_map< string, unsigned int > &	names	)

Tokenizes the given native char array str with length length, assigning to each unique token an unsigned int identifier. These identifiers are stored in the core::hash_map names. The tokenized identifiers are added to the core::array tokens.

[view source]

template<typename CharArray>

bool parse_float(

const CharArray &	token,
double &	value	)

Attempts to parse the string given by token as a double.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a floating-point number.

[view source]

template<typename CharArray>

bool parse_uint(

const CharArray &	token,
unsigned int &	value,
unsigned int	base = 0	)

Attempts to parse the string given by token as an unsigned int.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an unsigned integer.

[view source]

template<typename CharArray>

bool parse_ulonglong(

const CharArray &	token,
unsigned long long &	value	)

Attempts to parse the string given by token as an unsigned long long.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an unsigned integer.

[view source]

template<typename CharArray>

bool parse_int(

const CharArray &	token,
int &	value	)

Attempts to parse the string given by token as an int.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of an integer.

[view source]

template<typename CharArray>

bool parse_long(

const CharArray &	token,
long &	value	)

Attempts to parse the string given by token as a long.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a long.

[view source]

template<typename CharArray>

bool parse_long_long(

const CharArray &	token,
long long &	value	)

Attempts to parse the string given by token as a long long.

CharArray

a string type that implements two fields: (1) data which returns the underlying char* array, and (2) length which returns the length of the string.

Returns

true if successful, or false if there is insufficient memory or token is not an appropriate string representation of a long long.

[view source]

template<size_t N>

bool parse_uint(

const char(&)	token[N],
unsigned int &	value,
unsigned int	base = 0	)

Attempts to parse the string given by token as an unsigned int.

base	if `0`, the numeric base of the integer is detected automatically in the same way as strtoul. Otherwise, the numeric base can be specified explicitly.

Returns

true if successful, or false if token is not an appropriate string representation of an unsigned integer.

struct position
[view source]

Represents a position in a file. This structure is typically used to provide informative errors during lexical analysis of data from a file.

Public members
unsigned int	line
unsigned int	column
	position (unsigned int line, unsigned int column)
	position (const position & p)
position	operator + (unsigned int i) const
position	operator - (unsigned int i) const
static bool	copy (const position & src, position & dst)

[view source]

unsigned int position::line

The line number of the position in the file.

[view source]

unsigned int position::column

The column number of the position in the file.

[view source]

position::position(

unsigned int	line,
unsigned int	column	)

Constructs the position structure with the given line and column.

[view source]

position::position(

const position & p )

Constructs the position structure by copying from p.

[view source]

position position::operator + (

unsigned int i ) const

Returns a position with the column number increased by i.

[view source]

position position::operator - (

unsigned int i ) const

Returns a position with the column number decreased by i.

[view source]

static bool position::copy(

const position &	src,
position &	dst	)

Copies the position structure from src into dst.

struct lexical_token
[view source]

template<typename TokenType>

A structure representing a single token during lexical analysis. This structure is generic, intended for use across multiple lexical analyzers.

Public members
TokenType	type
position	start
position	end
string	text

[view source]

TokenType lexical_token::type

The generic type of this token.

[view source]

position lexical_token::start

The start position (inclusive) of the token in the source file.

[view source]

position lexical_token::end

The end position (exclusive) of the token in the source file.

[view source]

string lexical_token::text

An (optional) string representing the contents of the token.

[view source]

template<typename TokenType, typename Stream, typename Printer>

bool print(

const lexical_token< TokenType > &	token,
Stream &	stream,
Printer &	printer	)

Prints the given lexical_token token to the output stream.

Printer

a scribe type for which the functions print(const TokenType&, Stream&, Printer&) and print(const core::string& s, Stream&, Printer&) are defined.

[view source]

void read_error(

const char *	error,
const position &	pos	)

Reports an error with the given message error as a null-terminated C string at the given source file position pos to stderr.

[view source]

template<typename TokenType>

bool emit_token(

array< lexical_token< TokenType > > &	tokens,
const position &	start,
const position &	end,
TokenType	type	)

Constructs a lexical_token with the given start and end positions, and TokenType type, with an empty lexical_token::text message and appends it to the tokens array.

[view source]

template<typename TokenType>

bool emit_token(

array< lexical_token< TokenType > > &	tokens,
array< char > &	token,
const position &	start,
const position &	end,
TokenType	type	)

Constructs a lexical_token with the given start and end positions, and TokenType type, with lexical_token::text copied from token and appends it to the tokens array.

[view source]

template<typename TokenType>

void free_tokens(

array< lexical_token< TokenType > > & tokens )

Frees every element in the given tokens array. This function does not free the array itself.

[view source]

template<typename TokenType>

bool expect_token(

const array< lexical_token< TokenType > > &	tokens,
const unsigned int &	index,
TokenType	type,
const char *	name	)

Inspects the element at the given index in the tokens array. If index is not out of bounds, and the token at that index has type that matches the given type, the function returns true. Otherwise, an error message is printed to stderr indicating that the expected token was missing, with its name as part of the error message, and false is returned.

[view source]

bool append_to_token(

array< char > &	token,
char32_t	next,
mbstate_t &	shift	)

Appends the given wide character next to the char array token which represents a multi-byte string.

struct position[view source]

struct lexical_token[view source]

struct position
[view source]

struct lexical_token
[view source]