features.h

This file contains structures for representing sets of discrete features, where each feature_set has `n`

features, and each feature is of type `unsigned int`

.

Classes, functions, and variables in this file | |
---|---|

struct | feature_set |

bool | init (feature_set & set, unsigned int feature_count) |

bool | init (feature_set & set, const feature_set & src) |

bool | operator == (const feature_set & first, const feature_set & second) |

struct | feature_set_sorter |

bool | less_than (const FeatureSet & first, const FeatureSet & second, const feature_set_sorter & sorter) |

struct | weighted_feature_set |

bool | init (weighted_feature_set< V > & set, unsigned int feature_count) |

bool | init (weighted_feature_set< V > & set, const feature_set & src, V log_probability) |

bool | operator < (const weighted_feature_set< V > & first, const weighted_feature_set< V > & second) |

double | log_probability (const weighted_feature_set< V > & set) |

This struct represents a sequence of features, with length feature_set::feature_count, each feature having type `unsigned int`

.

Public members | |
---|---|

unsigned int * | features |

unsigned int | feature_count |

unsigned int ** | excluded |

unsigned int * | excluded_counts |

feature_set (unsigned int feature_count) | |

feature_set (const feature_set & src) | |

unsigned int | operator [] (unsigned int index) const |

unsigned int & | operator [] (unsigned int index) |

void | set_feature (unsigned int index, unsigned int value) |

bool | is_excluded (unsigned int index, unsigned int value) const |

bool | ensure_excluded_capacity (unsigned int index, unsigned int capacity) |

bool | set_excluded (unsigned int index, const unsigned int * src, unsigned int count) |

void | exclude_unsorted (unsigned int index, unsigned int item) |

void | sort_excluded (unsigned int index) |

static unsigned int | hash (const feature_set & set) |

static void | move (const feature_set & src, feature_set & dst) |

static void | swap (feature_set & first, feature_set & second) |

static bool | is_empty (const feature_set & set) |

static void | set_empty (feature_set & set) |

static void | set_empty (feature_set * sets, unsigned int length) |

static void | free (feature_set & set) |

unsigned int * feature_set::features

The native array that stores the feature values.

unsigned int feature_set::feature_count

The number of features. This is the length of both feature_set::features and feature_set::excluded_counts, as well as the size of the first dimension of feature_set::excluded.

unsigned int ** feature_set::excluded

A two-dimensional array containing *excluded* feature values. That is, `excluded[i]`

contains the set of excluded feature values for the feature at index `i`

. Each `excluded[i]`

is a sorted array of `unsigned int`

feature values containing distinct elements. The length of `excluded[i]`

is given by `feature_set::excluded_counts[i]`

. For each `j`

such that `feature_set::excluded_counts[j] == 0`

, `excluded[j]`

is `NULL`

.

unsigned int * feature_set::excluded_counts

An array that stores the length of each `feature_set::excluded[i]`

.

feature_set::feature_set(

unsigned int | feature_count | ) |

Initializes this feature set with the given `feature_count`

. Each element in feature_set::features is uninitialized, whereas feature_set::excluded_counts is initialized to all zeros.

feature_set::feature_set(

const feature_set & | src | ) |

Initializes this feature set by copying from the given feature_set `src`

.

unsigned int feature_set::operator [] (

unsigned int | index | ) const |

Returns the feature value at the given `index`

.

unsigned int & feature_set::operator [] (

unsigned int | index | ) |

Returns a reference to the feature value at the given `index`

.

void feature_set::set_feature(

unsigned int | index, | |

unsigned int | value | ) |

Sets the feature value at the given `index` to `value`

.

bool feature_set::is_excluded(

unsigned int | index, | |

unsigned int | value | ) const |

Returns `true`

if the given feature `value`

is excluded from the feature at the given `index`

. Otherwise, this function returns `false`

.

bool feature_set::ensure_excluded_capacity(

unsigned int | index, | |

unsigned int | capacity | ) |

Checks that the excluded array at `feature_set::excluded[index]`

has sufficient `capacity`

. If that array is `NULL`

, this function will initialize it (leaving its elements uninitialized).

bool feature_set::set_excluded(

unsigned int | index, | |

const unsigned int * | src, | |

unsigned int | count | ) |

Initializes the excluded array at `feature_set::excluded[index]`

and copies the contents from `src`

. This function assumes `feature_set::excluded[index]`

has not previously been initialized.

void feature_set::exclude_unsorted(

unsigned int | index, | |

unsigned int | item | ) |

Excludes the given `item`

from the feature at the given `index`

, without sorting the excluded array `feature_set::excluded[index]`

. `feature_set::excluded_counts[index]`

is also incremented. This function assumes the excluded array does not already contain `item`

and has sufficient capacity.

void feature_set::sort_excluded(

unsigned int | index | ) |

Sorts the excluded array at `feature_set::excluded[index]`

. This function assumes the excluded array is not `NULL`

.

static unsigned int feature_set::hash(

const feature_set & | set | ) |

Evaluates the hash function for the given feature_set `set`

.

static void feature_set::move(

const feature_set & | src, | |

feature_set & | dst | ) |

Moves the feature_set in `src`

to `dst`

. Note that this function does not initialize the fields in `dst`

by copying the contents of the corresponding fields in `src`

. Rather, this function simply copies the pointers from `src`

into `dst`

.

static void feature_set::swap(

feature_set & | first, | |

feature_set & | second | ) |

Swaps the underlying pointers in the feature_sets `first`

and `second`

.

static bool feature_set::is_empty(

const feature_set & | set | ) |

Returns whether feature_set::features is `NULL`

in the given `set`

.

static void feature_set::set_empty(

feature_set & | set | ) |

Sets feature_set::features to `NULL`

in the given `set`

.

static void feature_set::set_empty(

feature_set * | sets, | |

unsigned int | length | ) |

Sets feature_set::features to `NULL`

in every element of the given array `sets`

.

static void feature_set::free(

feature_set & | set | ) |

Frees the underlying arrays in the given feature_set `set`

.

bool init(

feature_set & | set, | |

unsigned int | feature_count | ) |

Initializes the given feature_set `set`

with the given `feature_count`

. Each element in feature_set::features is uninitialized, whereas feature_set::excluded_counts is initialized to all zeros.

bool init(

feature_set & | set, | |

const feature_set & | src | ) |

Initializes the given feature_set `set`

by copying from the given feature_set `src`

.

bool operator == (

const feature_set & | first, | |

const feature_set & | second | ) |

Returns whether the feature_set in `first`

is equivalent to the one in `second`

. For equivalence, the feature sets must have the same sequence of features, as well as the same set of excluded features at every index.

A sorter for feature sets that enables lexicographical sorting according to the sequence of feature values.

Public members | |
---|---|

feature_set_sorter (unsigned int depth) |

feature_set_sorter::feature_set_sorter(

unsigned int | depth | ) |

Initializes the feature_set_sorter with the given `depth`

. This feature_set_sorter sorts feature set objects that have length `depth - 1`

.

template<typename FeatureSet>

bool less_than(const FeatureSet & | first, | |

const FeatureSet & | second, | |

const feature_set_sorter & | sorter | ) |

Compares the given feature set objects `first`

and `second`

using the feature_set_sorter `sorter`

.

Template parameters | |
---|---|

FeatureSet |
a feature set type that implements the function `unsigned int get_feature(unsigned int) const` (as weighted_feature_set does). |

template<typename V>

This structure represents a feature_set weighted by a numerical value, such as a probability.

Template parameters | |
---|---|

V |
satisfies is_arithmetic. |

Public members | |
---|---|

feature_set | features |

V | log_probability |

unsigned int | get_feature (unsigned int index) const |

void | set_feature (unsigned int index, unsigned int feature) |

bool | ensure_excluded_capacity (unsigned int index, unsigned int capacity) |

bool | set_excluded (unsigned int index, const unsigned int * src, unsigned int count) |

void | exclude_unsorted (unsigned int index, unsigned int item) |

void | sort_excluded (unsigned int index) |

V | get_probability () const |

void | set_probability (V probability) |

static void | move (const weighted_feature_set< V > & src, weighted_feature_set< V > & dst) |

static void | swap (weighted_feature_set< V > & first, weighted_feature_set< V > & second) |

static void | free (weighted_feature_set< V > & set) |

feature_set weighted_feature_set::features

The underlying feature_set.

V weighted_feature_set::log_probability

The weight of weighted_feature_set::features.

unsigned int weighted_feature_set::get_feature(

unsigned int | index | ) const |

Returns the feature value at the given `index`

.

void weighted_feature_set::set_feature(

unsigned int | index, | |

unsigned int | feature | ) |

Sets the feature value at the given `index`

to `feature`

.

bool weighted_feature_set::ensure_excluded_capacity(

unsigned int | index, | |

unsigned int | capacity | ) |

Checks that the excluded array at `feature_set::excluded[index]`

in `weighted_feature_set::features`

has sufficient capacity. If that array is `NULL`

, this function will initialize it (leaving its elements uninitialized).

bool weighted_feature_set::set_excluded(

unsigned int | index, | |

const unsigned int * | src, | |

unsigned int | count | ) |

Initializes the excluded array at `feature_set::excluded[index]`

in `weighted_feature_set::features`

and copies the contents from `src`

. This function assumes `feature_set::excluded[index]`

has not previously been initialized.

void weighted_feature_set::exclude_unsorted(

unsigned int | index, | |

unsigned int | item | ) |

Excludes the given `item` from the feature at the given `index`

, without sorting the excluded array `feature_set::excluded[index]`

in `weighted_feature_set::features`

. `feature_set::excluded_counts[index]`

is also incremented. This function assumes the excluded array does not already contain `item`

and has sufficient capacity.

void weighted_feature_set::sort_excluded(

unsigned int | index | ) |

Sorts the excluded array at `feature_set::excluded[index]`

in `weighted_feature_set::features`

. This function assumes the excluded array is not `NULL`

.

V weighted_feature_set::get_probability() const

Returns the natural exponential of the weight weighted_feature_set::log_probability, i.e. `exp(log_probability)`.

void weighted_feature_set::set_probability(

V | probability | ) |

Sets the weight weighted_feature_set::log_probability to the natural logarithm of the given `probability`

.

static void weighted_feature_set::move(

const weighted_feature_set< V > & | src, | |

weighted_feature_set< V > & | dst | ) |

Moves the given weighted_feature_set in `src`

into `dst`

.

static void weighted_feature_set::swap(

weighted_feature_set< V > & | first, | |

weighted_feature_set< V > & | second | ) |

Swaps the weighted_feature_set structures in `first`

and `second`

.

static void weighted_feature_set::free(

weighted_feature_set< V > & | set | ) |

Frees the given weighted_feature_set `set`

.

template<typename V>

bool init(weighted_feature_set< V > & | set, | |

unsigned int | feature_count | ) |

Initializes the given weighted_feature_set `set`

with the given `feature_count`

. Each element in feature_set::features in weighted_feature_set::features is uninitialized, whereas feature_set::excluded_counts is initialized to all zeros. The weight weighted_feature_set::log_probability is uninitialized.

template<typename V>

bool init(weighted_feature_set< V > & | set, | |

const feature_set & | src, | |

V | log_probability | ) |

Initializes the given weighted_feature_set `set`

with the given feature_set `src`

and weight `log_probability`

.

template<typename V>

bool operator < (const weighted_feature_set< V > & | first, | |

const weighted_feature_set< V > & | second | ) |

Returns whether weighted_feature_set::log_probability of `first`

is *greater than* that of `second`

.

template<typename V>

double log_probability(const weighted_feature_set< V > & | set | ) |

Returns the weight weighted_feature_set::log_probability of `set`

.