LT Project: Statistics

LtStats

Before moving on to the mechanisms for retrieving LT records from the raw data, we will look at the LtStats class, which appears in the interfaces of the classes that do general data retrieval along with the full records.

class LtStats

{

using id_associated_value_map = std::map<std::string, std::vector<int>>;

public:

void add(const BaseLibraryBookRecord &inRecord);

int getCount() const { return m_count; }

int getElectronicCount() const { return m_ecount; }

int getAveragePages() const

{

if (m_pages.empty())

return 0;

return std::accumulate(m_pages.begin(), m_pages.end(), 0) / m_pages.size();

}

std::span<int> getReviewed() { return m_reviewed; }

std::map<std::string, std::vector<int>> &getTags() const { return m_tags; }

std::map<std::string, std::vector<int>> &getCollections() const

{

return m_collections;

}

std::map<int, std::vector<int>> &getYears() const { return m_years; }

std::map<std::string, std::vector<int>> &getSources() const

{

return m_sources;

}

private:

int m_count = 0;

int m_ecount = 0;

std::vector<int> m_pages;

mutable id_associated_value_map m_tags;

mutable id_associated_value_map m_collections;

std::vector<int> m_reviewed;

std::vector<int> m_archived;

mutable std::map<int, std::vector<int>> m_years;

mutable id_associated_value_map m_sources;

void addFromCommaString(id_associated_value_map &outMap,

const std::string &inString, const int inCurrentRec);

};

This is a fairly arbitrary set of statistics. Some of the values are associated with specific book ids, and can be used to retrieve that set of books (e.g. tags, collections, years, sources). Others are simply cumulative numbers or lists (reviewed, archived, count, pages).

The add() function extracts the values associated with a single record source (note: not a full book record; this does not require the overhead of creating a LibraryBookRecord, and it operates on the in-memory database records).

void LtStats::add(const BaseLibraryBookRecord &inRecord)

{

int currentRec = 0;

inRecord.process([&](const ELibraryRecord inRec, const std::string &inVal) {

switch (inRec)

{

using enum ELibraryRecord;

case Book_Id:

++m_count;

currentRec = std::atoi(inVal.c_str());

break;

case Page_Count:

m_pages.push_back(std::atoi(inVal.c_str()));

break;

case From_Where:

{

auto iter = m_sources.find(inVal);

if (iter == m_sources.end())

m_sources.insert({ inVal, std::vector<int>{ currentRec } });

else

iter->second.push_back(currentRec);

}

break;

case Media:

if (inVal == "Ebook")

++m_ecount;

break;

case Review:

m_reviewed.push_back(currentRec);

break;

case Private_Comment:

if (inVal.find("Box") != std::string::npos)

m_archived.push_back(currentRec);

break;

case Date:

{

int year = std::atoi(inVal.c_str());

if (year != 0)

{

auto iter = m_years.find(year);

if (iter == m_years.end())

m_years.insert({ year, std::vector<int>{ currentRec } });

else

iter->second.push_back(currentRec);

}

break;

case Tags:

addFromCommaString(m_tags, inVal, currentRec);

break;

case Collections:

addFromCommaString(m_collections, inVal, currentRec);

break;

default:

return;

}

});

}

addFromCommaString() captures the data from tags and collections raw records:

/// Splits a comma-separated field and associates each entry with a record id.
///
/// @param outMap        map from entry name to the ids of records carrying it;
///                      updated in place.
/// @param inString      raw comma-separated value of the field.
/// @param inCurrentRec  id appended to the list of every entry found.
void LtStats::addFromCommaString(LtStats::id_associated_value_map &outMap,
                                 const std::string &inString,
                                 const int inCurrentRec)
{
    static boost::char_separator<char> commaSeparator(",");
    boost::tokenizer<boost::char_separator<char>> tokens(inString,
                                                         commaSeparator);

    for (const auto &token : tokens)
    {
        // Surrounding whitespace is not part of the name. operator[] creates
        // an empty id list the first time a name is encountered.
        const std::string name = boost::algorithm::trim_copy(token);
        outMap[name].push_back(inCurrentRec);
    }
}

Though the for_each algorithm used above is certainly reasonable, there are other alternatives, which would require a little more setup.

You could create a class which would wrap the map and use the addition operator to add the string value: in that case you could use std::accumulate(), which is arguably a better indicator of intent.

Let's see about that:

/// Wraps a name -> record-id-list map so that values can be folded into it
/// with operator+, e.g. by std::accumulate.
///
/// The record id attached to each added value is taken from a class-wide
/// static, which must be set with SetCurrentRecordId() before adding.
class AssociatedValueMapAccumulator
{
public:
    AssociatedValueMapAccumulator() = default;

    /// Takes the map by value and moves it into place. The original `const`
    /// by-value parameter could not be moved from and was copied a second
    /// time.
    AssociatedValueMapAccumulator(std::map<std::string, std::vector<int>> inData)
        : m_data(std::move(inData))
    {
    }

    /// Gives mutable access to the wrapped map, even through a const object
    /// (m_data is mutable for exactly this reason).
    std::map<std::string, std::vector<int>> &getValues() const { return m_data; }

    /// Lvalue form: returns a copy of this accumulator with inVal added. The
    /// left operand is left untouched, as is conventional for a binary +.
    auto operator+(const std::string &inVal) const & -> AssociatedValueMapAccumulator
    {
        AssociatedValueMapAccumulator result(*this);
        result.append(inVal);
        return result;
    }

    /// Rvalue form: reuses this object's storage instead of copying the map.
    /// This is the overload selected by C++20 std::accumulate, which
    /// evaluates `acc = std::move(acc) + *first`.
    auto operator+(const std::string &inVal) && -> AssociatedValueMapAccumulator
    {
        append(inVal);
        return std::move(*this);
    }

    /// Sets the record id that subsequent additions are associated with.
    static void SetCurrentRecordId(const int inId) { s_CurrentRec = inId; }

private:
    /// Appends the current record id to the list for the trimmed name,
    /// creating the list if the name has not been seen before.
    void append(const std::string &inVal)
    {
        m_data[boost::algorithm::trim_copy(inVal)].push_back(s_CurrentRec);
    }

    mutable std::map<std::string, std::vector<int>> m_data;

    // Declaration only: as a non-inline static data member this needs a
    // definition in exactly one translation unit.
    static int s_CurrentRec;
};

This can be used for collections, sources, and tags, so we can replace the map in the alias declaration (the using line at the top of LtStats) with this class. It does mean that on parsing the book id we have to add the line:

// Publish the id just parsed so that subsequent operator+ calls on the
// accumulators associate their values with this record.
AssociatedValueMapAccumulator::SetCurrentRecordId(currentRec);

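Prior to C++20 this would have been a poor idea, because std::accumulate was specified as acc = acc + *first, which hands the accumulator to operator+ as an lvalue and so copies it on every step. From C++20 on, it is specified as acc = std::move(acc) + *first. So in addition to the logic for sources, which uses std::move explicitly: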

case From_Where:

// operator+ returns the updated accumulator by value, so the right-hand side
// is already a temporary; the explicit std::move does not change which
// assignment operator is selected.
m_sources = std::move(m_sources + inVal);

break;

We get std::move in std::accumulate:

/// Splits a comma-separated field and folds every entry into outMap.
///
/// The record id associated with each entry is the one most recently passed
/// to AssociatedValueMapAccumulator::SetCurrentRecordId().
///
/// @param outMap    accumulator updated with the entries of inString.
/// @param inString  raw comma-separated value of the field.
void LtStats::addFromCommaString(
    LtStats::AssociatedValueMapAccumulator &outMap,
    const std::string &inString)
{
    static const boost::char_separator<char> commaSeparator(",");
    boost::tokenizer<boost::char_separator<char>> fields(inString,
                                                         commaSeparator);

    // std::accumulate takes its initial value by value, so passing outMap
    // directly would copy the whole map on every call. Moving it in avoids
    // that copy; the result is then move-assigned back.
    // Trade-off: if an exception escapes the fold, outMap is left in its
    // moved-from state rather than unchanged.
    outMap = std::accumulate(fields.begin(), fields.end(), std::move(outMap));
}

We no longer need the inCurrentRec parameter, as it's now accessed from the class static member.

Overall, it's an improvement in clarity and brevity; I'll keep it. (An alternative would have been to use a separate binary operator to update the map, defining += rather than + for the map. From where I sit it's six of one and half-a-dozen of the other.)

MultiIdRecordSetCriteria

Retrieval of the sets in the lists of ids uses a special RecordSetCriteria variant, the MultiIdRecordSetCriteria. This is very much like the Single Id version seen in an earlier post, except that it interns a set of book ids, and will retrieve them in bibliographic format in the normal case, but if there is a single id it behaves like the SingleId version and returns the full body of information.

/// Record-set criteria that select records by membership in a set of ids.
///
/// With more than one id the criteria report themselves as bibliographic;
/// with exactly one id they report a single id and expose it via getId().
class MultiIdRecordSetCriteria : public IRecordSetCriteria,
                                 public IApplicableCriteria
{
public:
    /// Creates criteria with an empty id set and registers this object with
    /// the supplied strategy.
    MultiIdRecordSetCriteria(
        std::unique_ptr<ICriteriaActionStrategy> &&inStrategy)
        : m_strategy(std::move(inStrategy))
    {
        m_strategy->setCriteria(this);
    }

    /// Creates criteria matching the ids in inVals.
    MultiIdRecordSetCriteria(
        std::span<int> inVals,
        std::unique_ptr<ICriteriaActionStrategy> &&inStrategy);

    ~MultiIdRecordSetCriteria() override;

    void addMatchingCriteria(IAdditionDiscriminatorSet &outSet,
                             const bool inBreakoutCollection) const override;

    // Selection is by id only, so collections and tags play no part.
    bool hasCollections() const override { return false; }
    bool hasTags() const override { return false; }

    // Intentionally a no-op: bibliographic mode is derived from the number
    // of ids (see isBibliographic()) rather than stored.
    void setBibliographic() override {}

    bool isBibliographic() const override { return m_ids.size() > 1; }

    bool hasSingleId() const override { return m_ids.size() == 1; }

    bool matchesId(const int inId) const override
    {
        return m_ids.find(inId) != m_ids.end();
    }

    /// @return the sole id when exactly one is held, otherwise 0.
    int getId() const override
    {
        return hasSingleId() ? *m_ids.cbegin() : 0;
    }

    MultiIdRecordSetCriteria *clone() const override;

    void apply(const IRecordSource &inSource) override;

private:
    std::set<int> m_ids;
    std::unique_ptr<ICriteriaActionStrategy> m_strategy;
};

The constructor taking a span is used by clone(), which plays the role of a copy constructor here, but it is used elsewhere as well, whence the indirection of passing the ids through a span.

/// Creates criteria matching the given ids.
///
/// @param inVals      ids to match; duplicates collapse because they are
///                    stored in a std::set.
/// @param inStrategy  action strategy; ownership is taken and this object is
///                    registered with it.
MultiIdRecordSetCriteria::MultiIdRecordSetCriteria(
    std::span<int> inVals,
    std::unique_ptr<ICriteriaActionStrategy> &&inStrategy)
    : m_strategy(std::move(inStrategy))
{
    // Register with the strategy first, then fill the id set, in the same
    // order as before.
    m_strategy->setCriteria(this);
    m_ids.insert(inVals.begin(), inVals.end());
}

/// Adds a discriminator built from this object's id set to outSet.
///
/// @param outSet                discriminator set that receives the new entry.
/// @param inBreakoutCollection  unused by this criteria type.
void MultiIdRecordSetCriteria::addMatchingCriteria(
    IAdditionDiscriminatorSet &outSet,
    [[maybe_unused]] const bool inBreakoutCollection) const
{
    auto idDiscriminator =
        std::make_unique<MultiIdAdditionDiscriminator>(m_ids);
    outSet.addDiscriminator(std::move(idDiscriminator));
}

void MultiIdRecordSetCriteria::apply(const IRecordSource &inSource)

{

m_strategy->process(inSource);

}

/// Creates a copy of these criteria holding the same ids and a clone of the
/// strategy.
///
/// @return a new object allocated with `new`; presumably the caller is
///         responsible for deleting it — confirm against the callers.
MultiIdRecordSetCriteria *MultiIdRecordSetCriteria::clone() const
{
    // The span constructor needs contiguous storage, which std::set does not
    // provide, so the ids are staged in a vector first.
    std::vector<int> idList(m_ids.begin(), m_ids.end());

    std::unique_ptr<ICriteriaActionStrategy> strategyCopy(m_strategy->clone());

    return new MultiIdRecordSetCriteria(idList, std::move(strategyCopy));
}

This class is used only when collecting stats and generating queries from them.

Search This Blog

C++ Development: The Breviary Project