Configuration
In principle, managing configuration -- via the environment, the command line, database lookups (note from experience: great for keeping one copy across multiple systems, frequently a massive pain when doing diagnostics on production problems when you don't have access to production), or flat files -- should really be handed off to something like boost or another third-party library. All the problems involving configurations have been met and surmounted many times before.
But the point of this exercise was not to farm out functionality to boost (on any large scale), but to exercise modern coding skills.
So, of course I built a basic config file parser. However, it's in XML rather than the old-fashioned line config file model, which allows a certain amount of extra flexibility to be built into the structure. This also means that keys and values can include spaces.
This is another class that is over in general utilities, not in the immediate project.
/// Reader for a line-oriented configuration stream with a thin XML veneer.
///
/// Values are stored per group (the empty group name holds records that
/// appear outside any group). Records may name a command-line flag that
/// overrides them when overrideFromCommandLine() is called.
class ConfigFileReader
{
public:
    /// Parses inStream; the first substantive line must be the opening tag
    /// of inTopElement.
    ConfigFileReader(std::istream &inStream, const std::string &inTopElement);

    // Lookups. The definitions are not part of this listing; presumably the
    // un-grouped forms read from the empty group -- confirm against the
    // implementation.
    std::string getValue(const std::string &inKey) const;
    std::string getGroupValue(const std::string &inGroup,
                              const std::string &inKey) const;
    std::string getValueWithDefault(const std::string &inKey,
                                    const std::string &inDefault) const;
    std::string getGroupValueWithDefault(const std::string &inGroup,
                                         const std::string &inKey,
                                         const std::string &inDefault) const;

    /// Applies the command-line overrides registered while parsing.
    void overrideFromCommandLine(const int argc, const char **argv);

private:
    using KeyValueMap = std::map<std::string, std::string>;
    using GroupAndKey = std::pair<std::string, std::string>;

    /// A line carries no content when it is blank or starts an XML comment.
    static bool IsEmpty(const std::string &inVal)
    {
        if (inVal.empty())
            return true;
        return inVal.starts_with("<!--");
    }

    /// Stores one key/value under a group and registers its override flag,
    /// if any.
    void insertParsedValue(const std::string &inCurrentGroup,
                           const std::string &inKey,
                           const std::string &inValue,
                           const std::string &inOverride);

    /// Pulls the quoted value of attribute inAttrib out of one element line.
    static std::string ExtractAttribute(std::string_view inVal,
                                        const std::string &inAttrib,
                                        const bool inIsMandatory = true);

    // group name -> (key -> value)
    std::map<std::string, KeyValueMap> m_data;
    // command-line flag -> (group, key) it overrides
    std::map<std::string, GroupAndKey> m_overrides;
};
The config file is basically an old-style line-based config file with a thin veneer of XML over it.
Instead of
[Heading]
Item Foo
Item2 Bar
we support
<!-- Sample Config File -->
<FooConfig>
<Group name="Heading"> <!-- optional, lines can appear outside a heading -->
<Record type="Item" value="Foo"/>
<Record type="Item2" value="Bar"/>
</Group>
</FooConfig>
<configs> can be used instead of <Group name="configs"> and is meant for other configuration files.
There are three enhancements:
1) We can use a different element, EnvironmentRecord, where the value attribute is an environment variable. This will be replaced with the value looked up by getenv().
2) We can provide an additional attribute to either type, override, which specifies a command-line parameter whose associated value will override the value provided when overrideFromCommandLine() is called with the command line values.
3) If we use the element DeferredRecord, we do not provide a value but do provide an override. In that case the call to overrideFromCommandLine() will create a record with the specified key and the command-line value.
/// Parses a configuration stream whose body is wrapped in <inTopElement>.
///
/// Leading blank and comment lines are skipped. If the stream is unusable or
/// ends before any substantive line is found, the reader is left empty.
///
/// @param inStream      stream to read, one element per line
/// @param inTopElement  name of the enclosing element (without brackets)
/// @throws std::runtime_error if the first substantive line is not exactly
///         the opening tag of inTopElement, or if an element lacks a
///         mandatory attribute (raised by ExtractAttribute).
ConfigFileReader::ConfigFileReader(std::istream &inStream,
                                   const std::string &inTopElement)
{
    if (!inStream.good())
        return;

    // Skip blank lines and comments ahead of the top-level element.
    std::string s;
    while (inStream && IsEmpty(s))
    {
        std::getline(inStream, s);
        boost::algorithm::trim(s);
    }
    if (!inStream)
        return;
    if (s != "<"s + inTopElement + ">"s)
        throw std::runtime_error(
            "ConfigFileReader():First substantive line is not an XML element");

    const std::string end("</"s + inTopElement + ">"s);
    std::string currentGroup;
    while (std::getline(inStream, s))
    {
        // Trim before comparing so an indented closing tag still ends the
        // parse.
        boost::algorithm::trim(s);
        if (s == end)
            break;
        if (IsEmpty(s))
            continue;

        if (s.starts_with("<Group"))
        {
            currentGroup = ExtractAttribute(s, "name"s);
        }
        else if ((s == "</Group>") || (s == "</configs>"))
        {
            currentGroup.clear();
        }
        else if (s == "<configs>")
        {
            currentGroup = "configs";
        }
        else if (s.starts_with("<Record"))
        {
            insertParsedValue(currentGroup, ExtractAttribute(s, "type"s),
                              ExtractAttribute(s, "value"),
                              ExtractAttribute(s, "override", false));
        }
        else if (s.starts_with("<DeferredRecord"))
        {
            // No value yet: only remember which flag will supply it.
            if (std::string overrideFlag = ExtractAttribute(s, "override");
                !overrideFlag.empty())
                m_overrides.insert(std::make_pair(
                    overrideFlag,
                    std::pair<std::string, std::string>{
                        currentGroup, ExtractAttribute(s, "type"s) }));
        }
        else if (s.starts_with("<EnvironmentRecord"))
        {
            // The value attribute names an environment variable; the record
            // is dropped when that variable is not set.
            const std::string value_key = ExtractAttribute(s, "value");
            if (const char *cp = std::getenv(value_key.c_str()); cp != nullptr)
            {
                insertParsedValue(currentGroup, ExtractAttribute(s, "type"s),
                                  cp,
                                  ExtractAttribute(s, "override", false));
            }
        }
        // Any other line is ignored.
    }
}
The key private functions assisting in the parse are:
/// Extracts the double-quoted value of attribute inAttrib from one element
/// line.
///
/// Only whole attribute names are matched: the name must sit at the start of
/// the text or directly after whitespace, so asking for "name" does not pick
/// up the tail of an attribute such as filename="...".
///
/// @param inVal          the element text, e.g. <Record type="a" value="b"/>
/// @param inAttrib       attribute name to look up
/// @param inIsMandatory  when false, a missing attribute yields an empty
///                       string instead of an exception
/// @return the text between the attribute's quotes
/// @throws std::runtime_error if a mandatory attribute is missing, or if the
///         attribute's closing quote is missing.
std::string ConfigFileReader::ExtractAttribute(std::string_view inVal,
                                               const std::string &inAttrib,
                                               const bool inIsMandatory)
{
    constexpr std::string_view whitespace(" \t\r\n\f\v");
    const std::string opener = inAttrib + "=\"";

    // Step past occurrences that are merely the tail of a longer name.
    auto index = inVal.find(opener);
    while (index != std::string_view::npos && index != 0
           && whitespace.find(inVal[index - 1]) == std::string_view::npos)
    {
        index = inVal.find(opener, index + 1);
    }

    if (index == std::string_view::npos)
    {
        if (inIsMandatory)
            throw std::runtime_error(
                "ConfigFileReader::extractAttribute():Illegal element with no "
                + inAttrib + ": " + std::string(inVal));
        return {};
    }

    const std::string_view rest = inVal.substr(index + opener.length());
    const auto closing = rest.find('"');
    if (closing == std::string_view::npos)
    {
        throw std::runtime_error("ConfigFileReader():Illegal element with bad "
                                 + inAttrib
                                 + " attribute: " + std::string(inVal));
    }
    return std::string(rest.substr(0, closing));
}
/// Records inKey -> inValue under inCurrentGroup and, when inOverride is not
/// empty, registers that flag as the override for the (group, key) pair.
///
/// Existing entries win: a key already present in the group, or a flag
/// already registered, is left untouched.
void ConfigFileReader::insertParsedValue(const std::string &inCurrentGroup,
                                         const std::string &inKey,
                                         const std::string &inValue,
                                         const std::string &inOverride)
{
    // operator[] creates the group on first use; emplace never overwrites.
    m_data[inCurrentGroup].emplace(inKey, inValue);

    if (inOverride.empty())
        return;
    m_overrides.try_emplace(inOverride, inCurrentGroup, inKey);
}
It may be worth pausing to note just how thoroughly this is permeated by C++11 and later (mainly later) features: string_view, starts_with, emplace, scoped initializations in if statements, container initialization with set values, string literals.
We currently have six different types of conditions we are handling during that loop (actually, seven, but two are collapsed into one test). These are currently short branches, but there's obviously scope for the number of cases to expand as the number of elements expands. Is there a way of fitting this into a command pattern of some sort? Note that the conditions aren't simple equality tests, which would have made for a simple map structure. We have a set of exclusive conditions which would be modelled, at best, as std::function<bool(const std::string&)>. We can't use that as a key, but we could have std::vector<std::pair<std::function<bool(const std::string&)>, std::function<void(const std::string&)>>> and use find_if to get the first match. We would also have to make currentGroup a class variable instead of the current local variable.
Setting the actions up would look like:
// Ordered (test, handler) pairs; the first test that accepts a line selects
// the handler to run.
std::vector<std::pair<std::function<bool(const std::string&)>, std::function<void(const std::string&)>>> m_actions;
// Test: does the line open a <Record element?
std::function<bool(const std::string&)> key = [](const std::string& line) {
    return line.starts_with("<Record");
};
// Handler: store the record under the current group.
std::function<void(const std::string&)> action = [&](const std::string& line) {
    insertParsedValue(currentGroup, ExtractAttribute(line, "type"s),
                      ExtractAttribute(line, "value"),
                      ExtractAttribute(line, "override", false));
};
m_actions.emplace_back(key, action);
...
and executing them would look like:
std::string foo("<Record>");
...
// Find the first pair whose test accepts the line and, if there is one, run
// its handler. The container is the m_actions vector declared in the setup.
const auto iter = std::ranges::find_if(m_actions, [&](const auto& inVal) { return inVal.first(foo); });
if (iter != m_actions.end())
    iter->second(foo);
Though it could be simplified by terminating the vector with
// Sentinel entry: a test that accepts every line, paired with a handler that
// does nothing, so a search over the vector always finds something.
std::function<bool(const std::string&)> key = [](const std::string&) { return true; };
std::function<void(const std::string&)> action = [](const std::string&) {};
m_actions.emplace_back(key, action);
so that there would not need to be a test for the value:
// Safe only while the catch-all sentinel is the last entry of m_actions.
std::ranges::find_if(m_actions, [&](const auto& inVal) { return inVal.first(foo); })->second(foo);
The execution is obviously slightly cleaner (though not always, as some actions are very short and simple). The setup is slightly messier. Can it be cleaned up?
The tests have some commonalities. Many of them are simple starts_with comparisons, so we can extract that pattern to a class:
/// Predicate object that reports whether a string begins with a fixed prefix.
class StartTester
{
public:
    /// @param inTest the prefix to look for; a copy is kept.
    StartTester(const std::string& inTest): m_test(inTest) { }

    /// @return true when inStr starts with the stored prefix.
    /// Const, so the tester can be called through a const object or
    /// reference.
    bool operator()(const std::string& inStr) const { return inStr.starts_with(m_test); }

private:
    std::string m_test;
};
That means that instead of the longer lambda we can just have:
// Handler: store the record under the current group.
std::function<void(const std::string&)> action = [&](const std::string& line) {
    insertParsedValue(currentGroup, ExtractAttribute(line, "type"s),
                      ExtractAttribute(line, "value"),
                      ExtractAttribute(line, "override", false));
};
// The prefix test is now a StartTester rather than a hand-written lambda.
m_actions.emplace_back(StartTester("<Record"), action);
for the tests which are this encapsulated.
(Another alternative is to reduce all the tests to a single starts_with() test; we could then have
// Each entry pairs a literal prefix with the handler for lines that start
// with it.
std::vector<std::pair<std::string, std::function<void(const std::string&)>>> m_actions;
// Compare against the prefix (the pair's first member), not the pair itself.
const auto iter = std::ranges::find_if(m_actions, [&](const auto& inVal) { return foo.starts_with(inVal.first); });
if (iter != m_actions.end())
    iter->second(foo);
(No good sentinel on this model, so the test has to remain.)
Initialization then looks like
std::function<void(const std::string&)> action = [&](const std::string& inVal){
insertParsedValue(currentGroup, ExtractAttribute(inVal, "type"s),
ExtractAttribute(inVal, "value"),
ExtractAttribute(inVal, "override", false));
};
actions.push_back(std::make_pair("<Record"s, action));
(which is not much of an improvement, actually).)
Is there any other reason to make this change, other than futureproofing the code?
Well, yes, there is. The extracted general functionality is useful for other types of single-line parsing. By encapsulating just that part, we make it available for other code as well as getting the actual configuration code as clean as possible.
/// An ordered list of (line test, line handler) pairs for single-line
/// parsing: execute() runs the handler of the first test that accepts a line.
class ConfigActionSet
{
public:
    /// Registers inAction for lines that start with inCondition.
    void add(const std::string& inCondition, std::function<void(const std::string&)> inAction);

    /// Appends a catch-all entry whose handler does nothing.
    void terminateTests();

    /// Dispatches inLine to the first matching handler.
    void execute(const std::string& inLine) const;

private:
    using LineTest = std::function<bool(const std::string&)>;
    using LineHandler = std::function<void(const std::string&)>;

    std::vector<std::pair<LineTest, LineHandler>> m_actions;
};
/// Registers a handler for every line that starts with inCondition.
///
/// Entries are tried in the order they were added, so an earlier prefix
/// shadows any later one that it also matches.
void ConfigActionSet::add(const std::string &inCondition,
                          std::function<void(const std::string &)> inAction)
{
    // The closure owns its own copy of the prefix.
    auto startsWithCondition = [prefix = inCondition](const std::string &line) {
        return line.starts_with(prefix);
    };
    m_actions.emplace_back(std::move(startsWithCondition), std::move(inAction));
}
void ConfigActionSet::terminateTests()
{
std::function<bool(const std::string &)> key
= [](const std::string &inVal) { return true; };
std::function<void(const std::string &)> action
= [&](const std::string &inVal) {};
m_actions.push_back(std::make_pair(key, action));
}
/// Runs the handler of the first registered test that accepts inLine.
///
/// A line that no test accepts is ignored. The end-of-range check makes this
/// safe even when terminateTests() was never called; dereferencing the
/// search result unchecked would be undefined behaviour in that case.
void ConfigActionSet::execute(const std::string &inLine) const
{
    const auto match = std::ranges::find_if(
        m_actions,
        [&inLine](const auto &entry) { return entry.first(inLine); });
    if (match != m_actions.end())
        match->second(inLine);
}
That's not a lot of logic encapsulated, but it makes the syntax clearer in the calling context. We can also break out the initializations into a separate function:
void ConfigFileReader::initializeActions()
{
m_actions.add("<Group", [&](const std::string &inLine) {
m_currentGroup = ExtractAttribute(inLine, "name"s);
});
m_actions.add("<configs", [&](const std::string &inLine) {
m_currentGroup = "configs";
});
m_actions.add("</Group",
[&](const std::string &inLine) { m_currentGroup.clear(); });
m_actions.add("</configs",
[&](const std::string &inLine) { m_currentGroup.clear(); });
m_actions.add("<Record", [&](const std::string &inLine) {
insertParsedValue(ExtractAttribute(inLine, "type"s),
ExtractAttribute(inLine, "value"),
ExtractAttribute(inLine, "override", false));
});
m_actions.add("<DeferredRecord", [&](const std::string &inLine) {
if (std::string overrideFlag = ExtractAttribute(inLine, "override");
!overrideFlag.empty())
m_overrides.insert(std::make_pair(
overrideFlag, std::pair<std::string, std::string>{
m_currentGroup, ExtractAttribute(inLine, "type"s) }));
});
m_actions.add("<EnvironmentRecord", [&](const std::string &inLine) {
std::string value_key = ExtractAttribute(inLine, "value");
if (char *cp = std::getenv(value_key.c_str()); cp != nullptr)
{
insertParsedValue(ExtractAttribute(inLine, "type"s), cp,
ExtractAttribute(inLine, "override", false));
}
});
m_actions.terminateTests();
}
We now have:
/// Parses a configuration stream whose body is wrapped in <inTopElement>,
/// dispatching each substantive line through m_actions.
///
/// Leading blank and comment lines are skipped. If the stream is unusable or
/// ends before any substantive line is found, the reader is left empty.
///
/// @throws std::runtime_error if the first substantive line is not exactly
///         the opening tag of inTopElement; handlers may also throw.
ConfigFileReader::ConfigFileReader(std::istream &inStream,
                                   const std::string &inTopElement)
{
    initializeActions();
    if (!inStream.good())
        return;

    // Skip blank lines and comments ahead of the top-level element.
    std::string s;
    while (inStream && IsEmpty(s))
    {
        std::getline(inStream, s);
        boost::algorithm::trim(s);
    }
    if (!inStream)
        return;
    if (s != "<"s + inTopElement + ">"s)
        throw std::runtime_error(
            "ConfigFileReader():First substantive line is not an XML element");

    const std::string end("</"s + inTopElement + ">"s);
    while (std::getline(inStream, s))
    {
        // Trim before comparing so an indented closing tag still ends the
        // parse.
        boost::algorithm::trim(s);
        if (s == end)
            break;
        if (!IsEmpty(s))
            m_actions.execute(s);
    }
}
We have an improvement in clarity and a more extensible model. (One advantage of the command model is that you can't break up a switch statement, but you can break up the tests you put into a command model into logical blocks, if they become unwieldy.)
This is effectively an alternative to the classic Chain of Responsibility pattern, except that:
1) The test and the execution are separated into different objects.
2) There is no passing on of the action from object to object; the passing is done by the STL algorithm. This means that there can be no branching tree, or sequence of partial operations corresponding to a decorator (though there is nothing to prevent an action from holding other actions in this model, so it's not ruled out).
3) Classic polymorphism with virtual functions is not required, just conformity to the function signature.
A variant using the std::ranges::filter_view would have a behaviour corresponding to Broadcast/Listener rather than a Chain of Responsibility.
If it is safe, i.e. if the executing function is stateless or has adequate safety (object lifetimes, reentrancy, thread-safety, etc.) it would also be possible to pass the retrieved function around for later execution rather than executing it immediately. That makes no sense here, though.
Breviary Configuration
An application will have its own variable set of needs, but some requirements will be common to just about any breviary application. So we can model that at an abstract level:
// Abstract source of the configuration values common to breviary
// applications. Every accessor is pure virtual; BaseConfigFileReader supplies
// an implementation backed by a config file.
class IConfigSource
{
public:
// Virtual so that implementations can be destroyed through this interface.
virtual ~IConfigSource();
// Directory holding the shared XML files; returned by reference, so the
// implementation must keep the string alive.
virtual const std::string& getConfigDir() const = 0;
// Names of the individual data files.
virtual std::string getHoursInfoFile() const = 0;
virtual std::string getPsalmsFile() const = 0;
virtual std::string getHymnsFile() const = 0;
virtual std::string getKalendarFile() const = 0;
// Which Use applies (Use is declared elsewhere).
virtual Use getUse() const = 0;
// Line length setting -- presumably the output width in characters; confirm
// against callers.
virtual int getLineLength() const = 0;
};
And use the utility config class to implement that:
/// IConfigSource implementation backed by the utility ConfigFileReader.
/// Intended as a base class: m_config is protected so that application
/// readers can query further settings.
class BaseConfigFileReader : public IConfigSource
{
public:
    /// Parses inStream, which must be wrapped in the inTopElement element.
    BaseConfigFileReader(std::istream &inStream,
                         const std::string &inTopElement):
        m_config(inStream, inTopElement)
    {
    }

    ~BaseConfigFileReader() override;

    const std::string &getConfigDir() const override;

    // File names come from the "configs" group. The first three fall back to
    // a default name; the kalendar file has no default.
    std::string getHoursInfoFile() const override
    {
        return m_config.getGroupValueWithDefault("configs", "hours",
                                                 "hoursinfo.xml");
    }

    std::string getPsalmsFile() const override
    {
        return m_config.getGroupValueWithDefault("configs", "psalms",
                                                 "psalms.xml");
    }

    std::string getHymnsFile() const override
    {
        return m_config.getGroupValueWithDefault("configs", "hymns",
                                                 "hymns.xml");
    }

    std::string getKalendarFile() const override
    {
        return m_config.getGroupValue("configs", "kalendar");
    }

    Use getUse() const override;

    /// Forwards the command line to the underlying reader's override logic.
    void overrideFromCommandLine(const int argc, const char **argv)
    {
        m_config.overrideFromCommandLine(argc, argv);
    }

    /// The "LineLength" setting as an integer, or 80 when it is empty.
    /// std::stoi throws if the setting is not a number.
    int getLineLength() const override
    {
        const auto configured = m_config.getValue("LineLength");
        return configured.empty() ? 80 : std::stoi(configured);
    }

protected:
    JSBUtil::ConfigFileReader m_config;

private:
    // Cache filled on the first getConfigDir() call; mutable because that
    // accessor is const.
    mutable std::string m_directory;
};
This is meant to be extended in an application context.
/// Maps the "use" setting onto the Use enumeration: "Sarum" and "Anglican"
/// both select Use::Anglican, anything else selects Use::Roman.
Use BaseConfigFileReader::getUse() const
{
    const std::string configured = m_config.getValue("use");
    const bool isAnglican = (configured == "Sarum") || (configured == "Anglican");
    return isAnglican ? Use::Anglican : Use::Roman;
}
The directory for the other XML files (those common to all applications) can be derived from the file, from an environment variable, or ultimately from a hardcoded value:
/// Returns the directory of the shared XML files, resolving it on first use.
///
/// Resolution order: the "directory" entry of the "configs" group, then the
/// BREVIARY_HOME environment variable, then a hard-coded path. A non-empty
/// result is cached in m_directory and returned unchanged afterwards.
const std::string &BaseConfigFileReader::getConfigDir() const
{
    if (m_directory.empty())
    {
        m_directory = m_config.getGroupValue("configs", "directory");
        if (m_directory.empty())
        {
            const char *home = std::getenv("BREVIARY_HOME");
            m_directory = (home != nullptr)
                              ? home
                              : "/home/james/src/cpp20/breviary/";
        }
    }
    return m_directory;
}
It's set once and then used without re-setting.
Comments
Post a Comment