Record Sources

The DayElement class is the highest level of actual XML parsing.  Above that, we have one level which depends on the fact -- not specifiable at the XML level -- that the first and last lines of a day element stand alone on single lines.  This allows us to avoid expensive element parsing when we know what record we want (we get the record name from the calendar logic covered in the last post).  There is a top-level element (hoursinfo), but these top-level handlers actually ignore it.

The core interface is a single-function abstract class:

class IDayRecordSource

{

public:

  virtual ~IDayRecordSource();

  virtual std::unique_ptr<IFullDayInfo> getFullDayInfo(const std::string& inName) const = 0;

};

Note that at this level we do not export the information as an XML element, but only as an interface.

The simplest implementation wraps a stream reference and does a linear search through the stream for the element in question:

class StreamDayRecordSource : public IDayRecordSource

{

public:

  StreamDayRecordSource(std::istream& inStream,

const IPsalter & inPsalter,

const Use inUse,

const HymnSeasons inSeason,

const Days inDay):

    m_stream(inStream),

    m_psalter(inPsalter),

    m_use(inUse),

    m_season(inSeason),

    m_day(inDay)

  { }

  ~StreamDayRecordSource() override;

  std::unique_ptr<IFullDayInfo> getFullDayInfo(const std::string& inName) const override;

private:

  std::istream& m_stream;

  const IPsalter & m_psalter;

  Use m_use;

  HymnSeasons m_season;

  Days m_day;

};

std::unique_ptr<IFullDayInfo>

StreamDayRecordSource::getFullDayInfo(const std::string &inName) const

{

  std::string elt;

  std::string sstring("name=\"" + inName + "\"");

  while (m_stream)

    {

      std::string s;

      std::getline(m_stream, s);

      if (s.find("<day") == std::string::npos)

        continue;

      if (s.find(sstring) == std::string::npos)

        continue;

      else

        {

  auto ind = s.find('<');

  if (ind != 0)

    elt = s.substr(ind);

  else

    elt = s;

          break;

        }

    }

  if (!m_stream || elt.empty())

    {

      return std::make_unique<NullFullDayInfo>();

    }

  while (m_stream)

    {

      std::string s;

      std::getline(m_stream, s);

      elt.append(s);

      if (s.find("</day") != std::string::npos)

        {

          return std::make_unique<DayElement>(m_psalter, m_use, m_season, elt, m_day);

        }

    }

  return std::make_unique<NullFullDayInfo>();

}

This is, on its own, of limited use (suited mainly for diagnostic utilities), as we frequently need two or more elements, and they will only sometimes be in the appropriate order.  So there is an indexed in-memory storage class as well:

class IndexedDayRecordSource : public IDayRecordSource

{

public:

  IndexedDayRecordSource(std::istream &inStream, const IPsalter &inPsalter,

                         const Use inUse, const HymnSeasons inSeason,

                         const Days inDay);

  ~IndexedDayRecordSource() override;

  std::unique_ptr<IFullDayInfo>

  getFullDayInfo(const std::string &inName) const override;

private:

  std::unordered_map<std::string, std::string> m_records;

  const IPsalter &m_psalter;

  Use m_use;

  HymnSeasons m_season;

  Days m_day;

};

In this class, the constructor breaks the entire stream into text blocks, using logic similar to that used in the stream class.  Here's the first cut at the constructor:

IndexedDayRecordSource::IndexedDayRecordSource(std::istream &inStream,

       const IPsalter & inPsalter,

                                               const Use inUse,

                                               const HymnSeasons inSeason,

                                               const Days inDay)

  : m_psalter(inPsalter), m_use(inUse), m_season(inSeason), m_day(inDay)

{

  while (inStream)

    {

      std::string elt;

      while (inStream)

{

  std::string s;

  std::getline(inStream, s);

  if (s.find("<day") == std::string::npos)

    continue;

  auto ind = s.find('<');

  if (ind != 0)

    elt = s.substr(ind);

  else

    elt = s;

  break;

        }

      if (!inStream || elt.empty())

return;

      while (inStream)

{

  std::string s;

  std::getline(inStream, s);

  elt.append(s);

  if (s.find("</day") != std::string::npos)

    break;

}

      if (elt.find("</day") == std::string::npos)

return;

      auto index = elt.find("name=\"");

      if (index == std::string::npos)

continue;

      std::string sub = elt.substr(index+6);

      index = sub.find('"');

      if (index == std::string::npos)

continue;

      std::string name = sub.substr(0, index);

      m_records.insert(std::make_pair(name, elt));

    }

}

while the accessor just searches the hash table for the value:

std::unique_ptr<IFullDayInfo>

IndexedDayRecordSource::getFullDayInfo(const std::string &inName) const

{

  auto iter = m_records.find(inName);

  if (iter == m_records.end())

    {

      return std::make_unique<NullFullDayInfo>();

    }

  return std::make_unique<DayElement>(m_psalter, m_use, m_season, iter->second, m_day);

}

However, it's worth noting that the logical structure in both implementations is similar, leading to a small refactoring.  We can extract the shared logic, with the variation supplied by passing the predicate as a closure and by handling the returned value differently in each caller:

/// Pulls the text of day elements, one per call, out of a line-oriented
/// stream.
///
/// The stream is held by reference and must outlive the getter.  Each call to
/// getText continues reading from where the previous read stopped.
class DayElementGetter
{
public:
  /// @param inStream  stream to read from.
  explicit DayElementGetter(std::istream &inStream) : m_stream(inStream) {}

  /// Return the text of the next element whose opening line satisfies
  /// @p inCondition.
  ///
  /// @param inCondition  predicate applied to each line read while searching
  ///                     for the opening line.
  /// @return the opening line (from its first '<' onward) followed by every
  ///         subsequent line up to and including the first one containing
  ///         "</day", concatenated without line separators; an empty string
  ///         if no opening line or no closing line is found.
  std::string
  getText(std::function<bool(const std::string &)> inCondition) const;

private:
  std::istream &m_stream; // not owned
};

std::string DayElementGetter::getText(
    std::function<bool(const std::string &)> inCondition) const
{
  std::string elt;
  std::string line;

  // Phase 1: find the opening line.  Testing the result of getline itself
  // means the predicate never sees the empty string left by a failed read.
  while (std::getline(m_stream, line))
    {
      if (!inCondition(line))
        continue;

      // A line with no '<' cannot open an element.  Skip it rather than call
      // substr(npos), which would throw std::out_of_range.
      const auto tagStart = line.find('<');
      if (tagStart == std::string::npos)
        continue;

      // Discard any text that precedes the first '<' on the line.
      elt = line.substr(tagStart);
      break;
    }

  if (elt.empty())
    {
      return {};
    }

  // Phase 2: append lines up to and including the one holding "</day".
  while (std::getline(m_stream, line))
    {
      elt.append(line);
      if (line.find("</day") != std::string::npos)
        {
          return elt;
        }
    }

  // The stream ran out before a closing line was seen.
  return {};
}

With this in place the Stream function now looks like:

std::unique_ptr<IFullDayInfo>

StreamDayRecordSource::getFullDayInfo(const std::string &inName) const

{

  std::string sstring("name=\"" + inName + "\"");

  DayElementGetter getter(m_stream);

  auto l = [&sstring](const std::string& inRecord) {

    return (inRecord.find("<day") != std::string::npos)

      && (inRecord.find(sstring) != std::string::npos);

  };

  std::string elt = getter.getText(l);

  if (elt.empty())

    return std::make_unique<NullFullDayInfo>();

  return std::make_unique<DayElement>(m_psalter, m_use, m_season, elt, m_day);

}

and the Indexed constructor now looks like:

IndexedDayRecordSource::IndexedDayRecordSource(std::istream &inStream,

       const IPsalter & inPsalter,

                                               const Use inUse,

                                               const HymnSeasons inSeason,

                                               const Days inDay)

  : m_psalter(inPsalter), m_use(inUse), m_season(inSeason), m_day(inDay)

{

  DayElementGetter getter(inStream);

  while (inStream)

    {

      std::string elt = getter.getText([](const std::string& inRecord) {

         return (inRecord.find("<day") != std::string::npos);

      });

      if (elt.empty())

break;

      auto index = elt.find("name=\"");

      if (index == std::string::npos)

continue;

      std::string sub = elt.substr(index+6);

      index = sub.find('"');

      if (index == std::string::npos)

continue;

      m_records.insert(std::make_pair(sub.substr(0, index), elt));

    }

}


Comments

Popular posts from this blog

Boundaries

State Machines

Considerations on an Optimization