Skip to content

Commit

Permalink
Introduce App.StringHasher and StringID
Browse files Browse the repository at this point in the history
Split Document::mapStringToID function into StringHasher

StringHasher functions as a persistent string hasher/indexer. It stores each
string in an internal map and indexes it with an incremental integer ID.
The ID is returned as a reference-counted StringID object.
By default, StringHasher persists only strings that are in use. Strings longer
than a configurable length threshold are hashed internally with SHA1 and the
original text is discarded.

Both StringHasher and StringID are exposed to Python. The Document object
has a default hasher object. Other properties can easily embed their own
string hasher.
  • Loading branch information
realthunder committed Mar 5, 2018
1 parent 2ddd4b4 commit e057cc1
Show file tree
Hide file tree
Showing 12 changed files with 801 additions and 110 deletions.
5 changes: 5 additions & 0 deletions src/App/Application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
#include <Base/QuantityPy.h>
#include <Base/UnitPy.h>

#include "StringHasherPy.h"
#include "GeoFeature.h"
#include "FeatureTest.h"
#include "FeaturePython.h"
Expand Down Expand Up @@ -231,6 +232,7 @@ Application::Application(std::map<std::string,std::string> &mConfig)
Base::Interpreter().addType(&Base::PlacementPy::Type, pAppModule, "Placement");
Base::Interpreter().addType(&Base::RotationPy::Type, pAppModule, "Rotation");
Base::Interpreter().addType(&Base::AxisPy::Type, pAppModule, "Axis");
Base::Interpreter().addType(&App::StringHasherPy::Type, pAppModule, "StringHasher");

// Note: Create an own module 'Base' which should provide the python
// binding classes from the base module. At a later stage we should
Expand Down Expand Up @@ -1312,6 +1314,9 @@ void Application::initTypes(void)
Data::ComplexGeoData ::init();
Data::Segment ::init();

App::StringID ::init();
App::StringHasher ::init();

// Properties
App ::Property ::init();
App ::PropertyContainer ::init();
Expand Down
8 changes: 8 additions & 0 deletions src/App/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ else()
)
endif()

generate_from_xml(StringIDPy)
generate_from_xml(StringHasherPy)
generate_from_xml(DocumentPy)
generate_from_xml(DocumentObjectPy)
generate_from_xml(ExtensionPy)
Expand All @@ -92,6 +94,8 @@ generate_from_py(FreeCADInit InitScript.h)
generate_from_py(FreeCADTest TestScript.h)

SET(FreeCADApp_XML_SRCS
StringIDPy.xml
StringHasherPy.xml
ExtensionPy.xml
ExtensionContainerPy.xml
DocumentObjectExtensionPy.xml
Expand All @@ -113,6 +117,9 @@ SOURCE_GROUP("XML" FILES ${FreeCADApp_XML_SRCS})
# The document stuff
SET(Document_CPP_SRCS
Annotation.cpp
StringHasher.cpp
StringHasherPyImp.cpp
StringIDPyImp.cpp
Document.cpp
DocumentObject.cpp
Extension.cpp
Expand Down Expand Up @@ -160,6 +167,7 @@ SET(Document_CPP_SRCS

SET(Document_HPP_SRCS
Annotation.h
StringHasher.h
Document.h
DocumentObject.h
Extension.h
Expand Down
93 changes: 41 additions & 52 deletions src/App/Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ recompute path. Also enables more complicated dependencies beyond trees.
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/subgraph.hpp>
#include <boost/graph/graphviz.hpp>
#include <boost/bimap.hpp>

#ifdef USE_OLD_DAG
#include <boost/graph/topological_sort.hpp>
Expand Down Expand Up @@ -143,6 +144,8 @@ typedef std::vector <size_t> Path;

namespace App {

typedef boost::bimap<StringHasherRef,int> HasherMap;

// Pimpl class
struct DocumentP
{
Expand All @@ -158,8 +161,7 @@ struct DocumentP
int iUndoMode;
unsigned int UndoMemSize;
unsigned int UndoMaxStackSize;
std::map<QByteArray,Document::StringID> stringHashes;
long stringHashID;
mutable HasherMap hashers;
#ifdef USE_OLD_DAG
DependencyList DepList;
std::map<DocumentObject*,Vertex> VertexObjectList;
Expand All @@ -178,7 +180,6 @@ struct DocumentP
iUndoMode = 0;
UndoMemSize = 0;
UndoMaxStackSize = 20;
stringHashID = 0;
}

static
Expand All @@ -194,19 +195,6 @@ struct DocumentP

PROPERTY_SOURCE(App::Document, App::PropertyContainer)

/// Convenience overload: wraps the C string in a QByteArray and forwards to
/// the QByteArray overload of mapStringToID().
Document::StringID Document::mapStringToID(const char *text) {
    const QByteArray bytes(text);
    return mapStringToID(bytes);
}

/// Map @a data to a reference-counted integer ID, keyed by its SHA1 digest.
///
/// The digest is looked up in the document's string-hash table. A digest that
/// already has an ID returns that ID; otherwise the next value of the running
/// counter is allocated, stored under the digest, and returned.
Document::StringID Document::mapStringToID(const QByteArray &data) {
    QCryptographicHash sha1(QCryptographicHash::Sha1);
    sha1.addData(data);
    const QByteArray digest = sha1.result();

    // operator[] default-inserts an empty StringID for a digest not seen before.
    StringID &slot = d->stringHashes[digest];
    if (slot)
        return slot;

    slot = std::make_shared<const long>(++d->stringHashID);
    return slot;
}

bool Document::testStatus(Status pos) const
{
return d->StatusBits.test((size_t)pos);
Expand Down Expand Up @@ -1310,6 +1298,7 @@ void Document::setTransactionMode(int iMode)
// constructor
//--------------------------------------------------------------------------
Document::Document(void)
:Hasher(new StringHasher)
{
// Remark: In a constructor we should never increment a Python object as we cannot be sure
// if the Python interpreter gets a reference of it. E.g. if we increment but Python don't
Expand Down Expand Up @@ -1401,8 +1390,6 @@ Document::Document(void)
ADD_PROPERTY_TYPE(LicenseURL,(licenseUrl.c_str()),0,Prop_None,"URL to the license text/contract");
ADD_PROPERTY_TYPE(ShowHidden,(false), 0,PropertyType(Prop_None),
"Whether to show hidden object items in the tree view");
ADD_PROPERTY_TYPE(SaveAllStringIDs,(false), 0,PropertyType(Prop_None),
"Whether to preserve unreferenced string IDs");

// this creates and sets 'TransientDir' in onChanged()
ADD_PROPERTY_TYPE(TransientDir,(""),0,PropertyType(Prop_Transient|Prop_ReadOnly),
Expand Down Expand Up @@ -1473,48 +1460,37 @@ std::string Document::getTransientDirectoryName(const std::string& uuid, const s

void Document::Save (Base::Writer &writer) const
{
d->hashers.clear();
addStringHasher(Hasher);

writer.Stream() << "<?xml version='1.0' encoding='utf-8'?>" << endl
<< "<!--" << endl
<< " FreeCAD Document, see http://www.freecadweb.org for more information..." << endl
<< "-->" << endl;

size_t count = 0;
if(SaveAllStringIDs.getValue())
count = d->stringHashes.size();
for(auto &v : d->stringHashes)
if(v.second.use_count()>1)
++count;

writer.Stream() << "<Document SchemaVersion=\"4\" ProgramVersion=\""
<< App::Application::Config()["BuildVersionMajor"] << "."
<< App::Application::Config()["BuildVersionMinor"] << "R"
<< App::Application::Config()["BuildRevision"]
<< "\" FileVersion=\"" << writer.getFileVersion()
<< "\" StringHashCount=\"" << count << "\">" << endl;
<< "\" StringHasher=\"1\">" << endl;

writer.incInd();
count = 0;
for(auto &v : d->stringHashes) {
if(SaveAllStringIDs.getValue() || v.second.use_count()>1)
writer.Stream() << "<Hash value=\""<< v.first.toBase64().constData()
<< "\" id=\""<<*v.second<<"\"/>" << endl;
else
++count;
}
writer.decInd();
Hasher->Save(writer);

FC_LOG("string hash size " << d->stringHashes.size() << ", unused " << count);

PropertyContainer::Save(writer);

// writing the features types
writeObjects(d->objectArray, writer);
writer.Stream() << "</Document>" << endl;

d->hashers.clear();
}

void Document::Restore(Base::XMLReader &reader)
{
int i,Cnt;
d->hashers.clear();
addStringHasher(Hasher);

reader.readElement("Document");
long scheme = reader.getAttributeAsInteger("SchemaVersion");
Expand All @@ -1530,19 +1506,10 @@ void Document::Restore(Base::XMLReader &reader)
reader.FileVersion = 0;
}

d->stringHashes.clear();
d->stringHashID = 0;
if (reader.hasAttribute("StringHashCount")) {
int count = reader.getAttributeAsInteger("StringHashCount");
for(i=0;i<count;++i) {
reader.readElement("Hash");
QByteArray value(reader.getAttribute("value"));
long id = reader.getAttributeAsInteger("id");
if(d->stringHashID < id)
d->stringHashID = id;
d->stringHashes[QByteArray::fromBase64(value)] = std::make_shared<const long>(id);
}
}
if (reader.hasAttribute("StringHasher"))
Hasher->Restore(reader);
else
Hasher->clear();

// When this document was created the FileName and Label properties
// were set to the absolute path or file name, respectively. To save
Expand Down Expand Up @@ -1571,7 +1538,6 @@ void Document::Restore(Base::XMLReader &reader)
reader.readElement("Feature");
string type = reader.getAttribute("type");
string name = reader.getAttribute("name");

try {
addObject(type.c_str(), name.c_str(), /*isNew=*/ false);
}
Expand Down Expand Up @@ -1607,6 +1573,23 @@ void Document::Restore(Base::XMLReader &reader)
}

reader.readEndElement("Document");
d->hashers.clear();
}

/// Register @a hasher in the document's hasher table.
///
/// The hasher is offered the next index, which is the current table size.
/// The returned pair holds the insertion flag (true when the entry was
/// actually inserted) and the index stored for the entry the insert call
/// resolved to.
std::pair<bool,int> Document::addStringHasher(StringHasherRef hasher) const {
    const int nextIndex = static_cast<int>(d->hashers.size());
    const auto inserted = d->hashers.left.insert(
            HasherMap::left_map::value_type(hasher, nextIndex));
    return std::make_pair(inserted.second, inserted.first->second);
}

/// Return the hasher registered under index @a idx.
///
/// If no hasher is stored under that index yet, a fresh StringHasher is
/// created, recorded in the table under @a idx, and returned.
StringHasherRef Document::getStringHasher(int idx) const {
    const auto found = d->hashers.right.find(idx);
    if (found != d->hashers.right.end())
        return found->second;

    // Unknown index: create a hasher and remember it for later lookups.
    StringHasherRef created(new StringHasher);
    d->hashers.right.insert(HasherMap::right_map::value_type(idx, created));
    return created;
}

static Document::ExportStatus _DocExporting;
Expand Down Expand Up @@ -1636,6 +1619,7 @@ void Document::exportObjects(const std::vector<App::DocumentObject*>& obj,
std::ostream& out, bool keepExternal)
{
DocumentExporting exporting(keepExternal);
d->hashers.clear();

if(FC_LOG_INSTANCE.isEnabled(FC_LOGLEVEL_LOG)) {
for(auto o : obj) {
Expand Down Expand Up @@ -1666,6 +1650,7 @@ void Document::exportObjects(const std::vector<App::DocumentObject*>& obj,

// write additional files
writer.writeFiles();
d->hashers.clear();
}

void Document::writeObjects(const std::vector<App::DocumentObject*>& obj,
Expand Down Expand Up @@ -1794,6 +1779,7 @@ Document::readObjects(Base::XMLReader& reader)
std::vector<App::DocumentObject*>
Document::importObjects(Base::XMLReader& reader)
{
d->hashers.clear();
Base::ObjectStatusLocker<Status, Document> restoreBit(Status::Restoring, this);
Base::ObjectStatusLocker<Status, Document> restoreBit2(Status::Importing, this);
reader.readElement("Document");
Expand Down Expand Up @@ -1824,6 +1810,7 @@ Document::importObjects(Base::XMLReader& reader)

afterRestore(objs);
signalFinishImportObjects(objs);
d->hashers.clear();
return objs;
}

Expand All @@ -1836,6 +1823,8 @@ unsigned int Document::getMemSize (void) const
for (it = d->objectArray.begin(); it != d->objectArray.end(); ++it)
size += (*it)->getMemSize();

size += Hasher->getMemSize();

// size of the document properties...
size += PropertyContainer::getMemSize();

Expand Down
55 changes: 33 additions & 22 deletions src/App/Document.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <Base/Persistence.h>
#include <Base/Type.h>

#include "StringHasher.h"
#include "PropertyContainer.h"
#include "PropertyStandard.h"
#include "PropertyLinks.h"
Expand Down Expand Up @@ -114,10 +115,10 @@ class AppExport Document : public App::PropertyContainer
PropertyString TipName;
/// Whether to show hidden items in TreeView
PropertyBool ShowHidden;
/// Whether to preserve unreferenced string IDs
PropertyBool SaveAllStringIDs;
//@}

StringHasherRef Hasher;

/** @name Signals of the document */
//@{
/// signal on new Object
Expand Down Expand Up @@ -430,6 +431,34 @@ class AppExport Document : public App::PropertyContainer
(const App::DocumentObject* from, const App::DocumentObject* to) const;
//@}

/** Called by a property while saving, to register its containing StringHasher
*
* @param hasher: the input hasher
* @return Returns a pair<bool,int>. The boolean member is true if the
* StringHasher has not been registered before, i.e. the caller is its first
* owner. The integer is the hasher index.
*
* The StringHasher object is designed to be shared among multiple objects.
* So, we must not save duplicate copies of the same hasher, and we must be
* able to restore it with the same sharing relationship. This function
* reports whether the hasher is newly registered, together with the index
* of the hasher. If the hasher has not been saved before, the object must
* save the hasher by calling StringHasher::Save
*/
std::pair<bool,int> addStringHasher(StringHasherRef hasher) const;

/** Called by a property to restore its containing StringHasher
*
* @param index: the index previously returned by calling
* addStringHasher() during save.
*
* @return Returns the resulting string hasher.
*
* The caller is responsible for restoring the hasher itself if it is the
* first owner of the hasher, i.e. if addStringHasher() returned true during
* save
*/
StringHasherRef getStringHasher(int index) const;

/** Return the object linked to this object
*
* @param links: holds the links found
Expand All @@ -445,28 +474,11 @@ class AppExport Document : public App::PropertyContainer
return !links.empty();
}

void addRemapProperty(Property *prop);

/// Function called to signal that an object identifier has been renamed
void renameObjectIdentifiers(const std::map<App::ObjectIdentifier, App::ObjectIdentifier> & paths, const std::function<bool(const App::DocumentObject*)> &selector = [](const App::DocumentObject *) { return true; });

/** @name Maps an arbitrary string to an integer
*
* These functions internally hash the string and store the hash in a
* map to an integer. The hashes of the strings passed to these functions are
* persisted, which means the returned ID is a unique identifier of the
* string. The functions return the integer as a shared pointer to
* reference count the ID so that it is possible to prune any unused hash,
* depending on the value of Document.SaveAllStringIDs
*
* The purpose of these functions is to provide a short form of a stable
* string hash.
*/
//@{
typedef std::shared_ptr<const long> StringID;
StringID mapStringToID(const char *text);
StringID mapStringToID(const QByteArray &data);
static inline long stringID(StringID id) {return id?*id:-1;}
//@}

virtual PyObject *getPyObject(void);

friend class Application;
Expand Down Expand Up @@ -507,7 +519,6 @@ class AppExport Document : public App::PropertyContainer

std::string getTransientDirectoryName(const std::string& uuid, const std::string& filename) const;


private:
// # Data Member of the document +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
std::list<Transaction*> mUndoTransactions;
Expand Down
Loading

0 comments on commit e057cc1

Please sign in to comment.