From e057cc1728a28b60b96bd5aafca00536924eda9d Mon Sep 17 00:00:00 2001 From: "Zheng, Lei" Date: Wed, 28 Feb 2018 20:33:05 +0800 Subject: [PATCH] Introduce App.StringHasher and StringID Split the Document::mapStringToID function out into StringHasher. StringHasher functions as a persistent string hash/indexer. It stores the string in an internal map, and indexes the string with an incremental integer ID. The ID is returned as a reference-counted StringID object. By default StringHasher only persists used strings. Strings longer than a configurable length threshold will be internally hashed with SHA1 and the original text discarded. Both StringHasher and StringID are exposed to Python. Each Document object has a default hasher object. Other properties can easily embed their own string hasher. --- src/App/Application.cpp | 5 + src/App/CMakeLists.txt | 8 ++ src/App/Document.cpp | 93 ++++++-------- src/App/Document.h | 55 ++++---- src/App/DocumentPy.xml | 17 +-- src/App/DocumentPyImp.cpp | 30 +---- src/App/StringHasher.cpp | 232 ++++++++++++++++++++++++++++++++++ src/App/StringHasher.h | 124 ++++++++++++++++++ src/App/StringHasherPy.xml | 70 ++++++++++ src/App/StringHasherPyImp.cpp | 152 ++++++++++++++++++++++ src/App/StringIDPy.xml | 48 +++++++ src/App/StringIDPyImp.cpp | 77 +++++++++++ 12 files changed, 801 insertions(+), 110 deletions(-) create mode 100644 src/App/StringHasher.cpp create mode 100644 src/App/StringHasher.h create mode 100644 src/App/StringHasherPy.xml create mode 100644 src/App/StringHasherPyImp.cpp create mode 100644 src/App/StringIDPy.xml create mode 100644 src/App/StringIDPyImp.cpp diff --git a/src/App/Application.cpp b/src/App/Application.cpp index ae7fd9042e47..4b4b9ccc04d8 100644 --- a/src/App/Application.cpp +++ b/src/App/Application.cpp @@ -81,6 +81,7 @@ #include #include +#include "StringHasherPy.h" #include "GeoFeature.h" #include "FeatureTest.h" #include "FeaturePython.h" @@ -231,6 +232,7 @@ Application::Application(std::map &mConfig) 
Base::Interpreter().addType(&Base::PlacementPy::Type, pAppModule, "Placement"); Base::Interpreter().addType(&Base::RotationPy::Type, pAppModule, "Rotation"); Base::Interpreter().addType(&Base::AxisPy::Type, pAppModule, "Axis"); + Base::Interpreter().addType(&App::StringHasherPy::Type, pAppModule, "StringHasher"); // Note: Create an own module 'Base' which should provide the python // binding classes from the base module. At a later stage we should @@ -1312,6 +1314,9 @@ void Application::initTypes(void) Data::ComplexGeoData ::init(); Data::Segment ::init(); + App::StringID ::init(); + App::StringHasher ::init(); + // Properties App ::Property ::init(); App ::PropertyContainer ::init(); diff --git a/src/App/CMakeLists.txt b/src/App/CMakeLists.txt index c70aa0f0940f..20ac42cc997c 100644 --- a/src/App/CMakeLists.txt +++ b/src/App/CMakeLists.txt @@ -71,6 +71,8 @@ else() ) endif() +generate_from_xml(StringIDPy) +generate_from_xml(StringHasherPy) generate_from_xml(DocumentPy) generate_from_xml(DocumentObjectPy) generate_from_xml(ExtensionPy) @@ -92,6 +94,8 @@ generate_from_py(FreeCADInit InitScript.h) generate_from_py(FreeCADTest TestScript.h) SET(FreeCADApp_XML_SRCS + StringIDPy.xml + StringHasherPy.xml ExtensionPy.xml ExtensionContainerPy.xml DocumentObjectExtensionPy.xml @@ -113,6 +117,9 @@ SOURCE_GROUP("XML" FILES ${FreeCADApp_XML_SRCS}) # The document stuff SET(Document_CPP_SRCS Annotation.cpp + StringHasher.cpp + StringHasherPyImp.cpp + StringIDPyImp.cpp Document.cpp DocumentObject.cpp Extension.cpp @@ -160,6 +167,7 @@ SET(Document_CPP_SRCS SET(Document_HPP_SRCS Annotation.h + StringHasher.h Document.h DocumentObject.h Extension.h diff --git a/src/App/Document.cpp b/src/App/Document.cpp index e1b1de75b8cd..7b8d560e99e1 100644 --- a/src/App/Document.cpp +++ b/src/App/Document.cpp @@ -62,6 +62,7 @@ recompute path. Also enables more complicated dependencies beyond trees. 
#include #include #include +#include #ifdef USE_OLD_DAG #include @@ -143,6 +144,8 @@ typedef std::vector Path; namespace App { +typedef boost::bimap HasherMap; + // Pimpl class struct DocumentP { @@ -158,8 +161,7 @@ struct DocumentP int iUndoMode; unsigned int UndoMemSize; unsigned int UndoMaxStackSize; - std::map stringHashes; - long stringHashID; + mutable HasherMap hashers; #ifdef USE_OLD_DAG DependencyList DepList; std::map VertexObjectList; @@ -178,7 +180,6 @@ struct DocumentP iUndoMode = 0; UndoMemSize = 0; UndoMaxStackSize = 20; - stringHashID = 0; } static @@ -194,19 +195,6 @@ struct DocumentP PROPERTY_SOURCE(App::Document, App::PropertyContainer) -Document::StringID Document::mapStringToID(const char *text) { - return mapStringToID(QByteArray(text)); -} - -Document::StringID Document::mapStringToID(const QByteArray &data) { - QCryptographicHash hash(QCryptographicHash::Sha1); - hash.addData(data); - auto &id = d->stringHashes[hash.result()]; - if(!id) - id = std::make_shared(++d->stringHashID); - return id; -} - bool Document::testStatus(Status pos) const { return d->StatusBits.test((size_t)pos); @@ -1310,6 +1298,7 @@ void Document::setTransactionMode(int iMode) // constructor //-------------------------------------------------------------------------- Document::Document(void) + :Hasher(new StringHasher) { // Remark: In a constructor we should never increment a Python object as we cannot be sure // if the Python interpreter gets a reference of it. E.g. 
if we increment but Python don't @@ -1401,8 +1390,6 @@ Document::Document(void) ADD_PROPERTY_TYPE(LicenseURL,(licenseUrl.c_str()),0,Prop_None,"URL to the license text/contract"); ADD_PROPERTY_TYPE(ShowHidden,(false), 0,PropertyType(Prop_None), "Whether to show hidden object items in the tree view"); - ADD_PROPERTY_TYPE(SaveAllStringIDs,(false), 0,PropertyType(Prop_None), - "Whether to preserve unreferenced string IDs"); // this creates and sets 'TransientDir' in onChanged() ADD_PROPERTY_TYPE(TransientDir,(""),0,PropertyType(Prop_Transient|Prop_ReadOnly), @@ -1473,48 +1460,37 @@ std::string Document::getTransientDirectoryName(const std::string& uuid, const s void Document::Save (Base::Writer &writer) const { + d->hashers.clear(); + addStringHasher(Hasher); + writer.Stream() << "" << endl << "" << endl; - size_t count = 0; - if(SaveAllStringIDs.getValue()) - count = d->stringHashes.size(); - for(auto &v : d->stringHashes) - if(v.second.use_count()>1) - ++count; - writer.Stream() << "" << endl; + << "\" StringHasher=\"1\">" << endl; - writer.incInd(); - count = 0; - for(auto &v : d->stringHashes) { - if(SaveAllStringIDs.getValue() || v.second.use_count()>1) - writer.Stream() << "" << endl; - else - ++count; - } - writer.decInd(); + Hasher->Save(writer); - FC_LOG("string hash size " << d->stringHashes.size() << ", unused " << count); - PropertyContainer::Save(writer); // writing the features types writeObjects(d->objectArray, writer); writer.Stream() << "" << endl; + + d->hashers.clear(); } void Document::Restore(Base::XMLReader &reader) { int i,Cnt; + d->hashers.clear(); + addStringHasher(Hasher); reader.readElement("Document"); long scheme = reader.getAttributeAsInteger("SchemaVersion"); @@ -1530,19 +1506,10 @@ void Document::Restore(Base::XMLReader &reader) reader.FileVersion = 0; } - d->stringHashes.clear(); - d->stringHashID = 0; - if (reader.hasAttribute("StringHashCount")) { - int count = reader.getAttributeAsInteger("StringHashCount"); - for(i=0;istringHashID < 
id) - d->stringHashID = id; - d->stringHashes[QByteArray::fromBase64(value)] = std::make_shared(id); - } - } + if (reader.hasAttribute("StringHasher")) + Hasher->Restore(reader); + else + Hasher->clear(); // When this document was created the FileName and Label properties // were set to the absolute path or file name, respectively. To save @@ -1571,7 +1538,6 @@ void Document::Restore(Base::XMLReader &reader) reader.readElement("Feature"); string type = reader.getAttribute("type"); string name = reader.getAttribute("name"); - try { addObject(type.c_str(), name.c_str(), /*isNew=*/ false); } @@ -1607,6 +1573,23 @@ void Document::Restore(Base::XMLReader &reader) } reader.readEndElement("Document"); + d->hashers.clear(); +} + +std::pair Document::addStringHasher(StringHasherRef hasher) const { + auto ret = d->hashers.left.insert(HasherMap::left_map::value_type(hasher,(int)d->hashers.size())); + return std::make_pair(ret.second,ret.first->second); +} + +StringHasherRef Document::getStringHasher(int idx) const { + auto it = d->hashers.right.find(idx); + StringHasherRef hasher; + if(it == d->hashers.right.end()) { + hasher = new StringHasher; + d->hashers.right.insert(HasherMap::right_map::value_type(idx,hasher)); + }else + hasher = it->second; + return hasher; } static Document::ExportStatus _DocExporting; @@ -1636,6 +1619,7 @@ void Document::exportObjects(const std::vector& obj, std::ostream& out, bool keepExternal) { DocumentExporting exporting(keepExternal); + d->hashers.clear(); if(FC_LOG_INSTANCE.isEnabled(FC_LOGLEVEL_LOG)) { for(auto o : obj) { @@ -1666,6 +1650,7 @@ void Document::exportObjects(const std::vector& obj, // write additional files writer.writeFiles(); + d->hashers.clear(); } void Document::writeObjects(const std::vector& obj, @@ -1794,6 +1779,7 @@ Document::readObjects(Base::XMLReader& reader) std::vector Document::importObjects(Base::XMLReader& reader) { + d->hashers.clear(); Base::ObjectStatusLocker restoreBit(Status::Restoring, this); 
Base::ObjectStatusLocker restoreBit2(Status::Importing, this); reader.readElement("Document"); @@ -1824,6 +1810,7 @@ Document::importObjects(Base::XMLReader& reader) afterRestore(objs); signalFinishImportObjects(objs); + d->hashers.clear(); return objs; } @@ -1836,6 +1823,8 @@ unsigned int Document::getMemSize (void) const for (it = d->objectArray.begin(); it != d->objectArray.end(); ++it) size += (*it)->getMemSize(); + size += Hasher->getMemSize(); + // size of the document properties... size += PropertyContainer::getMemSize(); diff --git a/src/App/Document.h b/src/App/Document.h index c79792d88fd8..ed1061ff6725 100644 --- a/src/App/Document.h +++ b/src/App/Document.h @@ -28,6 +28,7 @@ #include #include +#include "StringHasher.h" #include "PropertyContainer.h" #include "PropertyStandard.h" #include "PropertyLinks.h" @@ -114,10 +115,10 @@ class AppExport Document : public App::PropertyContainer PropertyString TipName; /// Whether to show hidden items in TreeView PropertyBool ShowHidden; - /// Whether to preserve unreferences string ID - PropertyBool SaveAllStringIDs; //@} + StringHasherRef Hasher; + /** @name Signals of the document */ //@{ /// signal on new Object @@ -430,6 +431,34 @@ class AppExport Document : public App::PropertyContainer (const App::DocumentObject* from, const App::DocumentObject* to) const; //@} + /** Called by property during properly save its continaing StringHasher + * + * @param hasher: the input hasher + * @return Returns a pair. Boolean member indicate if the + * StringHasher has been saved before. The Integer is the hasher index. + * + * The StringHasher object is designed to be shared among multiple objects. + * So, we must not save duplicate copies of the same hasher. And must be + * able to restore with the same sharing relationship. This function returns + * whether the hasher has been saved before by other objects, and the index + * of the hasher. 
If the hasher has not been saved before, the object must + * save the hasher by calling StringHasher::Save + */ + std::pair addStringHasher(StringHasherRef hasher) const; + + /** Called by property to restore its containing StringHasher + * + * @param index: the index previously returned by calling + * addStringHasher() during save. + * + * @return Return the resulting string hasher. + * + * The caller is responsible to restore the hasher itself if it is the first + * owner of the hasher, i.e. return addStringHasher() returns true during + * save + */ + StringHasherRef getStringHasher(int index) const; + /** Return the object linked to this object * * @param links: holds the links found @@ -445,28 +474,11 @@ class AppExport Document : public App::PropertyContainer return !links.empty(); } + void addRemapProperty(Property *prop); + /// Function called to signal that an object identifier has been renamed void renameObjectIdentifiers(const std::map & paths, const std::function &selector = [](const App::DocumentObject *) { return true; }); - /** @name Maps an arbitary string to an integer - * - * These function internally hashes the string, and stroes the hash in a - * map to integer. The hashes of the strings passed to this function are - * persisted, which means the returned ID is an unique identifier of the - * string. The function return the interger as a shared pointer to - * reference count the ID so that it is possible to prune any unused hash, - * depending on the value of Document.SaveAllStringIDs - * - * The purpose of function is to provide a short form of a stable string - * hash. 
- */ - //@{ - typedef std::shared_ptr StringID; - StringID mapStringToID(const char *text); - StringID mapStringToID(const QByteArray &data); - static inline long stringID(StringID id) {return id?*id:-1;} - //@} - virtual PyObject *getPyObject(void); friend class Application; @@ -507,7 +519,6 @@ class AppExport Document : public App::PropertyContainer std::string getTransientDirectoryName(const std::string& uuid, const std::string& filename) const; - private: // # Data Member of the document +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ std::list mUndoTransactions; diff --git a/src/App/DocumentPy.xml b/src/App/DocumentPy.xml index ea1a756c9815..9cd6f143b99e 100644 --- a/src/App/DocumentPy.xml +++ b/src/App/DocumentPy.xml @@ -203,17 +203,6 @@ sort: whether to topologically sort the return list - - - -mapStringToID(txt) -> Int. Map a string to a unique integer ID. - -Any mapping obtained through this function will be persisted to document file. The returned ID -string mapping is persisted to document file. However, current implementation does not reference -count this ID in python. So it may be lost if Document.SaveAllStringID is False. - - - The dependency graph as GraphViz text @@ -322,6 +311,12 @@ count this ID in python. So it may be lost if Document.SaveAllStringID is False. 
+ + + Get the string hasher of this document + + + diff --git a/src/App/DocumentPyImp.cpp b/src/App/DocumentPyImp.cpp index 30a2beaeb882..5d939bb74cd1 100644 --- a/src/App/DocumentPyImp.cpp +++ b/src/App/DocumentPyImp.cpp @@ -849,31 +849,6 @@ PyObject* DocumentPy::getLinksTo(PyObject *args) return Py::new_reference_to(ret); } -PyObject* DocumentPy::mapStringToID(PyObject *args) -{ - PyObject *value; - if (!PyArg_ParseTuple(args, "O",&value)) - return NULL; // NULL triggers exception - std::string txt; -#if PY_MAJOR_VERSION >= 3 - if (PyUnicode_Check(value)) { - txt = PyUnicode_AsUTF8(value); - } -#else - if (PyUnicode_Check(value)) { - PyObject* unicode = PyUnicode_AsLatin1String(value); - txt = PyString_AsString(unicode); - Py_DECREF(unicode); - } - else if (PyString_Check(value)) { - txt = PyString_AsString(value); - } -#endif - else - throw Py::TypeError("expect argument of type string"); - return Py::new_reference_to(Py::Int(Document::stringID(getDocumentPtr()->mapStringToID(txt.c_str())))); -} - Py::List DocumentPy::getInList(void) const { Py::List ret; @@ -919,4 +894,9 @@ Py::Boolean DocumentPy::getImporting(void) const return Py::Boolean(getDocumentPtr()->testStatus(Document::Status::Importing)); } +Py::Object DocumentPy::getHasher() const { + return Py::Object(getDocumentPtr()->Hasher->getPyObject(),true); +} + + diff --git a/src/App/StringHasher.cpp b/src/App/StringHasher.cpp new file mode 100644 index 000000000000..90c3e4b5d779 --- /dev/null +++ b/src/App/StringHasher.cpp @@ -0,0 +1,232 @@ +/**************************************************************************** + * Copyright (c) 2018 Zheng, Lei (realthunder) * + * * + * This file is part of the FreeCAD CAx development system. * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of the GNU Library General Public * + * License as published by the Free Software Foundation; either * + * version 2 of the License, or (at your option) any later version. 
* + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Library General Public License for more details. * + * * + * You should have received a copy of the GNU Library General Public * + * License along with this library; see the file COPYING.LIB. If not, * + * write to the Free Software Foundation, Inc., 59 Temple Place, * + * Suite 330, Boston, MA 02111-1307, USA * + * * + ****************************************************************************/ + + +#include "PreCompiled.h" + +#ifndef _PreComp_ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace App; + +/////////////////////////////////////////////////////////// + +TYPESYSTEM_SOURCE_ABSTRACT(App::StringID, Base::BaseClass) + +PyObject *StringID::getPyObject() { + return new StringIDPy(this); +} + +std::string StringID::toString() const { + std::ostringstream ss; + ss << 'H' << value(); + return ss.str(); +} + +/////////////////////////////////////////////////////////// +// +namespace boost { +template<> +struct hash { + size_t operator()(const QByteArray &data) const { + return qHash(data); + } +}; +} + +typedef boost::bimap< + boost::bimaps::unordered_set_of, + boost::bimaps::set_of, + boost::bimaps::with_info > HashMapBase; + +class StringHasher::HashMap: public HashMapBase +{ +public: + bool SaveAll = false; + int Threshold = 40; +}; + +/////////////////////////////////////////////////////////// + +TYPESYSTEM_SOURCE(App::StringHasher, Base::Persistence) + +StringHasher::StringHasher() + :_hashes(new HashMap) +{} + +StringHasher::~StringHasher() { +} + +void StringHasher::setSaveAll(bool enable) { + _hashes->SaveAll = enable; +} + +bool StringHasher::getSaveAll() const { + return _hashes->SaveAll; +} + +void StringHasher::setThreshold(int 
threshold) { + _hashes->Threshold = threshold; +} + +int StringHasher::getThreshold() const { + return _hashes->Threshold; +} + +long StringHasher::lastID() const { + if(_hashes->right.empty()) + return 0; + auto it = _hashes->right.end(); + --it; + return it->first; +} + +StringIDRef StringHasher::getID(const char *text, int len) { + if(len<0) len = strlen(text); + return getID(QByteArray::fromRawData(text,len),false); +} + +StringIDRef StringHasher::getID(QByteArray data, bool binary) { + QByteArray hash; + bool hashed = _hashes->Threshold>=0 && (int)data.size()>_hashes->Threshold; + if(hashed) { + QCryptographicHash hasher(QCryptographicHash::Sha1); + hasher.addData(data); + hash = hasher.result(); + }else + hash = data; + + auto it = _hashes->left.find(hash); + if(it!=_hashes->left.end()) + return it->info; + + StringIDRef sid; + if(hashed) { + // if hashed, discard the original data + data = hash; + }else{ + // if not hashed, make a deep copy of the data + data = QByteArray(data.constData(),data.size()); + hash = data; + } + sid = new StringID(lastID()+1,data,binary,hashed); + _hashes->right.insert(_hashes->right.end(),HashMap::right_map::value_type(sid->value(),hash,sid)); + return sid; +} + +StringIDRef StringHasher::getID(long id) const { + auto it = _hashes->right.find(id); + if(it == _hashes->right.end()) + return StringIDRef(); + return it->info; +} + +void StringHasher::Save(Base::Writer &writer) const { + size_t count = _hashes->SaveAll?this->size():this->count(); + writer.incInd(); + writer.Stream() << writer.ind() << "SaveAll + << "\" threshold=\"" << _hashes->Threshold << "\">" << std::endl; + for(auto &v : _hashes->right) { + if(_hashes->SaveAll || v.info.getRefCount()>1) { + // We are omiting the indentation to save some space in case of long list of hashes + if(v.info->isHashed()) + writer.Stream() <<"isBinary()) + writer.Stream() <<"data().constData()); + writer.Stream() << "\" id=\""<" << std::endl; + } + } + writer.Stream() << writer.ind() << 
"" << std::endl; + writer.decInd(); +} + +void StringHasher::clear() { + _hashes->clear(); +} + +size_t StringHasher::size() const { + return _hashes->size(); +} + +size_t StringHasher::count() const { + size_t count = 0; + for(auto &v : _hashes->right) + if(v.info.getRefCount()>1) + ++count; + return count; +} + +void StringHasher::Restore(Base::XMLReader &reader) { + clear(); + reader.readElement("StringHasher"); + int count = reader.getAttributeAsInteger("count"); + _hashes->SaveAll = reader.getAttributeAsInteger("saveall")?true:false; + _hashes->Threshold = reader.getAttributeAsInteger("threshold"); + for(int i=0;iright.insert(_hashes->right.end(),HashMap::right_map::value_type(sid->value(),data,sid)); + } + reader.readEndElement("StringHasher"); +} + +unsigned int StringHasher::getMemSize (void) const { + return (_hashes->SaveAll?size():count()); +} + +PyObject *StringHasher::getPyObject() { + return new StringHasherPy(this); +} + +std::map StringHasher::getIDMap() const { + std::map ret; + for(auto &v : _hashes->right) + ret.emplace_hint(ret.end(),v.first,v.info); + return ret; +} diff --git a/src/App/StringHasher.h b/src/App/StringHasher.h new file mode 100644 index 000000000000..d4fcbd2f3116 --- /dev/null +++ b/src/App/StringHasher.h @@ -0,0 +1,124 @@ +/**************************************************************************** + * Copyright (c) 2018 Zheng, Lei (realthunder) * + * * + * This file is part of the FreeCAD CAx development system. * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of the GNU Library General Public * + * License as published by the Free Software Foundation; either * + * version 2 of the License, or (at your option) any later version. * + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * + * GNU Library General Public License for more details. * + * * + * You should have received a copy of the GNU Library General Public * + * License along with this library; see the file COPYING.LIB. If not, * + * write to the Free Software Foundation, Inc., 59 Temple Place, * + * Suite 330, Boston, MA 02111-1307, USA * + * * + ****************************************************************************/ + +#ifndef APP_STRINGID_H +#define APP_STRINGID_H + +#include +#include +#include +#include +#include + +namespace App { + +class AppExport StringID: public Base::BaseClass, public Base::Handled { + TYPESYSTEM_HEADER(); +public: + StringID(long id, const QByteArray &data, bool binary, bool hashed) + :_id(id),_data(data),_binary(binary),_hashed(hashed) + {} + virtual ~StringID(){} + long value() const {return _id;} + const QByteArray &data() const {return _data;} + bool isBinary() const {return _binary;} + bool isHashed() const {return _hashed;} + virtual PyObject *getPyObject() override; + std::string toString() const; +private: + long _id; + QByteArray _data; + bool _binary; + bool _hashed; +}; + +typedef Base::Reference StringIDRef; + +class AppExport StringHasher: public Base::Persistence, public Base::Handled { + + TYPESYSTEM_HEADER(); + +public: + StringHasher(); + virtual ~StringHasher(); + + virtual unsigned int getMemSize (void) const override; + virtual void Save (Base::Writer &/*writer*/) const override; + virtual void Restore(Base::XMLReader &/*reader*/) override; + + /** Maps an arbitary string to an integer + * + * These function internally hashes the string, and stroes the hash in a + * map to integer. The hashes of the strings passed to this function are + * persisted, which means the returned ID is an unique identifier of the + * string. 
The function returns the integer as a shared pointer to + * reference count the ID so that it is possible to prune any unused hash + * + * The purpose of this function is to provide a short form of a stable string + * hash. + */ + StringIDRef getID(const char *text, int len=-1); + + /** Map text or binary data to an integer */ + StringIDRef getID(QByteArray data, bool binary); + + /** Obtain the reference counted StringID object from numerical id + * + * This function exists because the string hash is a one way function, and + * the original text is not persistent. The caller uses this function to + * retrieve the reference counted ID object after restore + */ + StringIDRef getID(long id) const; + + std::map getIDMap() const; + + /// Clear all string hashes + void clear(); + + /// Size of the hash table + size_t size() const; + + /// Return the number of hashes that are used by others + size_t count() const; + + virtual PyObject *getPyObject(void); + + void setSaveAll(bool enable); + bool getSaveAll() const; + + void setThreshold(int threshold); + int getThreshold() const; + + class HashMap; + +private: + long lastID() const; + +private: + std::unique_ptr _hashes; +}; + +typedef Base::Reference StringHasherRef; + +} + +#endif diff --git a/src/App/StringHasherPy.xml b/src/App/StringHasherPy.xml new file mode 100644 index 000000000000..60a2f0f46120 --- /dev/null +++ b/src/App/StringHasherPy.xml @@ -0,0 +1,70 @@ + + + + + + This is the StringHasher class + This is the StringHasher class + + + + +getID(txt|id, base64=False) -> StringID + +If the input is text, return a StringID object that is unique within this hasher. This +StringID object is reference counted. The hasher may only save hash IDs that are used. + +If the input is an integer, then the hasher will try to find the StringID object stored +with the same integer value. 
+ +base64: indicate if the input 'txt' is base64 encoded binary data + + + + + + Check if two hasher are the same + + + + + Return count of used hashes + + + + + + Return the size of the hashes + + + + + + Whether to save all string hashes regardless of its use count + + + + + + Data length exceed this threshold will be hashed before storing + + + + + + Return the entire string table as Int->String dictionary + + + + + diff --git a/src/App/StringHasherPyImp.cpp b/src/App/StringHasherPyImp.cpp new file mode 100644 index 000000000000..5986b8658b68 --- /dev/null +++ b/src/App/StringHasherPyImp.cpp @@ -0,0 +1,152 @@ +/**************************************************************************** + * Copyright (c) 2018 Zheng, Lei (realthunder) * + * * + * This file is part of the FreeCAD CAx development system. * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of the GNU Library General Public * + * License as published by the Free Software Foundation; either * + * version 2 of the License, or (at your option) any later version. * + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Library General Public License for more details. * + * * + * You should have received a copy of the GNU Library General Public * + * License along with this library; see the file COPYING.LIB. If not, * + * write to the Free Software Foundation, Inc., 59 Temple Place, * + * Suite 330, Boston, MA 02111-1307, USA * + * * + ****************************************************************************/ + +#include "PreCompiled.h" + +#include "StringHasher.h" + +#include "StringHasherPy.h" +#include "StringHasherPy.cpp" + +using namespace App; + +// returns a string which represent the object e.g. 
when printed in python +std::string StringHasherPy::representation(void) const +{ + return std::string(""); +} + +PyObject *StringHasherPy::PyMake(struct _typeobject *, PyObject *, PyObject *) // Python wrapper +{ + return new StringHasherPy(new StringHasher); +} + +// constructor method +int StringHasherPy::PyInit(PyObject* , PyObject* ) +{ + return 0; +} + + +PyObject* StringHasherPy::isSame(PyObject *args) +{ + PyObject *other; + if (!PyArg_ParseTuple(args, "O!", &StringHasherPy::Type, &other)){ // convert args: Python->C + return Py::new_reference_to(Py::False()); + } + auto otherHasher = static_cast(other)->getStringHasherPtr(); + return Py::new_reference_to(Py::Boolean(getStringHasherPtr() == otherHasher)); +} + +PyObject* StringHasherPy::getID(PyObject *args) +{ + long id = -1; + PyObject *value = 0; + PyObject *base64 = Py_False; + if (!PyArg_ParseTuple(args, "l|O",&id,&base64)) { + if (!PyArg_ParseTuple(args, "O|O",&value,&base64)) + return NULL; // NULL triggers exception + } + if(id>0) { + PY_TRY { + auto sid = getStringHasherPtr()->getID(id); + if(!sid) Py_Return; + return sid->getPyObject(); + }PY_CATCH; + } + std::string txt; +#if PY_MAJOR_VERSION >= 3 + if (PyUnicode_Check(value)) { + txt = PyUnicode_AsUTF8(value); + } +#else + if (PyUnicode_Check(value)) { + PyObject* unicode = PyUnicode_AsLatin1String(value); + txt = PyString_AsString(unicode); + Py_DECREF(unicode); + } + else if (PyString_Check(value)) { + txt = PyString_AsString(value); + } +#endif + else + throw Py::TypeError("expect argument of type string"); + PY_TRY { + QByteArray data; + StringIDRef sid; + if(PyObject_IsTrue(base64)) { + data = QByteArray::fromBase64(QByteArray::fromRawData(txt.c_str(),txt.size())); + sid = getStringHasherPtr()->getID(data,true); + }else + sid = getStringHasherPtr()->getID(txt.c_str(),txt.size()); + return sid->getPyObject(); + }PY_CATCH; +} + +Py::Int StringHasherPy::getCount(void) const { + return Py::Int((long)getStringHasherPtr()->count()); +} + +Py::Int 
StringHasherPy::getSize(void) const { + return Py::Int((long)getStringHasherPtr()->size()); +} + +Py::Boolean StringHasherPy::getSaveAll(void) const { + return Py::Boolean(getStringHasherPtr()->getSaveAll()); +} + +void StringHasherPy::setSaveAll(Py::Boolean value) { + getStringHasherPtr()->setSaveAll(value); +} + +Py::Int StringHasherPy::getThreshold(void) const { + return Py::Int((long)getStringHasherPtr()->getThreshold()); +} + +void StringHasherPy::setThreshold(Py::Int value) { + getStringHasherPtr()->setThreshold(value); +} + +Py::Dict StringHasherPy::getTable() const { + Py::Dict dict; + for(auto &v : getStringHasherPtr()->getIDMap()) { + if(v.second->isHashed()) + dict.setItem(Py::Int(v.first),Py::String(v.second->data().toHex().constData())); + else if(v.second->isBinary()) + dict.setItem(Py::Int(v.first),Py::String(v.second->data().toBase64().constData())); + else + dict.setItem(Py::Int(v.first),Py::String(v.second->data().constData())); + } + return dict; +} + +PyObject *StringHasherPy::getCustomAttributes(const char* /*attr*/) const +{ + return 0; +} + +int StringHasherPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/) +{ + return 0; +} + + diff --git a/src/App/StringIDPy.xml b/src/App/StringIDPy.xml new file mode 100644 index 000000000000..b14230c88600 --- /dev/null +++ b/src/App/StringIDPy.xml @@ -0,0 +1,48 @@ + + + + + + This is the StringID class + This is the StringID class + + + + Check if two StringIDs are the same + + + + + Return the integer value of this ID + + + + + + Return the data associated with this ID + + + + + + Check if the data is binary, + + + + + + Check if the data is hash, if so 'Data' returns a base64 encoded string of the raw hash + + + + + diff --git a/src/App/StringIDPyImp.cpp b/src/App/StringIDPyImp.cpp new file mode 100644 index 000000000000..e8eb9df9c42d --- /dev/null +++ b/src/App/StringIDPyImp.cpp @@ -0,0 +1,77 @@ +/**************************************************************************** + * Copyright 
(c) 2018 Zheng, Lei (realthunder) * + * * + * This file is part of the FreeCAD CAx development system. * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of the GNU Library General Public * + * License as published by the Free Software Foundation; either * + * version 2 of the License, or (at your option) any later version. * + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Library General Public License for more details. * + * * + * You should have received a copy of the GNU Library General Public * + * License along with this library; see the file COPYING.LIB. If not, * + * write to the Free Software Foundation, Inc., 59 Temple Place, * + * Suite 330, Boston, MA 02111-1307, USA * + * * + ****************************************************************************/ + +#include "PreCompiled.h" + +#include "StringHasher.h" + +#include "StringIDPy.h" +#include "StringIDPy.cpp" + +using namespace App; + +// returns a string which represent the object e.g. 
when printed in python +std::string StringIDPy::representation(void) const +{ + return getStringIDPtr()->toString(); +} + +PyObject* StringIDPy::isSame(PyObject *args) +{ + PyObject *other; + if (!PyArg_ParseTuple(args, "O!", &StringIDPy::Type, &other)) { // convert args: Python->C + return Py::new_reference_to(Py::False()); + } + auto otherID = static_cast(other)->getStringIDPtr(); + return Py::new_reference_to(Py::Boolean(getStringIDPtr() == otherID)); +} + +Py::Int StringIDPy::getValue(void) const { + return Py::Int(getStringIDPtr()->value()); +} + +Py::String StringIDPy::getData(void) const { + auto sid = getStringIDPtr(); + if(sid->isBinary()) + return Py::String(sid->data().toBase64().constData()); + return Py::String(sid->data().constData()); +} + +Py::Boolean StringIDPy::getIsBinary(void) const { + return Py::Boolean(getStringIDPtr()->isBinary()); +} + +Py::Boolean StringIDPy::getIsHashed(void) const { + return Py::Boolean(getStringIDPtr()->isHashed()); +} + +PyObject *StringIDPy::getCustomAttributes(const char* /*attr*/) const +{ + return 0; +} + +int StringIDPy::setCustomAttributes(const char* /*attr*/, PyObject* /*obj*/) +{ + return 0; +} + +