Skip to content

Commit

Permalink
class Simhasher: using cppjieba::Jieba instead of cppjieba::KeywordExtractor
Browse files Browse the repository at this point in the history
  • Loading branch information
yanyiwu committed Sep 22, 2024
1 parent 7409421 commit 87b9037
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions include/simhash/Simhasher.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef SIMHASH_SIMHASHER_HPP
#define SIMHASH_SIMHASHER_HPP

#include "cppjieba/KeywordExtractor.hpp"
#include "cppjieba/Jieba.hpp"
#include "jenkins.h"

namespace simhash
Expand All @@ -12,15 +12,15 @@ namespace simhash
private:
enum{BITS_LENGTH = 64};
jenkins _hasher;
KeywordExtractor _extractor;
cppjieba::Jieba _jieba;
public:
Simhasher(const string& dictPath, const string& modelPath, const string& idfPath, const string& stopWords): _extractor(dictPath, modelPath, idfPath, stopWords)
Simhasher(const string& dictPath, const string& modelPath, const string& idfPath, const string& stopWords): _jieba(dictPath, modelPath, "", idfPath, stopWords)
{}
~Simhasher(){};

bool extract(const string& text, vector<pair<string,double> > & res, size_t topN) const
{
_extractor.Extract(text, res, topN);
_jieba.extractor.Extract(text, res, topN);
return true;
}
bool make(const string& text, size_t topN, vector<pair<uint64_t, double> >& res) const
Expand Down

0 comments on commit 87b9037

Please sign in to comment.