Skip to content

Commit

Permalink
removed bluebird, request packages, added pr from koorchik/node-myste…
Browse files Browse the repository at this point in the history
  • Loading branch information
ilnuribat committed May 8, 2024
1 parent 06e93ad commit 3760249
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 440 deletions.
1 change: 1 addition & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ module.exports = {
ecmaVersion: 'latest',
},
rules: {
'no-console': 'off',
},
};
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

MyStem
MyStem-promise
------

This module contains a wrapper for Yandex Mystem 3.1 (3.0 for 32-bit architectures), an excellent morphological analyzer for the Russian language. A morphological analyzer can perform lemmatization of text and derive a set of morphological attributes for each token.
Expand All @@ -12,16 +12,19 @@ This allows to avoid process start overhead.

```javascript

var MyStem = require('mystem3');
const MyStem = require('mystem3');

var myStem = new MyStem();
const myStem = new MyStem();
myStem.start(); // Run mystem in separate process

myStem.lemmatize("немцы").then(function(lemma) {
myStem.lemmatize("немцы")
.then(function(lemma) {
console.log(lemma);
}).then(function() {
})
.then(function() {
myStem.stop(); // Or you can write process.exit();
}).catch(console.error);
})
.catch(console.error);

```

Expand Down
63 changes: 34 additions & 29 deletions bin/download-mystem.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,16 @@
const fs = require('fs');
const path = require('path');
const { mkdirp } = require('mkdirp');
const request = require('request');
const tar = require('tar');
const extractZip = require('extract-zip');
const { rimraf } = require('rimraf');
const https = require('https');
const http = require('http');


const TARBALL_URLS = {
linux: {
ia32: 'https://download.cdn.yandex.net/mystem/mystem-3.0-linux3.5-32bit.tar.gz',
x64: 'http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz',
x64: 'http://download.cdn.yandex.net/mystem/mystem-3.1-linux-64bit.tar.gz',
},
darwin: {
x64: 'http://download.cdn.yandex.net/mystem/mystem-3.1-macosx.tar.gz',
Expand All @@ -35,29 +33,36 @@ async function downloadFile(url, dest) {
const ws = fs.createWriteStream(dest);

await new Promise((resolve, reject) => {
protocol.get(url, (res) => {
if ([301, 302].includes(res.statusCode)) {
return downloadFile(res.headers.location, dest)
.then(resolve)
.catch(reject);
}
if (res.statusCode !== 200) {
return reject(`http(s) error: code ${res.statusCode}`);
}

res.pipe(ws);
res.on('error', (...e) => {
console.log('request error', e);
fs.unlink(dest); // Delete the file async. (But we don't check the result)
reject();
});

ws.on('finish', () => {
ws.close();
console.log('downloaded');
resolve();
protocol
.get(url, (res) => {
if ([301, 302].includes(res.statusCode)) {
return downloadFile(res.headers.location, dest)
.then(resolve)
.catch(reject);
}
if (res.statusCode !== 200) {
console.log(res);
throw new Error(`http(s) error: code ${res.statusCode}`);
}

res.pipe(ws);
res.on('error', (e) => {
console.log('here was error', e);
fs.unlink(dest); // Delete the file async. (But we don't check the result)
reject(e);
});

ws.on('finish', () => {
ws.close();
resolve();
});

return null;
})
.on('error', (e) => {
console.log('error in req', e.code, e.message);
reject(e);
});
});
});
}

Expand All @@ -67,7 +72,7 @@ async function extractFile(isZip, src, dest) {
if (isZip) {
await extractZip(src, { dir: dest });
} else {
await tar.extract({ file: src, cwd: dest }, null);
await tar.extract({ file: src, cwd: dest }, null);
}
}

Expand All @@ -87,12 +92,12 @@ async function main() {
await extractFile(isZip, tmpFile, targetDir);

console.log('Unlink', tmpFile);
await fs.promises.unlink(tmpFile).catch((e) => err(e));
console.log(`$tmpFile was deleted`);
await fs.promises.unlink(tmpFile);
console.log(`${tmpFile} was deleted`);
}

main().catch((e) => {
console.log(e);
console.log('throw main', e.code, e.message);

process.exit(1);
});
18 changes: 7 additions & 11 deletions examples/simple-example.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
'use strict';
const MyStem = require('../lib/MyStem');

var MyStem = require('../lib/MyStem');
var Promise = require('bluebird');

var myStem = new MyStem();
const myStem = new MyStem();

myStem.start();

var words = ['карусели', 'немцы', 'печалька'];
const words = ['карусели', 'немцы', 'печалька'];

var promises = words.map(function(word) {
return myStem.lemmatize(word)
});
const promises = words.map((word) => myStem.lemmatize(word));

Promise.all(promises).then(function(lemmas) {
Promise.all(promises)
.then((lemmas) => {
console.log(lemmas);
myStem.stop();
});
});
34 changes: 24 additions & 10 deletions lib/MyStem.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ const readline = require('readline');
const path = require('path');

function MyStem(args = {}) {
Object.assign(args, args, {});

this.path = args.path || path.join(__dirname, '..', 'vendor', process.platform, 'mystem');

if (process.platform === 'win32') {
Expand All @@ -28,7 +26,12 @@ MyStem.prototype = {

if (handler) {
const data = JSON.parse(line);
handler.resolve(this.getGrammemes(data, handler.onlyLemma) || handler.word);
const options = {
onlyLemma: handler.onlyLemma,
fullAnalysis: handler.fullAnalysis,
};

handler.resolve(this.getGrammemes(data, options) || handler.word);
}
});

Expand Down Expand Up @@ -58,11 +61,14 @@ MyStem.prototype = {
},

lemmatize(word) {
const onlyLemma = true;
return this.callMyStem(word, onlyLemma);
return this.callMyStem(word, { onlyLemma: true });
},

analyze(word) {
return this.callMyStem(word, { fullAnalysis: true });
},

callMyStem(word, onlyLemma) {
callMyStem(word, options = {}) {
const firstWord = word.replace(/(\S+)\s+.*/, '$1'); // take only first word. TODO

return new Promise((resolve, reject) => {
Expand All @@ -76,20 +82,28 @@ MyStem.prototype = {
resolve,
reject,
word: firstWord,
onlyLemma,
onlyLemma: options.onlyLemma,
fullAnalysis: options.fullAnalysis,
});
});
},

getGrammemes(data, onlyLemma) {
if (!data[0]) return undefined;
getGrammemes(data, options = {}) {
if (!data[0]) {
return null;
}

if (data[0].analysis.length) {
if (onlyLemma) {
if (options.fullAnalysis) {
return data[0];
}

if (options.onlyLemma) {
return data[0].analysis[0].lex;
}

const array = [];

array.push(data[0].analysis[0].lex);

data[0].analysis[0].gr.split(',').forEach((elem) => {
Expand Down
Loading

0 comments on commit 3760249

Please sign in to comment.