Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: classify emails by importance based on subjects #10277

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ The rating depends on the installed text processing backend. See [the rating ove

Learn more about the Nextcloud Ethical AI Rating [in our blog](https://nextcloud.com/blog/nextcloud-ethical-ai-rating/).
]]></description>
<version>4.2.0-alpha.0</version>
<version>4.2.0-alpha.1</version>
<licence>agpl</licence>
<author homepage="https://github.com/ChristophWurst">Christoph Wurst</author>
<author homepage="https://github.com/GretaD">GretaD</author>
Expand Down Expand Up @@ -90,6 +90,7 @@ Learn more about the Nextcloud Ethical AI Rating [in our blog](https://nextcloud
<command>OCA\Mail\Command\TrainAccount</command>
<command>OCA\Mail\Command\UpdateAccount</command>
<command>OCA\Mail\Command\UpdateSystemAutoresponders</command>
<command>OCA\Mail\Command\RunMetaEstimator</command>
</commands>
<settings>
<admin>OCA\Mail\Settings\AdminSettings</admin>
Expand Down
2 changes: 0 additions & 2 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
use OCA\Mail\Listener\MessageCacheUpdaterListener;
use OCA\Mail\Listener\MessageKnownSinceListener;
use OCA\Mail\Listener\MoveJunkListener;
use OCA\Mail\Listener\NewMessageClassificationListener;
use OCA\Mail\Listener\NewMessagesNotifier;
use OCA\Mail\Listener\OauthTokenRefreshListener;
use OCA\Mail\Listener\OptionalIndicesListener;
Expand Down Expand Up @@ -130,7 +129,6 @@ public function register(IRegistrationContext $context): void {
$context->registerEventListener(MessageDeletedEvent::class, MessageCacheUpdaterListener::class);
$context->registerEventListener(MessageSentEvent::class, AddressCollectionListener::class);
$context->registerEventListener(MessageSentEvent::class, InteractionListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, NewMessageClassificationListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, MessageKnownSinceListener::class);
$context->registerEventListener(NewMessagesSynchronized::class, NewMessagesNotifier::class);
$context->registerEventListener(SynchronizationEvent::class, AccountSynchronizedThreadUpdaterListener::class);
Expand Down
5 changes: 1 addition & 4 deletions lib/BackgroundJob/TrainImportanceClassifierJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@
}

try {
$this->classifier->train(
$account,
$this->logger
);
$this->classifier->train($account, $this->logger);

Check warning on line 72 in lib/BackgroundJob/TrainImportanceClassifierJob.php

View check run for this annotation

Codecov / codecov/patch

lib/BackgroundJob/TrainImportanceClassifierJob.php#L72

Added line #L72 was not covered by tests
} catch (Throwable $e) {
$this->logger->error('Cron importance classifier training failed: ' . $e->getMessage(), [
'exception' => $e,
Expand Down
23 changes: 14 additions & 9 deletions lib/Command/PredictImportance.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use OCA\Mail\Db\Message;
use OCA\Mail\Service\AccountService;
use OCA\Mail\Service\Classification\ImportanceClassifier;
use OCA\Mail\Support\ConsoleLoggerDecorator;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\IConfig;
use Psr\Log\LoggerInterface;
Expand All @@ -25,6 +26,7 @@
class PredictImportance extends Command {
public const ARGUMENT_ACCOUNT_ID = 'account-id';
public const ARGUMENT_SENDER = 'sender';
public const ARGUMENT_SUBJECT = 'subject';

private AccountService $accountService;
private ImportanceClassifier $classifier;
Expand All @@ -43,26 +45,27 @@
$this->config = $config;
}

/**
* @return void
*/
protected function configure() {
protected function configure(): void {

Check warning on line 48 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L48

Added line #L48 was not covered by tests
$this->setName('mail:predict-importance');
$this->setDescription('Predict importance of an incoming message');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
$this->addArgument(self::ARGUMENT_SENDER, InputArgument::REQUIRED);
$this->addArgument(self::ARGUMENT_SUBJECT, InputArgument::OPTIONAL);

Check warning on line 53 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L53

Added line #L53 was not covered by tests
}

public function isEnabled() {
public function isEnabled(): bool {

Check warning on line 56 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L56

Added line #L56 was not covered by tests
return $this->config->getSystemValueBool('debug');
}

/**
* @return int
*/
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$sender = $input->getArgument(self::ARGUMENT_SENDER);
$subject = $input->getArgument(self::ARGUMENT_SUBJECT) ?? '';

Check warning on line 63 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L63

Added line #L63 was not covered by tests

$consoleLogger = new ConsoleLoggerDecorator(
$this->logger,
$output
);

Check warning on line 68 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L65-L68

Added lines #L65 - L68 were not covered by tests

try {
$account = $this->accountService->findById($accountId);
Expand All @@ -73,9 +76,11 @@
$fakeMessage = new Message();
$fakeMessage->setUid(0);
$fakeMessage->setFrom(AddressList::parse("Name <$sender>"));
$fakeMessage->setSubject($subject);

Check warning on line 79 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L79

Added line #L79 was not covered by tests
[$prediction] = $this->classifier->classifyImportance(
$account,
[$fakeMessage]
[$fakeMessage],
$consoleLogger

Check warning on line 83 in lib/Command/PredictImportance.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/PredictImportance.php#L82-L83

Added lines #L82 - L83 were not covered by tests
);
if ($prediction) {
$output->writeln('Message is important');
Expand Down
117 changes: 117 additions & 0 deletions lib/Command/RunMetaEstimator.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<?php

declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

namespace OCA\Mail\Command;

use OCA\Mail\Service\AccountService;
use OCA\Mail\Service\Classification\ImportanceClassifier;
use OCA\Mail\Support\ConsoleLoggerDecorator;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\IConfig;
use Psr\Log\LoggerInterface;
use Rubix\ML\Backends\Amp;
use Rubix\ML\Classifiers\KNearestNeighbors;
use Rubix\ML\CrossValidation\KFold;
use Rubix\ML\CrossValidation\Metrics\FBeta;
use Rubix\ML\GridSearch;
use Rubix\ML\Kernels\Distance\Euclidean;
use Rubix\ML\Kernels\Distance\Jaccard;
use Rubix\ML\Kernels\Distance\Manhattan;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command `mail:account:run-meta-estimator`.
 *
 * Trains the importance classifier for one account with a Rubix ML GridSearch
 * over K-nearest-neighbours hyper-parameters instead of the default estimator,
 * then prints the best estimator found and the peak memory used.
 *
 * The command is only enabled when the `debug` system config value is true
 * (see isEnabled()).
 */
class RunMetaEstimator extends Command {
/** Name of the required positional argument carrying the mail account id. */
public const ARGUMENT_ACCOUNT_ID = 'account-id';
/** Name of the `--shuffle` option (registered via addOption(), despite the ARGUMENT_ prefix). */
public const ARGUMENT_SHUFFLE = 'shuffle';

private AccountService $accountService;
private LoggerInterface $logger;
private ImportanceClassifier $classifier;
private IConfig $config;

/**
 * Stores the injected services on the instance.
 *
 * @param AccountService $accountService used to look up the account by id
 * @param LoggerInterface $logger wrapped by a ConsoleLoggerDecorator in execute()
 * @param ImportanceClassifier $classifier performs the actual training
 * @param IConfig $config read for the `debug` system value
 */
public function __construct(

Check warning on line 40 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L40

Added line #L40 was not covered by tests
AccountService $accountService,
LoggerInterface $logger,
ImportanceClassifier $classifier,
IConfig $config,
) {
parent::__construct();

Check warning on line 46 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L46

Added line #L46 was not covered by tests

$this->accountService = $accountService;
$this->logger = $logger;
$this->classifier = $classifier;
$this->config = $config;

Check warning on line 51 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L48-L51

Added lines #L48 - L51 were not covered by tests
}

/**
 * Declares the command name, description, the required account id argument
 * and the shuffle option.
 */
protected function configure(): void {
$this->setName('mail:account:run-meta-estimator');
$this->setDescription('Run the meta estimator for an account');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
// Shortcut and mode are both null. NOTE(review): Symfony is expected to treat
// a null mode as a value-less flag (InputOption::VALUE_NONE) - confirm.
$this->addOption(self::ARGUMENT_SHUFFLE, null, null, 'Shuffle data set before training');

Check warning on line 58 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L54-L58

Added lines #L54 - L58 were not covered by tests
}

/**
 * @return bool the boolean `debug` system config value
 */
public function isEnabled(): bool {
return $this->config->getSystemValueBool('debug');

Check warning on line 62 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L61-L62

Added lines #L61 - L62 were not covered by tests
}

/**
 * Runs the grid search for the given account and reports the result.
 *
 * @param InputInterface $input provides the account id argument and shuffle option
 * @param OutputInterface $output receives error/info messages and decorated log output
 *
 * @return int 1 if the account does not exist, 0 otherwise
 */
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$shuffle = (bool)$input->getOption(self::ARGUMENT_SHUFFLE);

Check warning on line 67 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L65-L67

Added lines #L65 - L67 were not covered by tests

// Bail out early with exit code 1 when the account id is unknown.
try {
$account = $this->accountService->findById($accountId);
} catch (DoesNotExistException $e) {
$output->writeln("<error>Account $accountId does not exist</error>");
return 1;

Check warning on line 73 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L70-L73

Added lines #L70 - L73 were not covered by tests
}

// Logger handed to both the grid search and the classifier; it wraps the
// injected logger together with the console output.
$consoleLogger = new ConsoleLoggerDecorator(
$this->logger,
$output
);

Check warning on line 79 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L76-L79

Added lines #L76 - L79 were not covered by tests

// Factory closure passed to train() as its third argument; it builds a fresh
// GridSearch each time it is invoked. Declared static because it does not use $this.
$estimator = static function () use ($consoleLogger) {
// One list of candidate values per KNearestNeighbors constructor parameter,
// in the order given by the trailing comments.
$params = [
[5, 10, 15, 20, 25, 30, 35, 40], // Neighbors
[true, false], // Weighted?
[new Euclidean(), new Manhattan(), new Jaccard()], // Kernel
];

Check warning on line 86 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L81-L86

Added lines #L81 - L86 were not covered by tests

// Candidates are scored with the FBeta metric using a KFold(5) validator.
// Note: this local $estimator shadows the outer closure variable of the same name.
$estimator = new GridSearch(
KNearestNeighbors::class,
$params,
new FBeta(),
new KFold(5)
);
$estimator->setLogger($consoleLogger);
$estimator->setBackend(new Amp());
return $estimator;
};

Check warning on line 97 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L88-L97

Added lines #L88 - L97 were not covered by tests

// NOTE(review): the trailing `false` is presumably the "persist" flag
// (TrainAccount passes `!$dryRun` in the same position), i.e. the grid-search
// result is not stored - confirm against ImportanceClassifier::train().
$pipeline = $this->classifier->train(
$account,
$consoleLogger,
$estimator,
$shuffle,
false,
);

Check warning on line 105 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L99-L105

Added lines #L99 - L105 were not covered by tests

// The nullsafe call leaves $metaEstimator null when train() returned no pipeline;
// in that case nothing is printed about the best estimator.
/** @var GridSearch $metaEstimator */
$metaEstimator = $pipeline?->getEstimator();
if ($metaEstimator !== null) {
$output->writeln("<info>Best estimator: {$metaEstimator->base()}</info>");

Check warning on line 110 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L108-L110

Added lines #L108 - L110 were not covered by tests
}

// memory_get_peak_usage() reports bytes; convert to whole megabytes for display.
$mbs = (int)(memory_get_peak_usage() / 1024 / 1024);
$output->writeln('<info>' . $mbs . 'MB of memory used</info>');
return 0;

Check warning on line 115 in lib/Command/RunMetaEstimator.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/RunMetaEstimator.php#L113-L115

Added lines #L113 - L115 were not covered by tests
}
}
38 changes: 28 additions & 10 deletions lib/Command/TrainAccount.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
declare(strict_types=1);

/**
* SPDX-FileCopyrightText: 2019 Nextcloud GmbH and Nextcloud contributors
* SPDX-FileCopyrightText: 2019-2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/

Expand All @@ -23,6 +23,9 @@

class TrainAccount extends Command {
public const ARGUMENT_ACCOUNT_ID = 'account-id';
public const ARGUMENT_SHUFFLE = 'shuffle';
public const ARGUMENT_DRY_RUN = 'dry-run';
public const ARGUMENT_FORCE = 'force';

private AccountService $accountService;
private ImportanceClassifier $classifier;
Expand All @@ -41,28 +44,39 @@
$this->classificationSettingsService = $classificationSettingsService;
}

/**
* @return void
*/
protected function configure() {
protected function configure(): void {

Check warning on line 47 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L47

Added line #L47 was not covered by tests
$this->setName('mail:account:train');
$this->setDescription('Train the classifier of new messages');
$this->addArgument(self::ARGUMENT_ACCOUNT_ID, InputArgument::REQUIRED);
$this->addOption(self::ARGUMENT_SHUFFLE, null, null, 'Shuffle data set before training');
$this->addOption(
self::ARGUMENT_DRY_RUN,
null,
null,
'Don\'t persist classifier after training'
);
$this->addOption(
self::ARGUMENT_FORCE,
null,
null,
'Train an estimator even if the classification is disabled by the user'
);

Check warning on line 63 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L51-L63

Added lines #L51 - L63 were not covered by tests
}

/**
* @return int
*/
protected function execute(InputInterface $input, OutputInterface $output): int {
$accountId = (int)$input->getArgument(self::ARGUMENT_ACCOUNT_ID);
$shuffle = (bool)$input->getOption(self::ARGUMENT_SHUFFLE);
$dryRun = (bool)$input->getOption(self::ARGUMENT_DRY_RUN);
$force = (bool)$input->getOption(self::ARGUMENT_FORCE);

Check warning on line 70 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L68-L70

Added lines #L68 - L70 were not covered by tests

try {
$account = $this->accountService->findById($accountId);
} catch (DoesNotExistException $e) {
$output->writeln("<error>account $accountId does not exist</error>");
return 1;
}
if (!$this->classificationSettingsService->isClassificationEnabled($account->getUserId())) {

if (!$force && !$this->classificationSettingsService->isClassificationEnabled($account->getUserId())) {

Check warning on line 79 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L79

Added line #L79 was not covered by tests
$output->writeln("<info>classification is turned off for account $accountId</info>");
return 2;
}
Expand All @@ -71,9 +85,13 @@
$this->logger,
$output
);

$this->classifier->train(
$account,
$consoleLogger
$consoleLogger,
null,
$shuffle,
!$dryRun

Check warning on line 94 in lib/Command/TrainAccount.php

View check run for this annotation

Codecov / codecov/patch

lib/Command/TrainAccount.php#L91-L94

Added lines #L91 - L94 were not covered by tests
);

$mbs = (int)(memory_get_peak_usage() / 1024 / 1024);
Expand Down
Loading
Loading