diff --git a/modules/harvest/harvest.install b/modules/harvest/harvest.install index 64c25a4134..b09b7d2cb6 100644 --- a/modules/harvest/harvest.install +++ b/modules/harvest/harvest.install @@ -1,5 +1,7 @@ schema(); + + // Move the table so we can rebuild from it. + $schema->renameTable($table_name, $table_name_temp); + $messages = "Table {$table_name} moved to {$table_name_temp}. " . PHP_EOL; + // Uninstall the the original entity. + $original_type = $definition_update_manager->getEntityType($entity_type_name); + $definition_update_manager->uninstallEntityType($original_type); + $messages .= "Old harvest_run entity removed. " . PHP_EOL; + $entity_type_manager->clearCachedDefinitions(); + // Install the new entity. + //$entity_type = $entity_type_manager->get($entity_type_name); + $entity_type_manager->clearCachedDefinitions(); + $entity_type_def = $entity_type_manager->getDefinition($entity_type_name); + $definition_update_manager->installEntityType($entity_type_def); + $messages .= "New harvest_run entity installed. " . PHP_EOL; + + return $messages; +} + +/** + * Move data from temp table back into harvest_run. + * + * @see https://github.com/GetDKAN/dkan/issues/4287 + */ +function harvest_update_8010(&$sandbox) { + $table_name = 'harvest_runs'; + $table_name_temp = "{$table_name}_temp"; + $messages = ''; + $schema = \Drupal::database()->schema(); /** @var \Drupal\harvest\HarvestUtility $harvest_utility */ $harvest_utility = \Drupal::service('dkan.harvest.utility'); - $harvest_utility->harvestRunsUpdate(); - return 'Harvest runs coalesced into table harvest_runs.'; + + if (!isset($sandbox['total'])) { + // Sandbox has not been initiated, so initiate it. + $sandbox['items_to_process'] = $harvest_utility->getTempRunIdsForUpdate($table_name_temp); + $sandbox['total'] = count($sandbox['items_to_process']); + $sandbox['current'] = 0; + } + // Process them in batches of 25. + $harvest_runs_batch = array_slice($sandbox['items_to_process'], 0, 25, TRUE); + // Loop through all the entries in temp table and save them new. + foreach ($harvest_runs_batch as $key => $time_id) { + // Load the old row. + $row = $harvest_utility->readTempHarvestRunForUpdate($table_name_temp, $time_id); + // Write the new harvest run. + $harvest_utility->writeHarvestRunFromUpdate($row['id'], $row['harvest_plan_id'], $row['data'], $row['extract_status']); + // The item has been processed, remove it from the array. + unset($sandbox['items_to_process'][$key]); + } + + // Determine when to stop batching. + $sandbox['current'] = ($sandbox['total'] - count($sandbox['items_to_process'])); + $sandbox['#finished'] = (empty($sandbox['total'])) ? 1 : ($sandbox['current'] / $sandbox['total']); + $vars = [ + '@completed' => $sandbox['current'], + '@total' => $sandbox['total'], + ]; + + $messages = t('Processed: @completed/@total.', $vars) . PHP_EOL; + // Log the all finished notice. + if ($sandbox['#finished'] === 1) { + // The update of the harvest_runs is complete. + $messages .= t('Data in harvest_runs updated to new schema:') . PHP_EOL; + $dropped = $schema->dropTable($table_name_temp); + if ($dropped) { + $messages .= t('Temporary table dropped.') . PHP_EOL; + } + } + + return $messages; +} + +/** + * Move entries from harvest_[ID]_runs to harvest_runs. + * + * This finishes the process started by harvest_update_8007 and re-runs 8008. +*/ +function harvest_update_8011(&$sandbox) { + /** @var \Drupal\harvest\HarvestUtility $harvest_utility */ + $harvest_utility = \Drupal::service('dkan.harvest.utility'); + $harvest_utility->harvestRunsUpdate(); + return 'Harvest plan specific run tables coalesced into table harvest_runs.'; } diff --git a/modules/harvest/harvest.services.yml b/modules/harvest/harvest.services.yml index 8d9cb604eb..10dd0569ce 100644 --- a/modules/harvest/harvest.services.yml +++ b/modules/harvest/harvest.services.yml @@ -19,6 +19,7 @@ services: - '@dkan.harvest.storage.harvest_run_repository' - '@database' - '@dkan.harvest.logger_channel' + - '@uuid' dkan.harvest.harvest_plan_repository: class: Drupal\harvest\Entity\HarvestPlanRepository arguments: diff --git a/modules/harvest/src/Commands/HarvestCommands.php b/modules/harvest/src/Commands/HarvestCommands.php index 9308c98ee8..f28ed9d89b 100644 --- a/modules/harvest/src/Commands/HarvestCommands.php +++ b/modules/harvest/src/Commands/HarvestCommands.php @@ -221,11 +221,6 @@ public function runAll($options = ['new' => FALSE]) { foreach ($plan_ids as $plan_id) { $result = $this->harvestService->runHarvest($plan_id); $runs_info[] = $result; - // Since run IDs are also one-second-resolution timestamps, we must wait - // one second before running the next harvest. - // @todo Remove this sleep when we've switched to a better system for - // timestamps. - sleep(1); } $this->renderHarvestRunsInfo($runs_info); } diff --git a/modules/harvest/src/Entity/HarvestRun.php b/modules/harvest/src/Entity/HarvestRun.php index 66de79c20d..f2c097234e 100644 --- a/modules/harvest/src/Entity/HarvestRun.php +++ b/modules/harvest/src/Entity/HarvestRun.php @@ -52,9 +52,6 @@ * "canonical" = "/harvest-run/{harvest_run}", * }, * ) - * - * @todo Convert to using microtime() or other better system for the timestamp/ - * id. */ final class HarvestRun extends HarvestEntityBase implements HarvestRunInterface { @@ -64,11 +61,25 @@ final class HarvestRun extends HarvestEntityBase implements HarvestRunInterface public static function baseFieldDefinitions(EntityTypeInterface $entity_type) { $base_fields = parent::baseFieldDefinitions($entity_type); - // The id is the unique ID for the harvest run, and also the timestamp at - // which the run occurred, generated by time(). - $base_fields['id'] = static::getBaseFieldIdentifier( - new TranslatableMarkup('Harvest Run') - ); + $base_fields['id'] = BaseFieldDefinition::create('integer') + ->setLabel(t('ID')) + ->setDescription(t('The ID of the Harvest Run entity.')) + ->setDisplayOptions('view', [ + 'label' => 'inline', + 'weight' => 0, + ]) + ->setReadOnly(TRUE); + + $base_fields['timestamp'] = BaseFieldDefinition::create('timestamp') + ->setLabel(t('timestamp')) + ->setDescription(t('The timestamp of when this harvest was run.')) + ->setRequired(TRUE); + + $base_fields['uuid'] = BaseFieldDefinition::create('uuid') + ->setLabel(t('UUID')) + ->setDescription(t('The unique identifier for this harvest_run')) + ->setRequired(TRUE) + ->setReadOnly(TRUE); // Harvest plan id. This is the name of the harvest plan as seen in the UI. $base_fields['harvest_plan_id'] = BaseFieldDefinition::create('string') diff --git a/modules/harvest/src/Entity/HarvestRunRepository.php b/modules/harvest/src/Entity/HarvestRunRepository.php index 04af86439b..165ea349f5 100644 --- a/modules/harvest/src/Entity/HarvestRunRepository.php +++ b/modules/harvest/src/Entity/HarvestRunRepository.php @@ -24,7 +24,7 @@ class HarvestRunRepository { * * @var \Drupal\Core\Entity\EntityStorageInterface */ - private EntityStorageInterface $runStorage; + protected EntityStorageInterface $runStorage; /** * Database connection service. @@ -89,8 +89,8 @@ public function destructForPlanId(string $plan_id) { * Run data. Usually the result returned by Harvester::harvest(). * @param string $plan_id * The plan identifier. - * @param string $run_id - * The run identifier, which is also a timestamp. + * @param string $timestamp + * The run timestamp. * * @return string * The run identifier. @@ -100,9 +100,9 @@ public function destructForPlanId(string $plan_id) { * @todo Eventually all the subsystems will be able to understand the entity * rather than needing conversion to and from the array format. */ - public function storeRun(array $run_data, string $plan_id, string $run_id): string { + public function storeRun(array $run_data, string $plan_id, string $timestamp): string { $field_values = [ - 'id' => $run_id, + 'timestamp' => (int) $timestamp, 'harvest_plan_id' => $plan_id, ]; $field_values['extract_status'] = $run_data['status']['extract'] ?? 'FAILURE'; @@ -139,7 +139,7 @@ public function storeRun(array $run_data, string $plan_id, string $run_id): stri // JSON encode remaining run data. $field_values['data'] = json_encode($run_data); - return $this->writeEntity($field_values, $plan_id, $run_id); + return $this->writeEntity($field_values, $plan_id, $timestamp); } /** @@ -147,15 +147,18 @@ public function storeRun(array $run_data, string $plan_id, string $run_id): stri * * @param string $plan_id * The harvest plan identifier. - * @param string $run_id - * The harvest run identifier. + * @param string|null $timestamp + * The harvest run timestamp. No longer used. Retained for BC. * * @return string|null * JSON-encoded run result data, or NULL if none could be found. */ - public function retrieveRunJson(string $plan_id, string $run_id): ?string { - if ($entity = $this->loadEntity($plan_id, $run_id)) { - return json_encode($entity->toResult()); + public function retrieveRunJson(string $plan_id, $timestamp = NULL): ?string { + $run_ids = $this->retrieveAllRunIds($plan_id); + $run_id = reset($run_ids); + if ($run_id) { + $run_entity = $this->runStorage->load($run_id); + return json_encode($run_entity->toResult()); } return NULL; } @@ -198,7 +201,7 @@ public function retrieveAllRunsJson(string $plan_id): array { } /** - * Get all the harvet plan ids available in the harvest runs table. + * Get all the harvest plan ids available in the harvest runs table. * * @return array * All the harvest plan ids present in the harvest runs table, as both key @@ -218,18 +221,18 @@ public function getUniqueHarvestPlanIds(): array { /** * Get the extracted UUIDs from the given harvest run. * - * @param string $plan_id - * The harvest plan ID. - * @param string $run_id - * The harvest run ID. + * @param string $planId + * The harvest plan ID. deprecated: no longer needed but kept for BC. + * @param string $runId + * The harvest_run entity id. * * @return string[] * Array of UUIDs, keyed by UUID. Note that these are UUIDs by convention; * they could be any string value. */ - public function getExtractedUuids(string $plan_id, string $run_id): array { + public function getExtractedUuids(string $planId, string $runId): array { $extracted = []; - if ($entity = $this->loadEntity($plan_id, $run_id)) { + if ($entity = $this->runStorage->load($runId)) { foreach ($entity->get('extracted_uuid')->getValue() as $field) { $uuid = $field['value']; $extracted[$uuid] = $uuid; @@ -243,16 +246,17 @@ public function getExtractedUuids(string $plan_id, string $run_id): array { * * @param string $plan_id * Plan ID. - * @param string $run_id - * Run ID, which is a timestamp. + * @param string $timestamp + * The timestamp for the run. Formerly the id. * * @return \Drupal\harvest\HarvestRunInterface|\Drupal\Core\Entity\EntityInterface|null * The loaded entity or NULL if none could be loaded. */ - public function loadEntity(string $plan_id, string $run_id): ?HarvestRunInterface { + public function loadEntity(string $plan_id, string $timestamp): ?HarvestRunInterface { if ($ids = $this->runStorage->getQuery() - ->condition('id', $run_id) + ->condition('timestamp', $timestamp) ->condition('harvest_plan_id', $plan_id) + ->sort('id', 'DESC') ->range(0, 1) ->accessCheck(FALSE) ->execute() @@ -262,6 +266,36 @@ public function loadEntity(string $plan_id, string $run_id): ?HarvestRunInterfac return NULL; } + /** + * Helper method to load the most recent harvest_run entity given a plan ID. + * + * @param string $harvest_plan_id + * Plan ID. + * + * @return \Drupal\harvest\HarvestRunInterface|null + * The loaded harvest_run entity or NULL if none could be loaded. + */ + public function loadRunByPlan($harvest_plan_id): ?HarvestRunInterface { + $run_id = $this->getLastHarvestRunId($harvest_plan_id); + return ($run_id) ? $this->runStorage->load($run_id) : NULL; + } + + /** + * Get a harvest's most recent run identifier. + * + * Since the run record id is a timestamp, we can sort on the id. + * + * @param string $plan_id + * The harvest plan identifier. + * + * @return string + * The entity id of the most recent harvest run. + */ + public function getLastHarvestRunId(string $plan_id): string { + $run_ids = $this->retrieveAllRunIds($plan_id); + return reset($run_ids); + } + /** * Write a harvest_run entity, updating or saving as needed. * @@ -269,21 +303,22 @@ public function loadEntity(string $plan_id, string $run_id): ?HarvestRunInterfac * Structured data ready to send to entity_storage->create(). * @param string $plan_id * Harvest plan identifier. - * @param string $run_id - * Harvest run identifier. + * @param mixed $timestamp + * Harvest run timestamp. * * @return string - * Harvest plan identifier for the entity that was written. + * Harvest run id. */ - public function writeEntity(array $field_values, string $plan_id, string $run_id) { + public function writeEntity(array $field_values, string $plan_id, mixed $timestamp) { + $timestamp = (int) $timestamp; /** @var \Drupal\harvest\HarvestRunInterface $entity */ - $entity = $this->loadEntity($plan_id, $run_id); + $entity = $this->loadEntity($plan_id, $timestamp); if ($entity) { // Modify entity. - unset($field_values['id']); foreach ($field_values as $key => $value) { $entity->set($key, $value); } + $field_values['id'] = $entity->id(); } else { // Create new entity. diff --git a/modules/harvest/src/HarvestPlanListBuilder.php b/modules/harvest/src/HarvestPlanListBuilder.php index 7b457a5e13..7096f7c2d0 100644 --- a/modules/harvest/src/HarvestPlanListBuilder.php +++ b/modules/harvest/src/HarvestPlanListBuilder.php @@ -4,6 +4,7 @@ use Drupal\Core\Entity\EntityInterface; use Drupal\Core\Entity\EntityListBuilder; +use Drupal\Core\Entity\EntityStorageInterface; use Drupal\Core\Entity\EntityTypeInterface; use Drupal\Core\Link; use Drupal\Core\Url; @@ -19,26 +20,26 @@ class HarvestPlanListBuilder extends EntityListBuilder { /** - * Harvest service. + * Harvest run repository service. * - * @var \Drupal\harvest\HarvestService + * @var \Drupal\harvest\Entity\HarvestRunRepository */ - protected HarvestService $harvestService; + protected HarvestRunRepository $harvestRunRepository; /** - * Harvest run repository service. + * Entity storage service for the harvest_run entity type. * - * @var \Drupal\harvest\Entity\HarvestRunRepository + * @var \Drupal\Core\Entity\EntityStorageInterface */ - protected HarvestRunRepository $harvestRunRepository; + protected EntityStorageInterface $harvestRunStorage; /** * {@inheritDoc} */ public static function createInstance(ContainerInterface $container, EntityTypeInterface $entity_type) { $builder = parent::createInstance($container, $entity_type); - $builder->harvestService = $container->get('dkan.harvest.service'); $builder->harvestRunRepository = $container->get('dkan.harvest.storage.harvest_run_repository'); + $builder->harvestRunStorage = $container->get('entity_type.manager')->getStorage('harvest_run'); return $builder; } @@ -74,9 +75,10 @@ public function buildRow(EntityInterface $entity) { $harvest_plan_id = $entity->get('id')->getString(); $run_entity = NULL; - if ($run_id = $this->harvestService->getLastHarvestRunId($harvest_plan_id)) { + if ($run_id = $this->harvestRunRepository->getLastHarvestRunId($harvest_plan_id)) { // There is a run identifier, so we should get that info. - $run_entity = $this->harvestRunRepository->loadEntity($harvest_plan_id, $run_id); + /** @var \Drupal\harvest\HarvestRunInterface $run_entity */ + $run_entity = $this->harvestRunStorage->load($run_id); } // Default values for a row if there's no info. @@ -100,7 +102,7 @@ public function buildRow(EntityInterface $entity) { 'data' => $extract_status, 'class' => strtolower($extract_status), ]; - $row['last_run'] = date('m/d/y H:m:s T', $run_id); + $row['last_run'] = date('m/d/y H:m:s T', $run_entity->get('timestamp')->value); $row['dataset_count'] = $interpreter->countProcessed(); } // Don't call parent::buildRow() because we don't want operations (yet). diff --git a/modules/harvest/src/HarvestService.php b/modules/harvest/src/HarvestService.php index 6d3ba1ac19..7fc1f98827 100644 --- a/modules/harvest/src/HarvestService.php +++ b/modules/harvest/src/HarvestService.php @@ -58,7 +58,7 @@ class HarvestService implements ContainerInjectionInterface { * * @var \Drupal\harvest\Entity\HarvestRunRepository */ - private HarvestRunRepository $runRepository; + public HarvestRunRepository $runRepository; /** * DKAN logger channel. @@ -208,7 +208,7 @@ public function revertHarvest($id) { public function runHarvest($plan_id) { $harvester = $this->getHarvester($plan_id); - $run_id = (string) time(); + $timestamp = (string) time(); $result = $harvester->harvest(); if (empty($result['status']['extracted_items_ids'])) { @@ -217,9 +217,9 @@ public function runHarvest($plan_id) { $result['status']['orphan_ids'] = $this->getOrphanIdsFromResult($plan_id, $result['status']['extracted_items_ids']); $this->processOrphanIds($result['status']['orphan_ids']); - - $result['identifier'] = $run_id; - $this->runRepository->storeRun($result, $plan_id, $run_id); + // For legacy reasons, the identifier is the timestamp. + $result['identifier'] = $timestamp; + $this->runRepository->storeRun($result, $plan_id, $timestamp); return $result; } @@ -229,15 +229,15 @@ public function runHarvest($plan_id) { * * @param string $plan_id * The harvest plan ID. - * @param string $run_id - * The harvest run ID. + * @param string $timestamp + * The timestamp of the harvest_run. * * @return bool|string * JSON-encoded run information for the given run, or FALSE if no matching * runID is found. */ - public function getHarvestRunInfo(string $plan_id, string $run_id): bool|string { - if ($info = $this->runRepository->retrieveRunJson($plan_id, $run_id)) { + public function getHarvestRunInfo(string $plan_id, string $timestamp): bool|string { + if ($info = $this->runRepository->retrieveRunJson($plan_id, $timestamp)) { return $info; } return FALSE; @@ -248,14 +248,16 @@ public function getHarvestRunInfo(string $plan_id, string $run_id): bool|string * * @param string $plan_id * Harvest plan ID. - * @param string $run_id - * Harvest run ID. + * @param string $timestamp + * Harvest run timestamp. * * @return array * Array of status info from the run. */ - public function getHarvestRunResult(string $plan_id, string $run_id): array { - if ($entity = $this->runRepository->loadEntity($plan_id, $run_id)) { + public function getHarvestRunResult(string $plan_id, string $timestamp): array { + // This one has to keep using the loadEntity method as it may be looking up + // more than the most recent run. + if ($entity = $this->runRepository->loadEntity($plan_id, $timestamp)) { return $entity->toResult(); } else { @@ -296,20 +298,18 @@ public function getRunIdsForHarvest(string $plan_id): array { } /** - * Get a harvest's most recent run identifier. - * - * Since the run record id is a timestamp, we can sort on the id. + * Get a harvest's most recent run id. Passthrough for HarvestRunRepository. * * @param string $plan_id - * The harvest identifier. + * The harvest plan identifier. * * @return string - * The most recent harvest run record identifier. + * The entity id of the most recent harvest run. + * + * @deprecated in dkan:2.19.11 and is removed from dkan:3.0.0 Use runStorage::load(). */ public function getLastHarvestRunId(string $plan_id): string { - $run_ids = $this->runRepository->retrieveAllRunIds($plan_id); - rsort($run_ids); - return reset($run_ids); + return $this->runRepository->getLastHarvestRunId($plan_id); } /** @@ -354,7 +354,7 @@ protected function bulkUpdateStatus(string $harvestId, string $method): array { throw new \OutOfRangeException("Method {$method} does not exist"); } - $lastRunId = $this->getLastHarvestRunId($harvestId); + $lastRunId = $this->runRepository->getLastHarvestRunId($harvestId); $lastRunInfo = json_decode($this->getHarvestRunInfo($harvestId, $lastRunId)); $status = $lastRunInfo->status ?? NULL; if (!isset($status->extracted_items_ids)) { diff --git a/modules/harvest/src/HarvestUtility.php b/modules/harvest/src/HarvestUtility.php index 54cee32996..6c2aad0002 100644 --- a/modules/harvest/src/HarvestUtility.php +++ b/modules/harvest/src/HarvestUtility.php @@ -2,6 +2,7 @@ namespace Drupal\harvest; +use Drupal\Component\Uuid\UuidInterface; use Drupal\Core\Database\Connection; use Drupal\harvest\Entity\HarvestRunRepository; use Drupal\harvest\Storage\DatabaseTableFactory; @@ -11,8 +12,8 @@ /** * DKAN Harvest utility service for maintenance tasks. * - * These methods generally exist to support a thin Drush layer. These are - * methods that we don't need in the HarvestService object. + * These methods generally exist to support a thin Drush layer or hook_update_n. + * These are methods that we don't need in the HarvestService object. */ class HarvestUtility { @@ -58,6 +59,13 @@ class HarvestUtility { */ private LoggerInterface $logger; + /** + * Uuid service. + * + * @var \Drupal\Component\Uuid\UuidInterface + */ + private UuidInterface $uuidService; + /** * Constructor. */ @@ -67,7 +75,8 @@ public function __construct( HarvestHashesDatabaseTableFactory $hashesFactory, HarvestRunRepository $runRepository, Connection $connection, - LoggerInterface $loggerChannel + LoggerInterface $loggerChannel, + UuidInterface $uuid_service ) { $this->harvestService = $harvestService; $this->storeFactory = $storeFactory; @@ -75,6 +84,7 @@ public function __construct( $this->runRepository = $runRepository; $this->connection = $connection; $this->logger = $loggerChannel; + $this->uuidService = $uuid_service; } /** @@ -217,10 +227,10 @@ public function harvestHashUpdate() { */ public function convertRunTable(string $plan_id) { $old_runs_table = $this->storeFactory->getInstance('harvest_' . $plan_id . '_runs'); - foreach ($old_runs_table->retrieveAll() as $id) { - if ($data = $old_runs_table->retrieve($id)) { + foreach ($old_runs_table->retrieveAll() as $timestamp) { + if ($data = $old_runs_table->retrieve($timestamp)) { // Explicitly decode the data as an array. - $this->runRepository->storeRun(json_decode($data, TRUE), $plan_id, $id); + $this->runRepository->storeRun(json_decode($data, TRUE), $plan_id, $timestamp); } } } @@ -243,4 +253,74 @@ public function harvestRunsUpdate() { } } + /** + * Get the ids from the temp harvest run table. + * + * Only needed for harvest_update_8010. + * + * @param mixed $table_name_temp + * The name of the temp table. + * + * @return array + * The ids of all the harvest runs in the table sorted oldest to newest. + */ + public function getTempRunIdsForUpdate($table_name_temp) : array { + $query = $this->connection->select($table_name_temp, 'hrt') + ->fields('hrt', ['id']) + ->orderBy('id', 'ASC'); + $result = $query->execute()->fetchCol(0); + // Can't rely on orderBy as the sort ends up natural, not numeric. + asort($result, SORT_NUMERIC); + + return $result ?? []; + } + + /** + * Reads a single harvest row from the harvest run temp table. + * + * Only needed for harvest_update_8010. + * + * @param string $table_name_temp + * Name of the table to read from. + * @param string $timestamp + * The id to read from, which was also the timestamp. + * + * @return array + * Elements from the row['id', 'harvest_plan_id', 'data', 'extract_status']. + */ + public function readTempHarvestRunForUpdate(string $table_name_temp, string $timestamp): array { + $query = $this->connection->select($table_name_temp, 'hrt') + ->fields('hrt', ['id', 'harvest_plan_id', 'data', 'extract_status']) + ->condition('id', $timestamp, '=') + ->orderBy('id', 'ASC'); + $result = $query->execute()->fetchAll(\PDO::FETCH_ASSOC); + return reset($result); + } + + /** + * Creates a new entry in harvest_run based on data from harvest run temp. + * + * Only needed for harvest_update_8010. + * + * @param string $timestamp + * The id from the old harvest run, which was a timestamp. + * @param string $harvest_plan_id + * The harvest plan id. + * @param string $data + * Data about the harvest. + * @param string $extract_status + * The status of the harvest. + */ + public function writeHarvestRunFromUpdate(string $timestamp, string $harvest_plan_id, string $data, string $extract_status): void { + $this->connection->insert('harvest_runs') + ->fields([ + 'timestamp' => (int) $timestamp, + 'harvest_plan_id' => $harvest_plan_id, + 'uuid' => $this->uuidService->generate(), + 'data' => $data, + 'extract_status' => $extract_status, + ]) + ->execute(); + } + } diff --git a/modules/harvest/src/OrphanDatasetsProcessor.php b/modules/harvest/src/OrphanDatasetsProcessor.php index 48d5f35bfd..a688df20de 100644 --- a/modules/harvest/src/OrphanDatasetsProcessor.php +++ b/modules/harvest/src/OrphanDatasetsProcessor.php @@ -32,7 +32,7 @@ public function setEntityTypeManager(EntityTypeManagerInterface $entityTypeManag * Get the dataset identifiers orphaned by the harvest currently in progress. */ private function getOrphanIdsFromResult(string $harvestId, array $extractedIds) : array { - if ($lastRunId = $this->getLastHarvestRunId($harvestId)) { + if ($lastRunId = $this->runRepository->getLastHarvestRunId($harvestId)) { $previouslyExtractedIds = $this->getExtractedIds($harvestId, $lastRunId); return array_values(array_diff($previouslyExtractedIds, $extractedIds)); } diff --git a/modules/harvest/tests/src/Kernel/Entity/HarvestRunRepositoryTest.php b/modules/harvest/tests/src/Kernel/Entity/HarvestRunRepositoryTest.php index e941cdbded..7fd20087ae 100644 --- a/modules/harvest/tests/src/Kernel/Entity/HarvestRunRepositoryTest.php +++ b/modules/harvest/tests/src/Kernel/Entity/HarvestRunRepositoryTest.php @@ -32,8 +32,8 @@ protected function setUp() : void { * @covers ::destructForPlanId */ public function testDestructForPlanId() { - $destruct_id = '1711038292'; - $keep_id = '1711038293'; + $destruct_timestamp = '1711038292'; + $keep_timestamp = '1711038293'; $destruct_this_plan_id = 'DESTRUCTTHISPLAN'; $keep_this_plan_id = 'KEEPTHISPLAN'; $run_data = [ @@ -44,10 +44,10 @@ public function testDestructForPlanId() { ]; /** @var \Drupal\harvest\Entity\HarvestRunRepository $harvest_run_repo */ $harvest_run_repo = $this->container->get('dkan.harvest.storage.harvest_run_repository'); - $run_data['identifier'] = $keep_id; - $harvest_run_repo->storeRun($run_data, $keep_this_plan_id, $keep_id); - $run_data['identifier'] = $destruct_id; - $harvest_run_repo->storeRun($run_data, $destruct_this_plan_id, $destruct_id); + $run_data['identifier'] = $keep_timestamp; + $harvest_run_repo->storeRun($run_data, $keep_this_plan_id, $keep_timestamp); + $run_data['identifier'] = $destruct_timestamp; + $harvest_run_repo->storeRun($run_data, $destruct_this_plan_id, $destruct_timestamp); $harvest_run_repo->destructForPlanId($destruct_this_plan_id); @@ -56,9 +56,9 @@ public function testDestructForPlanId() { /** @var \Drupal\harvest\HarvestRunInterface $run */ $this->assertInstanceOf( HarvestRunInterface::class, - $run = $harvest_run_repo->loadEntity($keep_this_plan_id, $keep_id) + $run = $harvest_run_repo->loadEntity($keep_this_plan_id, $keep_timestamp) ); - $this->assertEquals($keep_id, $run->id()); + $this->assertEquals($keep_timestamp, $run->get('timestamp')->value); } /** @@ -75,23 +75,23 @@ public function testRetrieveJson() { // Set up an entity. $plan_id = 'plan'; - $run_id = '1711038293'; + $run_timestamp = '1711038293'; $run_data = [ 'plan' => '{"plan": "json"}', 'status' => [ 'extract' => 'AWESOME', ], - 'identifier' => $run_id, + 'identifier' => $run_timestamp, ]; - $harvest_run_repo->storeRun($run_data, $plan_id, $run_id); + $run_id = $harvest_run_repo->storeRun($run_data, $plan_id, $run_timestamp); // Retrieve one run as JSON. $this->assertIsString( - $run_json = $harvest_run_repo->retrieveRunJson($plan_id, $run_id) + $run_json = $harvest_run_repo->retrieveRunJson($plan_id, $run_timestamp) ); $this->assertIsObject($run = json_decode($run_json)); foreach (array_keys($run_data) as $key) { - $this->assertObjectHasProperty($key, $run); + $this->assertObjectHasProperty($key, $run, "The key $key was not found in the harvest_run."); } // Retrieve all runs as JSON. @@ -99,10 +99,10 @@ public function testRetrieveJson() { $runs_json = $harvest_run_repo->retrieveAllRunsJson($plan_id) ); // Do some assertions. - $this->assertArrayHasKey($run_id, $runs_json); + $this->assertArrayHasKey($run_id, $runs_json, 'The harvest_run id of the test save was not found.'); $this->assertIsString($runs_json[$run_id]); $this->assertIsObject($run_decoded = json_decode($runs_json[$run_id])); - $this->assertEquals($run_id, $run_decoded->identifier); + $this->assertEquals($run_timestamp, $run_decoded->identifier, 'The harvest_run timestamps do not match.'); } /** @@ -112,11 +112,12 @@ public function testGetExtractedUuids() { /** @var \Drupal\harvest\Entity\HarvestRunRepository $harvest_run_repo */ $harvest_run_repo = $this->container->get('dkan.harvest.storage.harvest_run_repository'); $plan_id = 'plan'; - $run_id = '1711038293'; + $non_existent_run_id = '123456777'; + $timestamp = '1711038293'; // There are no entities at this point, so trying to get UUIDs should // result in an empty array. - $this->assertIsArray($uuids = $harvest_run_repo->getExtractedUuids($plan_id, $run_id)); + $this->assertIsArray($uuids = $harvest_run_repo->getExtractedUuids($plan_id, $non_existent_run_id)); $this->assertEquals([], $uuids); $uuids = [ @@ -131,9 +132,9 @@ public function testGetExtractedUuids() { 'extract' => 'AWESOME', 'extracted_items_ids' => $uuids, ], - 'identifier' => $run_id, + 'identifier' => $timestamp, ]; - $harvest_run_repo->storeRun($run_data, $plan_id, $run_id); + $run_id = $harvest_run_repo->storeRun($run_data, $plan_id, $timestamp); // Get the extracted UUIDs. $this->assertIsArray($extracted_uuids = $harvest_run_repo->getExtractedUuids($plan_id, $run_id)); diff --git a/modules/harvest/tests/src/Kernel/HarvestPlanListBuilderTest.php b/modules/harvest/tests/src/Kernel/HarvestPlanListBuilderTest.php index ef64c31371..e3a96e30be 100644 --- a/modules/harvest/tests/src/Kernel/HarvestPlanListBuilderTest.php +++ b/modules/harvest/tests/src/Kernel/HarvestPlanListBuilderTest.php @@ -101,9 +101,6 @@ public function testGoodHarvestRun() { $this->registerHarvestPlan($harvest_service, $plan_id); $run_result = $harvest_service->runHarvest($plan_id); $this->assertEquals('SUCCESS', $run_result['status']['extract'] ?? 'not_a_successful_run'); - // Get the actual run ID because it happens to be a timestamp for the last - // run, and it could be the next second by the time we assert against it. - $run_id = $harvest_service->getLastHarvestRunId($plan_id); $response = $list_builder->render(); @@ -111,7 +108,7 @@ public function testGoodHarvestRun() { $strings = array_merge(self::HARVEST_HEADERS, [ 'harvest_link', 'SUCCESS', - json_encode(date('m/d/y H:m:s T', $run_id)), + json_encode(date('m/d/y H:m:s T', $run_result['identifier'])), '2', ]); foreach ($strings as $string) { diff --git a/modules/harvest/tests/src/Kernel/HarvestServiceTest.php b/modules/harvest/tests/src/Kernel/HarvestServiceTest.php index b1333db3c6..0d16ceb415 100644 --- a/modules/harvest/tests/src/Kernel/HarvestServiceTest.php +++ b/modules/harvest/tests/src/Kernel/HarvestServiceTest.php @@ -196,7 +196,8 @@ public function testGetHarvestRunResult() { // getHarvestRunResult should return an empty array. /** @var \Drupal\harvest\HarvestService $harvest_service */ $harvest_service = $this->container->get('dkan.harvest.service'); - $this->assertEquals([], $harvest_service->getHarvestRunResult('any_plan', 'any_id')); + $any_harvest_run_id = 111; + $this->assertEquals([], $harvest_service->getHarvestRunResult('any_plan', $any_harvest_run_id)); // Register a harvest and run it. $plan_identifier = 'test_plan'; @@ -214,10 +215,10 @@ public function testGetHarvestRunResult() { $this->assertEquals($plan_identifier, $harvest_service->registerHarvest($plan)); $run_result = $harvest_service->runHarvest($plan_identifier); - $this->assertNotEmpty($run_id = $run_result['identifier']); + $this->assertNotEmpty($timestamp = $run_result['identifier']); // Compare the reloaded results to the ones from the original run. - $this->assertEquals($run_result, $harvest_service->getHarvestRunResult($plan_identifier, $run_id)); + $this->assertEquals($run_result, $harvest_service->getHarvestRunResult($plan_identifier, $timestamp)); } }