Skip to content

Commit

Permalink
MODEXPW-470 - .mrc-file creation (#542)
Browse files Browse the repository at this point in the history
* MODEXPW-470 Added mrc (squashed)
  • Loading branch information
obozhko-folio authored Jun 4, 2024
1 parent fa67100 commit 693e7dd
Show file tree
Hide file tree
Showing 14 changed files with 177 additions and 32 deletions.
3 changes: 2 additions & 1 deletion descriptors/ModuleDescriptor-template.json
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@
"inventory-storage.instance-types.collection.get",
"inventory-storage.nature-of-content-terms.item.get",
"inventory-storage.instance-formats.item.get",
"inventory-storage.instance-note-types.item.get"
"inventory-storage.instance-note-types.item.get",
"source-storage.sourceRecords.get"
]
},
{
Expand Down
7 changes: 7 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
<hypersistence-utils-hibernate-63.version>3.7.3</hypersistence-utils-hibernate-63.version>
<opencsv.version>5.7.1</opencsv.version>
<feign-jackson.version>12.1</feign-jackson.version>
<marc4j.version>2.9.2</marc4j.version>

<!-- Test properties-->
<junit-extensions.version>2.4.0</junit-extensions.version>
Expand Down Expand Up @@ -240,6 +241,12 @@
<version>3.1.0-M1</version>
</dependency>

<dependency>
<groupId>org.marc4j</groupId>
<artifactId>marc4j</artifactId>
<version>${marc4j.version}</version>
</dependency>

<!-- Test dependencies -->

<dependency>
Expand Down
42 changes: 31 additions & 11 deletions src/main/java/org/folio/dew/batch/MarcAsListStringsWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,51 @@
import lombok.extern.log4j.Log4j2;
import org.folio.dew.client.SrsClient;
import org.folio.dew.domain.dto.Formatable;
import org.folio.dew.error.BulkEditException;
import org.folio.dew.service.JsonToMarcConverter;
import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.Chunk;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemStream;
import org.springframework.batch.item.file.FlatFileItemWriter;
import org.springframework.util.Assert;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import static java.lang.String.format;
import static java.util.Objects.nonNull;
import static org.folio.dew.utils.Constants.NO_MARC_CONTENT;

@Log4j2
@StepScope
public class MarcAsListStringsWriter<T, U extends Formatable<T>> extends FlatFileItemWriter<List<U>> {

private SrsClient srsClient;
private MarcAsStringWriter<String> delegateToStringWriter;
private JsonToMarcConverter jsonToMarcConverter;

public MarcAsListStringsWriter(String outputFileName, SrsClient srsClient) {
public MarcAsListStringsWriter(String outputFileName, SrsClient srsClient, JsonToMarcConverter jsonToMarcConverter) {
super();
this.srsClient = srsClient;
this.jsonToMarcConverter = jsonToMarcConverter;
delegateToStringWriter = new MarcAsStringWriter<>(outputFileName);
}

@Override
public void write(Chunk<? extends List<U>> items) throws Exception {
delegateToStringWriter.write(new Chunk<>(items.getItems().stream().flatMap(List::stream).filter(itm -> itm.isInstanceFormat() && itm.isSourceMarc()).map(marc -> getMarcContent(marc.getId()))
.filter(Objects::nonNull).toList()));
delegateToStringWriter.write(new Chunk<>(items.getItems().stream().flatMap(List::stream)
.filter(itm -> itm.isInstanceFormat() && itm.isSourceMarc()).map(marc -> {
try {
return getMarcContent(marc.getId());
} catch (Exception e) {
log.error(e);
throw new BulkEditException(format(NO_MARC_CONTENT, marc.getId(), e.getMessage()));
}
})
.flatMap(List::stream).filter(Objects::nonNull).toList()));
}

@Override
Expand Down Expand Up @@ -60,15 +76,19 @@ public void close() {
}
}

private String getMarcContent(String id) {
var srsRecords = srsClient.getMarc(id, "INSTANCE");
if (srsRecords.getSourceRecords().isEmpty()) {
private List<String> getMarcContent(String id) throws Exception {
List<String> mrcRecords = new ArrayList<>();
var srsRecords = srsClient.getMarc(id, "INSTANCE").get("sourceRecords");
if (srsRecords.isEmpty()) {
log.warn("No SRS records found by instanceId = {}", id);
return null;
return mrcRecords;
}
var recordId = srsRecords.getSourceRecords().get(0).getRecordId();
var marcRecord = srsClient.getMarcContent(recordId);
log.info("MARC record found by recordId = {}", recordId);
return marcRecord.getRawRecord().getContent();
for (var jsonNodeIterator = srsRecords.elements(); jsonNodeIterator.hasNext();) {
var srsRec = jsonNodeIterator.next();
var parsedRec = srsRec.get("parsedRecord");
var content = parsedRec.get("content").toString();
mrcRecords.add(jsonToMarcConverter.convertJsonRecordToMarcRecord(content));
}
return mrcRecords;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.folio.dew.domain.dto.ItemIdentifier;
import org.folio.dew.error.BulkEditException;
import org.folio.dew.error.BulkEditSkipListener;
import org.folio.dew.service.JsonToMarcConverter;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.StepScope;
Expand Down Expand Up @@ -43,6 +44,7 @@ public class BulkEditInstanceIdentifiersJobConfig {
private final BulkEditInstanceProcessor bulkEditInstanceProcessor;
private final BulkEditSkipListener bulkEditSkipListener;
private final SrsClient srsClient;
private final JsonToMarcConverter jsonToMarcConverter;

@Bean
public Job bulkEditProcessInstanceIdentifiersJob(JobCompletionNotificationListener listener, Step bulkEditInstanceStep,
Expand Down Expand Up @@ -80,7 +82,7 @@ public CompositeItemWriter<List<InstanceFormat>> compositeInstanceListWriter(@Va
@Value("#{jobParameters['" + TEMP_LOCAL_MARC_PATH + "']}") String outputMarcName) {
var writer = new CompositeItemWriter<List<InstanceFormat>>();
writer.setDelegates(Arrays.asList(new CsvListFileWriter<>(outputFileName, InstanceFormat.getInstanceColumnHeaders(), InstanceFormat.getInstanceFieldsArray(), (field, i) -> field),
new JsonListFileWriter<>(new FileSystemResource(outputFileName + ".json")), new MarcAsListStringsWriter<>(outputMarcName, srsClient)));
new JsonListFileWriter<>(new FileSystemResource(outputFileName + ".json")), new MarcAsListStringsWriter<>(outputMarcName, srsClient, jsonToMarcConverter)));
return writer;
}
}
12 changes: 4 additions & 8 deletions src/main/java/org/folio/dew/client/SrsClient.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
package org.folio.dew.client;

import com.fasterxml.jackson.databind.JsonNode;
import org.folio.dew.config.feign.FeignClientConfiguration;
import org.folio.dew.domain.dto.MarcRecord;
import org.folio.dew.domain.dto.SrsRecordCollection;
import org.springframework.cloud.openfeign.FeignClient;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestParam;

@FeignClient(name = "source-storage", configuration = FeignClientConfiguration.class)
public interface SrsClient {

@GetMapping(value = "/source-records")
SrsRecordCollection getMarc(@RequestParam("instanceId") String instanceId, @RequestParam("idType") String idType);

@GetMapping(value = "/records/{srsId}")
MarcRecord getMarcContent(@PathVariable String srsId);
@GetMapping(value = "/source-records", produces = MediaType.APPLICATION_JSON_VALUE)
JsonNode getMarc(@RequestParam("instanceId") String instanceId, @RequestParam("idType") String idType);
}
43 changes: 43 additions & 0 deletions src/main/java/org/folio/dew/service/JsonToMarcConverter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.folio.dew.service;

import lombok.extern.log4j.Log4j2;
import org.marc4j.MarcException;
import org.marc4j.MarcJsonReader;
import org.marc4j.MarcStreamWriter;
import org.springframework.stereotype.Component;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

@Log4j2
@Component
public class JsonToMarcConverter {

public String convertJsonRecordToMarcRecord(String jsonRecord) throws IOException {
var byteArrayInputStream = new ByteArrayInputStream(jsonRecord.getBytes(StandardCharsets.UTF_8));
var byteArrayOutputStream = new ByteArrayOutputStream();
try (byteArrayInputStream; byteArrayOutputStream) {
var marcJsonReader = new MarcJsonReader(byteArrayInputStream);
var marcStreamWriter = new MarcStreamWriter(byteArrayOutputStream, StandardCharsets.UTF_8.name());
writeMarc(marcJsonReader, marcStreamWriter);
return byteArrayOutputStream.toString();
} catch (IOException e) {
log.error(e.getMessage());
throw e;
}
}

private void writeMarc(MarcJsonReader marcJsonReader, MarcStreamWriter marcStreamWriter) {
try {
while (marcJsonReader.hasNext()) {
var marc = marcJsonReader.next();
marcStreamWriter.write(marc);
}
} catch (Exception e) {
log.error(e.getMessage());
throw new MarcException(e.getMessage());
}
}
}
1 change: 1 addition & 0 deletions src/main/java/org/folio/dew/utils/Constants.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ public class Constants {
public static final String STATUS_FIELD_CAN_NOT_CLEARED = "Status field can not be cleared";
public static final String STATUS_VALUE_NOT_ALLOWED = "New status value \"%s\" is not allowed";
public static final String MULTIPLE_MATCHES_MESSAGE = "Multiple matches for the same identifier.";
public static final String NO_MARC_CONTENT = "Cannot get marc content for record with id = %s, reason: %s";

public static final String MODULE_NAME = "BULKEDIT";
public static final String BULKEDIT_DIR_NAME = "bulk_edit";
Expand Down
8 changes: 0 additions & 8 deletions src/main/resources/swagger.api/bulk-edit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -683,14 +683,6 @@ components:
$ref: '../../../../folio-export-common/schemas/inventory/identifierTypeReferenceCollection.json#/IdentifierTypeReferenceCollection'
InstanceNoteType:
$ref: '../../../../folio-export-common/schemas/inventory/instanceNoteType.json#/InstanceNoteType'
RawRecord:
$ref: '../../../../folio-export-common/schemas/srs/rawRecord.json#/RawRecord'
MarcRecord:
$ref: '../../../../folio-export-common/schemas/srs/marcRecord.json#/MarcRecord'
SrsRecord:
$ref: '../../../../folio-export-common/schemas/srs/srsRecord.json#/SrsRecord'
SrsRecordCollection:
$ref: '../../../../folio-export-common/schemas/srs/srsRecordCollection.json#/SrsRecordCollection'
examples:
errors:
value:
Expand Down
54 changes: 53 additions & 1 deletion src/test/java/org/folio/dew/BulkEditTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class BulkEditTest extends BaseBatchTest {
private static final String ITEM_BARCODES_CSV = "src/test/resources/upload/item_barcodes.csv";
private static final String INSTANCE_HRIDS_CSV = "src/test/resources/upload/instance_hrids.csv";
private static final String MARC_INSTANCE_ID_CSV = "src/test/resources/upload/marc_instance_id.csv";
private static final String MARC_INSTANCE_ID_INVALID_CONTENT_CSV = "src/test/resources/upload/marc_instance_id_invalid_content.csv";
private static final String MARC_INSTANCE_HRID_CSV = "src/test/resources/upload/marc_instance_hrid.csv";
private static final String INSTANCE_ISSN_ISBN_CSV = "src/test/resources/upload/instance_ISSN_ISBN.csv";
private static final String ITEM_BARCODES_DOUBLE_QOUTES_CSV = "src/test/resources/upload/item_barcodes_double_qoutes.csv";
Expand Down Expand Up @@ -324,7 +325,58 @@ void uploadMarcInstanceIdentifiersJobTest(String identifierType, String path) th

final FileSystemResource actualResult = actualFileOutput(jobExecution.getExecutionContext().getString(OUTPUT_FILES_IN_STORAGE).split(";")[3]);

assertEquals("marc content", new String(actualResult.getContentAsByteArray()));
assertEquals("00026nam a2200025 a 4500\u001E\u001D", new String(actualResult.getContentAsByteArray()));
assertThat(jobExecution.getExitStatus()).isEqualTo(ExitStatus.COMPLETED);
}

/**
 * Launches the instance-identifiers job with the identifiers file
 * {@code MARC_INSTANCE_ID_INVALID_CONTENT_CSV} and identifier type "ID", then checks that the
 * fourth entry of {@code OUTPUT_FILES_IN_STORAGE} is empty (after trimming) and that the job
 * still finishes with {@code ExitStatus.COMPLETED}.
 */
@Test
void uploadMarcInstanceIdentifiersInvalidContentJobTest() throws Exception {

  var path = MARC_INSTANCE_ID_INVALID_CONTENT_CSV;
  JobLauncherTestUtils testLauncher = createTestLauncher(bulkEditProcessInstanceIdentifiersJob);

  var parametersBuilder = new JobParametersBuilder();
  String jobId = UUID.randomUUID().toString();
  String workDir = getWorkingDirectory(springApplicationName, BULKEDIT_DIR_NAME);
  parametersBuilder.addString(TEMP_OUTPUT_MARC_PATH, workDir + jobId + "/" + "marc_instance_id");
  parametersBuilder.addString(TEMP_LOCAL_MARC_PATH,
    getTempDirWithSeparatorSuffix() + springApplicationName + PATH_SEPARATOR + jobId + PATH_SEPARATOR + "marc_instance_id");
  parametersBuilder.addString(TEMP_LOCAL_FILE_PATH,
    getTempDirWithSeparatorSuffix() + springApplicationName + PATH_SEPARATOR + jobId + PATH_SEPARATOR + "out");
  parametersBuilder.addString(TEMP_OUTPUT_FILE_PATH, workDir + jobId + "/" + "out");

  // Fixture files are written directly: the method already declares `throws Exception`, so a
  // failure here surfaces with its full stack trace instead of a bare (possibly null) message.
  localFilesStorage.write(workDir + "marc_instance_id", new byte[32]);
  localFilesStorage.write(workDir + jobId + "/marc_instance_id.mrc", new byte[32]);
  localFilesStorage.write(workDir + "out", new byte[0]);
  localFilesStorage.write(workDir + "out.csv", new byte[0]);

  Path of = Path.of(path);
  var file = getWorkingDirectory("mod-data-export-worker", BULKEDIT_DIR_NAME) +
    FilenameUtils.removeExtension((new File(path)).getName()) + "E" + FilenameUtils.getExtension(path);
  parametersBuilder.addString(FILE_NAME, file);
  localFilesStorage.write(file, Files.readAllBytes(of));
  parametersBuilder.addLong(TOTAL_CSV_LINES, countLines(localFilesStorage, file, false), false);

  // Copy the identifiers file into a per-job temp directory and pass its path to the job.
  var tempDir = getTempDirWithSeparatorSuffix() + springApplicationName + PATH_SEPARATOR + jobId;
  var tempFile = tempDir + PATH_SEPARATOR + of.getFileName();
  Files.createDirectories(Path.of(tempDir));
  Files.write(Path.of(tempFile), Files.readAllBytes(of));
  parametersBuilder.addString(TEMP_IDENTIFIERS_FILE_NAME, tempFile);

  parametersBuilder.addString(JobParameterNames.JOB_ID, jobId);
  parametersBuilder.addString(EXPORT_TYPE, BULK_EDIT_IDENTIFIERS.getValue());
  parametersBuilder.addString(ENTITY_TYPE, INSTANCE.getValue());
  parametersBuilder.addString(IDENTIFIER_TYPE, "ID");

  final JobParameters jobParameters = parametersBuilder.toJobParameters();

  JobExecution jobExecution = testLauncher.launchJob(jobParameters);

  final FileSystemResource actualResult = actualFileOutput(jobExecution.getExecutionContext().getString(OUTPUT_FILES_IN_STORAGE).split(";")[3]);

  // Decode explicitly as UTF-8 so the assertion does not depend on the platform default charset.
  assertEquals("", new String(actualResult.getContentAsByteArray(), java.nio.charset.StandardCharsets.UTF_8).trim());
  assertThat(jobExecution.getExitStatus()).isEqualTo(ExitStatus.COMPLETED);
}

Expand Down
13 changes: 13 additions & 0 deletions src/test/resources/mappings/instances-query.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,19 @@
}
}
},
{
"request": {
"method": "GET",
"url": "/inventory/instances?query=id%3D%3D7772796a-b88b-4991-a9f7-2e368217c487&limit=1"
},
"response": {
"status": 200,
"body": "{\n \"instances\": [\n {\n \"id\": \"7772796a-b88b-4991-a9f7-2e368217c487\",\n \"_version\": \"3\",\n \"hrid\": \"inst000000000022\",\n \"source\": \"MARC\",\n \"title\": \"American Bar Association journal.\",\n \"administrativeNotes\": [],\n \"indexTitle\": \"American Bar Association journal.\",\n \"parentInstances\": [],\n \"childInstances\": [],\n \"isBoundWith\": false,\n \"alternativeTitles\": [],\n \"editions\": [],\n \"series\": [],\n \"identifiers\": [],\n \"contributors\": [\n {\n \"authorityId\": null,\n \"contributorNameTypeId\": \"d376e36c-b759-4fed-8502-7130d1eeff39\",\n \"name\": \"American Bar Association\",\n \"contributorTypeId\": \"6e09d47d-95e2-4d8a-831b-f777b8ef6d81\",\n \"contributorTypeText\": \"\",\n \"primary\": null\n },\n {\n \"authorityId\": null,\n \"contributorNameTypeId\": \"d376e36c-b759-4fed-8502-7130d1eeff39\",\n \"name\": \"American Bar Association. Journal\",\n \"contributorTypeId\": \"06b2cbd8-66bf-4956-9d90-97c9776365a4\",\n \"contributorTypeText\": \"\",\n \"primary\": null\n }\n ],\n \"subjects\": [],\n \"classifications\": [],\n \"publication\": [],\n \"publicationFrequency\": [\n \"Monthly, 1921-83\",\n \"Quarterly, 1915-20\"\n ],\n \"publicationRange\": [\n \"Began with vol. 1, no. 1 (Jan. 1915); ceased with v. 69, [no.12] (Dec. 1983)\"\n ],\n \"electronicAccess\": [],\n \"instanceTypeId\": \"30fffe0e-e985-4144-b2e2-1e8179bdb41f\",\n \"instanceFormatIds\": [\n \"5cb91d15-96b1-4b8a-bf60-ec310538da66\"\n ],\n \"physicalDescriptions\": [\n \"69 v. : ill. 
; 23-30 cm.\"\n ],\n \"languages\": [\n \"eng\"\n ],\n \"notes\": [],\n \"previouslyHeld\": false,\n \"discoverySuppress\": false,\n \"statisticalCodeIds\": [],\n \"metadata\": {\n \"createdDate\": \"2023-11-10T11:54:16.187+00:00\",\n \"createdByUserId\": \"cffb2565-07fc-470b-86c6-17d8ce14432e\",\n \"updatedDate\": \"2024-01-05T09:52:26.651+00:00\",\n \"updatedByUserId\": \"ca6022de-2644-46fe-b6f2-78df15483721\"\n },\n \"tags\": {\n \"tagList\": []\n },\n \"natureOfContentTermIds\": [\n\t\t\t\t\"921e6d93-bafb-4a02-b62f-dcd027c45406\"\n\t\t\t],\n \"precedingTitles\": [],\n \"succeedingTitles\": []\n }\n ],\n \"totalRecords\": 1\n}",
"headers": {
"Content-Type": "application/json"
}
}
},
{
"request": {
"method": "GET",
Expand Down
17 changes: 17 additions & 0 deletions src/test/resources/mappings/srs-record-invalid-content.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"mappings": [
{
"request": {
"method": "GET",
"url": "/source-storage/source-records?instanceId=7772796a-b88b-4991-a9f7-2e368217c487&idType=INSTANCE"
},
"response": {
"status": 200,
"body": "{\n \"sourceRecords\": [\n {\n \"recordId\": \"777fad9e-7f8e-4d8e-9a71-00d251817866\", \"parsedRecord\": {\n \"id\": \"8e5ea07d-0c06-4a3b-ab34-f4fc3f76bc09\",\n \"conten\": {\"ghf\": \"marc content\"} }}\n ],\n \"totalRecords\": 1\n}",
"headers": {
"Content-Type": "application/json"
}
}
}
]
}
2 changes: 1 addition & 1 deletion src/test/resources/mappings/srs-records.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
},
"response": {
"status": 200,
"body": "{\n \"sourceRecords\": [\n {\n \"recordId\": \"666fad9e-7f8e-4d8e-9a71-00d251817866\" }\n ],\n \"totalRecords\": 1\n}",
"body": "{\n \"sourceRecords\": [\n {\n \"recordId\": \"666fad9e-7f8e-4d8e-9a71-00d251817866\", \"parsedRecord\": {\n \"id\": \"8e5ea07d-0c06-4a3b-ab34-f4fc3f76bc09\",\n \"content\": {\"000\": \"marc content\"} }\n }\n ],\n \"totalRecords\": 1\n}",
"headers": {
"Content-Type": "application/json"
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7772796a-b88b-4991-a9f7-2e368217c487

0 comments on commit 693e7dd

Please sign in to comment.