diff --git a/doc/requests/examples-sid.http b/doc/requests/examples-sid.http index 9ef7af8..de25304 100644 --- a/doc/requests/examples-sid.http +++ b/doc/requests/examples-sid.http @@ -92,7 +92,7 @@ Content-Type: application/json { "name": "fnr", "pattern": "**/fnr", - "func": "map-sid(keyId=papis-key-1,snapshotDate=2023-07-05)" + "func": "map-sid-ff31(keyId=papis-key-1,snapshotDate=2023-07-05)" } ] } @@ -121,7 +121,7 @@ Content-Type: application/json { "name": "fnr", "pattern": "**/fnr", - "func": "map-sid(keyId=papis-key-1)" + "func": "map-sid-ff31(keyId=papis-key-1)" } ] } diff --git a/src/main/java/no/ssb/dlp/pseudo/service/pseudo/PseudoConfigSplitter.java b/src/main/java/no/ssb/dlp/pseudo/service/pseudo/PseudoConfigSplitter.java index 4207c28..0cd568e 100644 --- a/src/main/java/no/ssb/dlp/pseudo/service/pseudo/PseudoConfigSplitter.java +++ b/src/main/java/no/ssb/dlp/pseudo/service/pseudo/PseudoConfigSplitter.java @@ -18,8 +18,11 @@ * Disclaimer: This is a stop gap "solution", used to support chaining of SID Mapping with pseudonymization. * The API contract could be changed to support multiple PseudoConfig transformations, but we need to think more aobut * exactly how this should be expressed. + * + * @deprecated This class will be removed after the SID Mapping function is rewritten */ @Singleton +@Deprecated public class PseudoConfigSplitter { public List splitIfNecessary(PseudoConfig pseudoConfig) { @@ -37,7 +40,7 @@ public List splitIfNecessary(PseudoConfig pseudoConfig) { private static PseudoConfig filterMappingRules(PseudoConfig pseudoConfig) { PseudoConfig filteredPseudoConfig = new PseudoConfig(); filteredPseudoConfig.setRules(pseudoConfig.getRules().stream() - .filter(r -> r.getFunc().startsWith(MAP_SID)) + .filter(r -> r.getFunc().startsWith(MAP_SID + "(")) .toList() ); @@ -56,7 +59,7 @@ private static String convertMapSidFuncToFf31Func(String mapSidFunc) { private static PseudoConfig replaceSidMappingRules(PseudoConfig pseudoConfig, String newName) { pseudoConfig.setRules(pseudoConfig.getRules().stream() .map(r -> { - if (r.getFunc().startsWith(MAP_SID)) { + if (r.getFunc().startsWith(MAP_SID + "(")) { return new PseudoFuncRule(r.getName(), r.getPattern(), convertMapSidFuncToFf31Func(r.getFunc())); } else { diff --git a/src/main/java/no/ssb/dlp/pseudo/service/pseudo/RecordMapProcessorFactory.java b/src/main/java/no/ssb/dlp/pseudo/service/pseudo/RecordMapProcessorFactory.java index b5cb74d..4f6b375 100644 --- a/src/main/java/no/ssb/dlp/pseudo/service/pseudo/RecordMapProcessorFactory.java +++ b/src/main/java/no/ssb/dlp/pseudo/service/pseudo/RecordMapProcessorFactory.java @@ -2,12 +2,14 @@ import jakarta.inject.Singleton; import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; import no.ssb.dapla.dlp.pseudo.func.PseudoFuncInput; import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput; import no.ssb.dapla.dlp.pseudo.func.TransformDirection; import no.ssb.dapla.dlp.pseudo.func.fpe.FpeFunc; import no.ssb.dapla.dlp.pseudo.func.map.MapFunc; import no.ssb.dapla.dlp.pseudo.func.map.MapFuncConfig; +import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException; import no.ssb.dapla.dlp.pseudo.func.tink.fpe.TinkFpeFunc; import no.ssb.dlp.pseudo.core.PseudoException; import no.ssb.dlp.pseudo.core.PseudoKeyset; @@ -35,6 +37,7 @@ @RequiredArgsConstructor @Singleton +@Slf4j public class RecordMapProcessorFactory { private final PseudoSecrets pseudoSecrets; @@ -45,7 +48,7 @@ public RecordMapProcessor newPseudonymizeRecordProcesso for (PseudoConfig config : pseudoConfigs) { final PseudoFuncs fieldPseudonymizer = newPseudoFuncs(config.getRules(), pseudoKeysetsOf(config.getKeysets())); - chain.preprocessor((f, v) -> init(fieldPseudonymizer,TransformDirection.APPLY, f, v)); + chain.preprocessor((f, v) -> init(fieldPseudonymizer, TransformDirection.APPLY, f, v)); chain.register((f, v) -> process(PSEUDONYMIZE, fieldPseudonymizer, f, v, metadataProcessor)); } return new RecordMapProcessor<>(chain, metadataProcessor); @@ -58,7 +61,7 @@ public RecordMapProcessor newDepseudonymizeRecordProces for (PseudoConfig config : pseudoConfigs) { final PseudoFuncs fieldDepseudonymizer = newPseudoFuncs(config.getRules(), pseudoKeysetsOf(config.getKeysets())); - chain.preprocessor((f, v) -> init(fieldDepseudonymizer, TransformDirection.RESTORE, f, v)); + chain.preprocessor((f, v) -> init(fieldDepseudonymizer, TransformDirection.RESTORE, f, v)); chain.register((f, v) -> process(DEPSEUDONYMIZE, fieldDepseudonymizer, f, v, metadataProcessor)); } @@ -116,7 +119,9 @@ private String process(PseudoOperation operation, } try { PseudoFuncDeclaration funcDeclaration = PseudoFuncDeclaration.fromString(match.getRule().getFunc()); - final boolean isSidMapping = funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID); + final boolean isSidMapping = funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID) + || funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID_FF31) + || funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID_DAEAD); if (operation == PSEUDONYMIZE) { PseudoFuncOutput output = match.getFunc().apply(PseudoFuncInput.of(varValue)); @@ -135,6 +140,7 @@ private String process(PseudoOperation operation, .encryptionAlgorithm(match.getFunc().getAlgorithm()) .stableIdentifierVersion(sidSnapshotDate) .stableIdentifierType(STABLE_IDENTIFIER_TYPE) + .encryptionAlgorithmParameters(funcDeclaration.getArgs()) .build()); } else { metadataProcessor.addMetadata(FieldMetadata.builder() @@ -164,6 +170,12 @@ private String process(PseudoOperation operation, PseudoFuncOutput output = match.getFunc().restore(PseudoFuncInput.of(varValue)); return output.getValue(); } + } catch (MappingNotFoundException e) { + // Unsuccessful SID-mapping + log.warn(e.getMessage()); + metadataProcessor.addMetric(FieldMetric.MISSING_SID); + metadataProcessor.addLog(e.getMessage()); + return null; } catch (Exception e) { throw new PseudoException(String.format("pseudonymize error - field='%s', originalValue='%s'", field.getPath(), varValue), e); diff --git a/src/main/java/no/ssb/dlp/pseudo/service/sid/LocalSidService.java b/src/main/java/no/ssb/dlp/pseudo/service/sid/LocalSidService.java index 8d84ef6..9fc1c94 100644 --- a/src/main/java/no/ssb/dlp/pseudo/service/sid/LocalSidService.java +++ b/src/main/java/no/ssb/dlp/pseudo/service/sid/LocalSidService.java @@ -4,6 +4,7 @@ import io.micronaut.context.annotation.Requires; import io.micronaut.core.async.publisher.Publishers; import lombok.RequiredArgsConstructor; +import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException; import no.ssb.dlp.pseudo.service.sid.local.SidCache; import org.reactivestreams.Publisher; @@ -26,7 +27,7 @@ class LocalSidService implements SidService { @Override public Publisher lookupFnr(String fnr, Optional snapshot) { String currentSnr = sidCache.getCurrentSnrForFnr(fnr) - .orElseThrow(() -> new LocalSidService.NoSidMappingFoundException("No SID matching fnr starting from=" + .orElseThrow(() -> new MappingNotFoundException("No SID matching fnr starting from=" + Strings.padEnd(fnr, 6, ' ').substring(0, 6))); return Publishers.just(sidCache.getCurrentFnrForSnr(currentSnr).map(currentFnr -> new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr).build()) @@ -36,7 +37,7 @@ public Publisher lookupFnr(String fnr, Optional snapshot) { @Override public Publisher lookupSnr(String snr, Optional snapshot) { String currentFnr = sidCache.getCurrentFnrForSnr(snr) - .orElseThrow(() -> new LocalSidService.NoSidMappingFoundException("No SID matching snr starting from=" + .orElseThrow(() -> new MappingNotFoundException("No SID matching snr starting from=" + Strings.padEnd(snr, 4, ' ').substring(0, 4))); return Publishers.just(sidCache.getCurrentSnrForFnr(currentFnr).map(currentSnr -> new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr).build()) @@ -45,30 +46,24 @@ public Publisher lookupSnr(String snr, Optional snapshot) { @Override public Publisher> lookupFnr(List fnrList, Optional snapshot) { - return Publishers.just(fnrList.stream().map(fnr -> { - String currentSnr = sidCache.getCurrentSnrForFnr(fnr).orElseThrow(() -> - new LocalSidService.NoSidMappingFoundException("No SID matching fnr starting from=" - + Strings.padEnd(fnr, 6, ' ').substring(0, 6))); - return sidCache.getCurrentFnrForSnr(currentSnr).map(currentFnr -> - new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr) - .datasetExtractionSnapshotTime(snapshot.orElse(null)).build()) - .orElse(null); - }).collect(Collectors.toMap(SidInfo::fnr, sidInfo -> sidInfo)) - ); + return Publishers.just(sidCache.getCurrentSnrList(fnrList).stream() + .map(currentSnr -> new SidInfo.SidInfoBuilder() + .snr(currentSnr) + .fnr(sidCache.getCurrentFnrForSnr(currentSnr).orElse(null)) + .datasetExtractionSnapshotTime(snapshot.orElse(null)).build() + ) + .collect(Collectors.toMap(SidInfo::fnr, sidInfo -> sidInfo))); } @Override public Publisher> lookupSnr(List snrList, Optional snapshot) { - return Publishers.just(snrList.stream().map(snr -> { - String currentFnr = sidCache.getCurrentFnrForSnr(snr).orElseThrow(() -> - new LocalSidService.NoSidMappingFoundException("No SID matching snr starting from=" - + Strings.padEnd(snr, 4, ' ').substring(0, 4))); - return sidCache.getCurrentSnrForFnr(currentFnr).map(currentSnr -> - new SidInfo.SidInfoBuilder().fnr(currentFnr).snr(currentSnr) - .datasetExtractionSnapshotTime(snapshot.orElse(null)).build()) - .orElse(null); - }).collect(Collectors.toMap(SidInfo::snr, sidInfo -> sidInfo)) - ); + return Publishers.just(sidCache.getCurrentFnrList(snrList).stream() + .map(currentFnr -> new SidInfo.SidInfoBuilder() + .fnr(currentFnr) + .snr(sidCache.getCurrentSnrForFnr(currentFnr).orElse(null)) + .datasetExtractionSnapshotTime(snapshot.orElse(null)).build() + ) + .collect(Collectors.toMap(SidInfo::snr, sidInfo -> sidInfo))); } @Override @@ -83,10 +78,4 @@ public Publisher getSnapshots() { return Publishers.just(SnapshotInfo.builder().items(List.of("2023-04-25")).build()); } - public static class NoSidMappingFoundException extends RuntimeException { - public NoSidMappingFoundException(String message) { - super(message); - } - } - } diff --git a/src/main/java/no/ssb/dlp/pseudo/service/sid/SidMapper.java b/src/main/java/no/ssb/dlp/pseudo/service/sid/SidMapper.java index c3c5b2f..869d533 100644 --- a/src/main/java/no/ssb/dlp/pseudo/service/sid/SidMapper.java +++ b/src/main/java/no/ssb/dlp/pseudo/service/sid/SidMapper.java @@ -10,6 +10,7 @@ import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput; import no.ssb.dapla.dlp.pseudo.func.map.MapFuncConfig; import no.ssb.dapla.dlp.pseudo.func.map.Mapper; +import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException; import no.ssb.dlp.pseudo.service.Application; import org.reactivestreams.Publisher; import org.reactivestreams.Subscriber; @@ -74,72 +75,62 @@ private PseudoFuncOutput mapTo(String identifier, boolean isFnr) { if (identifier == null) { return PseudoFuncOutput.of(null); } - try { - PseudoFuncOutput output; - // Execute the bulk request if necessary - if (bulkRequest.isEmpty()) { - // Split fnrs or snrs into chunks of BULK_SIZE - for (List bulkIdentifiers : Lists.partition(List.copyOf(identifiers), partitionSize)) { - log.info("Execute SID-mapping bulk request"); - final ObservableSubscriber> subscriber; - - if (isFnr) { - subscriber = ObservableSubscriber.subscribe( - sidService.lookupFnr(bulkIdentifiers, getSnapshot())); - } else { - subscriber = ObservableSubscriber.subscribe( - sidService.lookupSnr(bulkIdentifiers, getSnapshot())); - } - - for (String id : bulkIdentifiers) { - bulkRequest.put(id, subscriber); - } + // Execute the bulk request if necessary + if (bulkRequest.isEmpty()) { + // Split fnrs or snrs into chunks of BULK_SIZE + for (List bulkIdentifiers : Lists.partition(List.copyOf(identifiers), partitionSize)) { + log.info("Execute SID-mapping bulk request"); + final ObservableSubscriber> subscriber; + + if (isFnr) { + subscriber = ObservableSubscriber.subscribe( + sidService.lookupFnr(bulkIdentifiers, getSnapshot())); + } else { + subscriber = ObservableSubscriber.subscribe( + sidService.lookupSnr(bulkIdentifiers, getSnapshot())); + } + + for (String id : bulkIdentifiers) { + bulkRequest.put(id, subscriber); } } - SidInfo result = bulkRequest.get(identifier).awaitResult() - .orElseThrow(() -> new RuntimeException("SID service did not respond")).get(identifier); - - output = PseudoFuncOutput.of(isFnr ? result.snr() : result.fnr()); - createMappingLogsAndOutput(result, isFnr, identifier, output); - output.addMetadata(MapFuncConfig.Param.SNAPSHOT_DATE, result.datasetExtractionSnapshotTime()); - return output; - - } catch (LocalSidService.NoSidMappingFoundException e) { - String message = isFnr ? - String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier)) : - String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier)); - log.warn(message); - PseudoFuncOutput output = PseudoFuncOutput.of(identifier); - output.addWarning(message); - return output; } + SidInfo result = bulkRequest.get(identifier).awaitResult() + .orElseThrow(() -> new RuntimeException("SID service did not respond")) + .get(identifier); + + PseudoFuncOutput output = createMappingLogsAndOutput(result, isFnr, identifier); + output.addMetadata(MapFuncConfig.Param.SNAPSHOT_DATE, result.datasetExtractionSnapshotTime()); + return output; } - private void createMappingLogsAndOutput(SidInfo sidInfo, boolean isFnr, String identifier, PseudoFuncOutput pseudoFuncOutput) { + private PseudoFuncOutput createMappingLogsAndOutput(SidInfo sidInfo, boolean isFnr, String identifier) { //Mapping for fnr if (isFnr) { if (sidInfo == null || sidInfo.snr() == null) { - String message = String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier)); - log.warn(message); - pseudoFuncOutput.addWarning(message); + throw new MappingNotFoundException(String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier))); } else if (identifier.equals(sidInfo.snr())) { String message = String.format(INCORRECT_MATCHING_FNR, Redactor.redactFnr(identifier)); log.warn(message); - pseudoFuncOutput.addWarning(message); + PseudoFuncOutput output = PseudoFuncOutput.of(sidInfo.snr()); + output.addWarning(message); + return output; } + return PseudoFuncOutput.of(sidInfo.snr()); } //Mapping for snr else { if (sidInfo == null || sidInfo.fnr() == null) { - String message = String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier)); - log.warn(message); - pseudoFuncOutput.addWarning(message); + throw new MappingNotFoundException(String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier))); } else if (identifier.equals(sidInfo.fnr())) { String message = String.format(INCORRECT_MATCHING_SNR, Redactor.redactSnr(identifier)); log.warn(message); - pseudoFuncOutput.addWarning(message); + PseudoFuncOutput output = PseudoFuncOutput.of(sidInfo.fnr()); + output.addWarning(message); + return output; } + return PseudoFuncOutput.of(sidInfo.fnr()); } } diff --git a/src/main/java/no/ssb/dlp/pseudo/service/sid/local/SidCache.java b/src/main/java/no/ssb/dlp/pseudo/service/sid/local/SidCache.java index ce619cc..2944893 100644 --- a/src/main/java/no/ssb/dlp/pseudo/service/sid/local/SidCache.java +++ b/src/main/java/no/ssb/dlp/pseudo/service/sid/local/SidCache.java @@ -7,6 +7,7 @@ import java.time.Instant; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -58,7 +59,18 @@ public Optional getCurrentFnrForSnr(String snr) { return Optional.ofNullable(currentFnr); } - + public List getCurrentSnrList(List fnrs) { + return fnrToCurrentSnr.entrySet().stream() + .filter(entry -> fnrs.contains(entry.getKey())) + .map(Map.Entry::getValue) + .toList(); + } + public List getCurrentFnrList(List snrs) { + return snrToCurrentFnr.entrySet().stream() + .filter(entry -> snrs.contains(entry.getKey())) + .map(Map.Entry::getValue) + .toList(); + } private void validateCacheReady() throws SidIndexUnavailableException { if (state != State.INITIALIZED) { throw new SidIndexUnavailableException("SID index is not currently available. Wait a minute and retry. State=" + state); diff --git a/src/test/java/no/ssb/dlp/pseudo/service/sid/SidMapperTest.java b/src/test/java/no/ssb/dlp/pseudo/service/sid/SidMapperTest.java index e75062a..6c3160e 100644 --- a/src/test/java/no/ssb/dlp/pseudo/service/sid/SidMapperTest.java +++ b/src/test/java/no/ssb/dlp/pseudo/service/sid/SidMapperTest.java @@ -8,6 +8,7 @@ import no.ssb.dapla.dlp.pseudo.func.PseudoFuncInput; import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput; import no.ssb.dapla.dlp.pseudo.func.map.Mapper; +import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException; import no.ssb.dlp.pseudo.service.Application; import org.apache.groovy.util.Maps; import org.junit.Assert; @@ -21,6 +22,7 @@ import java.util.Optional; import java.util.ServiceLoader; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.*; /** @@ -182,9 +184,10 @@ public void testMapForFnrWithNoSnr(){ new RuntimeException("SidMapper class not found")); mapper.setConfig(new HashMap<>()); mapper.init(PseudoFuncInput.of("11854898346")); - PseudoFuncOutput output = mapper.map(PseudoFuncInput.of("11854898346")); + assertThrows(MappingNotFoundException.class, () -> + mapper.map(PseudoFuncInput.of("11854898346")) + ); - Assertions.assertEquals("No SID-mapping found for fnr 118548*****", output.getWarnings().getFirst()); verify(sidService, times(1)).lookupFnr(anyList(), eq(Optional.empty())); assertLogsForFnrOrSnr("11854898346", ""); } @@ -239,9 +242,9 @@ public void testMapForSnrWithNoFnr(){ new RuntimeException("SidMapper class not found")); mapper.setConfig(new HashMap<>()); mapper.init(PseudoFuncInput.of("0001ha3")); - PseudoFuncOutput output = mapper.restore(PseudoFuncInput.of("0001ha3")); - - Assertions.assertEquals("No SID-mapping found for snr 000****", output.getWarnings().getFirst()); + assertThrows(MappingNotFoundException.class, () -> + mapper.restore(PseudoFuncInput.of("0001ha3")) + ); verify(sidService, times(1)).lookupSnr(anyList(), eq(Optional.empty())); assertLogsForFnrOrSnr("11854898346", "0001ha3"); } diff --git a/src/test/resources/data/person_3_sid_deid.json b/src/test/resources/data/person_3_sid_deid.json index 986cc79..6a63879 100644 --- a/src/test/resources/data/person_3_sid_deid.json +++ b/src/test/resources/data/person_3_sid_deid.json @@ -14,7 +14,7 @@ "fodselsdato": "060970" }, { - "fnr": "BP41lBL", + "fnr": "2TdM5b9", "fornavn": "Kristoffer", "etternavn": "Pedersen", "kjonn": "M",