Skip to content

Commit

Permalink
Deprecate use of PseudoConfigSplitter. Adapt to new mapping functions (
Browse files Browse the repository at this point in the history
…#99)

* Deprecate use of PseudoConfigSplitter and some adaptations to the new mapping functions in pseudo-service-core
* Add MAP_SID_DAEAD
* Use MappingNotFoundException to handle missing SID mappings.
* Use the function map-sid-ff31 in examples
  • Loading branch information
bjornandre authored Mar 15, 2024
1 parent 546e633 commit 4d8970c
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 87 deletions.
4 changes: 2 additions & 2 deletions doc/requests/examples-sid.http
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ Content-Type: application/json
{
"name": "fnr",
"pattern": "**/fnr",
"func": "map-sid(keyId=papis-key-1,snapshotDate=2023-07-05)"
"func": "map-sid-ff31(keyId=papis-key-1,snapshotDate=2023-07-05)"
}
]
}
Expand Down Expand Up @@ -121,7 +121,7 @@ Content-Type: application/json
{
"name": "fnr",
"pattern": "**/fnr",
"func": "map-sid(keyId=papis-key-1)"
"func": "map-sid-ff31(keyId=papis-key-1)"
}
]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
* Disclaimer: This is a stop gap "solution", used to support chaining of SID Mapping with pseudonymization.
* The API contract could be changed to support multiple PseudoConfig transformations, but we need to think more about
* exactly how this should be expressed.
*
* @deprecated This class will be removed after the SID Mapping function is rewritten
*/
@Singleton
@Deprecated
public class PseudoConfigSplitter {

public List<PseudoConfig> splitIfNecessary(PseudoConfig pseudoConfig) {
Expand All @@ -37,7 +40,7 @@ public List<PseudoConfig> splitIfNecessary(PseudoConfig pseudoConfig) {
private static PseudoConfig filterMappingRules(PseudoConfig pseudoConfig) {
PseudoConfig filteredPseudoConfig = new PseudoConfig();
filteredPseudoConfig.setRules(pseudoConfig.getRules().stream()
.filter(r -> r.getFunc().startsWith(MAP_SID))
.filter(r -> r.getFunc().startsWith(MAP_SID + "("))
.toList()
);

Expand All @@ -56,7 +59,7 @@ private static String convertMapSidFuncToFf31Func(String mapSidFunc) {
private static PseudoConfig replaceSidMappingRules(PseudoConfig pseudoConfig, String newName) {
pseudoConfig.setRules(pseudoConfig.getRules().stream()
.map(r -> {
if (r.getFunc().startsWith(MAP_SID)) {
if (r.getFunc().startsWith(MAP_SID + "(")) {
return new PseudoFuncRule(r.getName(), r.getPattern(), convertMapSidFuncToFf31Func(r.getFunc()));
}
else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

import jakarta.inject.Singleton;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import no.ssb.dapla.dlp.pseudo.func.PseudoFuncInput;
import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput;
import no.ssb.dapla.dlp.pseudo.func.TransformDirection;
import no.ssb.dapla.dlp.pseudo.func.fpe.FpeFunc;
import no.ssb.dapla.dlp.pseudo.func.map.MapFunc;
import no.ssb.dapla.dlp.pseudo.func.map.MapFuncConfig;
import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException;
import no.ssb.dapla.dlp.pseudo.func.tink.fpe.TinkFpeFunc;
import no.ssb.dlp.pseudo.core.PseudoException;
import no.ssb.dlp.pseudo.core.PseudoKeyset;
Expand Down Expand Up @@ -35,6 +37,7 @@

@RequiredArgsConstructor
@Singleton
@Slf4j
public class RecordMapProcessorFactory {
private final PseudoSecrets pseudoSecrets;

Expand All @@ -45,7 +48,7 @@ public RecordMapProcessor<PseudoMetadataProcessor> newPseudonymizeRecordProcesso
for (PseudoConfig config : pseudoConfigs) {
final PseudoFuncs fieldPseudonymizer = newPseudoFuncs(config.getRules(),
pseudoKeysetsOf(config.getKeysets()));
chain.preprocessor((f, v) -> init(fieldPseudonymizer,TransformDirection.APPLY, f, v));
chain.preprocessor((f, v) -> init(fieldPseudonymizer, TransformDirection.APPLY, f, v));
chain.register((f, v) -> process(PSEUDONYMIZE, fieldPseudonymizer, f, v, metadataProcessor));
}
return new RecordMapProcessor<>(chain, metadataProcessor);
Expand All @@ -58,7 +61,7 @@ public RecordMapProcessor<PseudoMetadataProcessor> newDepseudonymizeRecordProces
for (PseudoConfig config : pseudoConfigs) {
final PseudoFuncs fieldDepseudonymizer = newPseudoFuncs(config.getRules(),
pseudoKeysetsOf(config.getKeysets()));
chain.preprocessor((f, v) -> init(fieldDepseudonymizer, TransformDirection.RESTORE, f, v));
chain.preprocessor((f, v) -> init(fieldDepseudonymizer, TransformDirection.RESTORE, f, v));
chain.register((f, v) -> process(DEPSEUDONYMIZE, fieldDepseudonymizer, f, v, metadataProcessor));
}

Expand Down Expand Up @@ -116,7 +119,9 @@ private String process(PseudoOperation operation,
}
try {
PseudoFuncDeclaration funcDeclaration = PseudoFuncDeclaration.fromString(match.getRule().getFunc());
final boolean isSidMapping = funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID);
final boolean isSidMapping = funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID)
|| funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID_FF31)
|| funcDeclaration.getFuncName().equals(PseudoFuncNames.MAP_SID_DAEAD);

if (operation == PSEUDONYMIZE) {
PseudoFuncOutput output = match.getFunc().apply(PseudoFuncInput.of(varValue));
Expand All @@ -135,6 +140,7 @@ private String process(PseudoOperation operation,
.encryptionAlgorithm(match.getFunc().getAlgorithm())
.stableIdentifierVersion(sidSnapshotDate)
.stableIdentifierType(STABLE_IDENTIFIER_TYPE)
.encryptionAlgorithmParameters(funcDeclaration.getArgs())
.build());
} else {
metadataProcessor.addMetadata(FieldMetadata.builder()
Expand Down Expand Up @@ -164,6 +170,12 @@ private String process(PseudoOperation operation,
PseudoFuncOutput output = match.getFunc().restore(PseudoFuncInput.of(varValue));
return output.getValue();
}
} catch (MappingNotFoundException e) {
// Unsuccessful SID-mapping
log.warn(e.getMessage());
metadataProcessor.addMetric(FieldMetric.MISSING_SID);
metadataProcessor.addLog(e.getMessage());
return null;
} catch (Exception e) {
throw new PseudoException(String.format("pseudonymize error - field='%s', originalValue='%s'",
field.getPath(), varValue), e);
Expand Down
45 changes: 17 additions & 28 deletions src/main/java/no/ssb/dlp/pseudo/service/sid/LocalSidService.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import io.micronaut.context.annotation.Requires;
import io.micronaut.core.async.publisher.Publishers;
import lombok.RequiredArgsConstructor;
import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException;
import no.ssb.dlp.pseudo.service.sid.local.SidCache;
import org.reactivestreams.Publisher;

Expand All @@ -26,7 +27,7 @@ class LocalSidService implements SidService {
@Override
public Publisher<SidInfo> lookupFnr(String fnr, Optional<String> snapshot) {
String currentSnr = sidCache.getCurrentSnrForFnr(fnr)
.orElseThrow(() -> new LocalSidService.NoSidMappingFoundException("No SID matching fnr starting from="
.orElseThrow(() -> new MappingNotFoundException("No SID matching fnr starting from="
+ Strings.padEnd(fnr, 6, ' ').substring(0, 6)));
return Publishers.just(sidCache.getCurrentFnrForSnr(currentSnr).map(currentFnr ->
new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr).build())
Expand All @@ -36,7 +37,7 @@ public Publisher<SidInfo> lookupFnr(String fnr, Optional<String> snapshot) {
@Override
public Publisher<SidInfo> lookupSnr(String snr, Optional<String> snapshot) {
String currentFnr = sidCache.getCurrentFnrForSnr(snr)
.orElseThrow(() -> new LocalSidService.NoSidMappingFoundException("No SID matching snr starting from="
.orElseThrow(() -> new MappingNotFoundException("No SID matching snr starting from="
+ Strings.padEnd(snr, 4, ' ').substring(0, 4)));
return Publishers.just(sidCache.getCurrentSnrForFnr(currentFnr).map(currentSnr ->
new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr).build())
Expand All @@ -45,30 +46,24 @@ public Publisher<SidInfo> lookupSnr(String snr, Optional<String> snapshot) {

@Override
public Publisher<Map<String, SidInfo>> lookupFnr(List<String> fnrList, Optional<String> snapshot) {
return Publishers.just(fnrList.stream().map(fnr -> {
String currentSnr = sidCache.getCurrentSnrForFnr(fnr).orElseThrow(() ->
new LocalSidService.NoSidMappingFoundException("No SID matching fnr starting from="
+ Strings.padEnd(fnr, 6, ' ').substring(0, 6)));
return sidCache.getCurrentFnrForSnr(currentSnr).map(currentFnr ->
new SidInfo.SidInfoBuilder().snr(currentSnr).fnr(currentFnr)
.datasetExtractionSnapshotTime(snapshot.orElse(null)).build())
.orElse(null);
}).collect(Collectors.toMap(SidInfo::fnr, sidInfo -> sidInfo))
);
return Publishers.just(sidCache.getCurrentSnrList(fnrList).stream()
.map(currentSnr -> new SidInfo.SidInfoBuilder()
.snr(currentSnr)
.fnr(sidCache.getCurrentFnrForSnr(currentSnr).orElse(null))
.datasetExtractionSnapshotTime(snapshot.orElse(null)).build()
)
.collect(Collectors.toMap(SidInfo::fnr, sidInfo -> sidInfo)));
}

@Override
public Publisher<Map<String, SidInfo>> lookupSnr(List<String> snrList, Optional<String> snapshot) {
return Publishers.just(snrList.stream().map(snr -> {
String currentFnr = sidCache.getCurrentFnrForSnr(snr).orElseThrow(() ->
new LocalSidService.NoSidMappingFoundException("No SID matching snr starting from="
+ Strings.padEnd(snr, 4, ' ').substring(0, 4)));
return sidCache.getCurrentSnrForFnr(currentFnr).map(currentSnr ->
new SidInfo.SidInfoBuilder().fnr(currentFnr).snr(currentSnr)
.datasetExtractionSnapshotTime(snapshot.orElse(null)).build())
.orElse(null);
}).collect(Collectors.toMap(SidInfo::snr, sidInfo -> sidInfo))
);
return Publishers.just(sidCache.getCurrentFnrList(snrList).stream()
.map(currentFnr -> new SidInfo.SidInfoBuilder()
.fnr(currentFnr)
.snr(sidCache.getCurrentSnrForFnr(currentFnr).orElse(null))
.datasetExtractionSnapshotTime(snapshot.orElse(null)).build()
)
.collect(Collectors.toMap(SidInfo::snr, sidInfo -> sidInfo)));
}

@Override
Expand All @@ -83,10 +78,4 @@ public Publisher<SnapshotInfo> getSnapshots() {
return Publishers.just(SnapshotInfo.builder().items(List.of("2023-04-25")).build());
}

public static class NoSidMappingFoundException extends RuntimeException {
public NoSidMappingFoundException(String message) {
super(message);
}
}

}
81 changes: 36 additions & 45 deletions src/main/java/no/ssb/dlp/pseudo/service/sid/SidMapper.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput;
import no.ssb.dapla.dlp.pseudo.func.map.MapFuncConfig;
import no.ssb.dapla.dlp.pseudo.func.map.Mapper;
import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException;
import no.ssb.dlp.pseudo.service.Application;
import org.reactivestreams.Publisher;
import org.reactivestreams.Subscriber;
Expand Down Expand Up @@ -74,72 +75,62 @@ private PseudoFuncOutput mapTo(String identifier, boolean isFnr) {
if (identifier == null) {
return PseudoFuncOutput.of(null);
}
try {
PseudoFuncOutput output;
// Execute the bulk request if necessary
if (bulkRequest.isEmpty()) {
// Split fnrs or snrs into chunks of BULK_SIZE
for (List<String> bulkIdentifiers : Lists.partition(List.copyOf(identifiers), partitionSize)) {
log.info("Execute SID-mapping bulk request");
final ObservableSubscriber<Map<String, SidInfo>> subscriber;

if (isFnr) {
subscriber = ObservableSubscriber.subscribe(
sidService.lookupFnr(bulkIdentifiers, getSnapshot()));
} else {
subscriber = ObservableSubscriber.subscribe(
sidService.lookupSnr(bulkIdentifiers, getSnapshot()));
}

for (String id : bulkIdentifiers) {
bulkRequest.put(id, subscriber);
}
// Execute the bulk request if necessary
if (bulkRequest.isEmpty()) {
// Split fnrs or snrs into chunks of BULK_SIZE
for (List<String> bulkIdentifiers : Lists.partition(List.copyOf(identifiers), partitionSize)) {
log.info("Execute SID-mapping bulk request");
final ObservableSubscriber<Map<String, SidInfo>> subscriber;

if (isFnr) {
subscriber = ObservableSubscriber.subscribe(
sidService.lookupFnr(bulkIdentifiers, getSnapshot()));
} else {
subscriber = ObservableSubscriber.subscribe(
sidService.lookupSnr(bulkIdentifiers, getSnapshot()));
}

for (String id : bulkIdentifiers) {
bulkRequest.put(id, subscriber);
}
}
SidInfo result = bulkRequest.get(identifier).awaitResult()
.orElseThrow(() -> new RuntimeException("SID service did not respond")).get(identifier);

output = PseudoFuncOutput.of(isFnr ? result.snr() : result.fnr());
createMappingLogsAndOutput(result, isFnr, identifier, output);
output.addMetadata(MapFuncConfig.Param.SNAPSHOT_DATE, result.datasetExtractionSnapshotTime());
return output;

} catch (LocalSidService.NoSidMappingFoundException e) {
String message = isFnr ?
String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier)) :
String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier));
log.warn(message);
PseudoFuncOutput output = PseudoFuncOutput.of(identifier);
output.addWarning(message);
return output;
}
SidInfo result = bulkRequest.get(identifier).awaitResult()
.orElseThrow(() -> new RuntimeException("SID service did not respond"))
.get(identifier);

PseudoFuncOutput output = createMappingLogsAndOutput(result, isFnr, identifier);
output.addMetadata(MapFuncConfig.Param.SNAPSHOT_DATE, result.datasetExtractionSnapshotTime());
return output;
}


private void createMappingLogsAndOutput(SidInfo sidInfo, boolean isFnr, String identifier, PseudoFuncOutput pseudoFuncOutput) {
private PseudoFuncOutput createMappingLogsAndOutput(SidInfo sidInfo, boolean isFnr, String identifier) {
//Mapping for fnr
if (isFnr) {
if (sidInfo == null || sidInfo.snr() == null) {
String message = String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier));
log.warn(message);
pseudoFuncOutput.addWarning(message);
throw new MappingNotFoundException(String.format(NO_MATCHING_FNR, Redactor.redactFnr(identifier)));
} else if (identifier.equals(sidInfo.snr())) {
String message = String.format(INCORRECT_MATCHING_FNR, Redactor.redactFnr(identifier));
log.warn(message);
pseudoFuncOutput.addWarning(message);
PseudoFuncOutput output = PseudoFuncOutput.of(sidInfo.snr());
output.addWarning(message);
return output;
}
return PseudoFuncOutput.of(sidInfo.snr());
}
//Mapping for snr
else {
if (sidInfo == null || sidInfo.fnr() == null) {
String message = String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier));
log.warn(message);
pseudoFuncOutput.addWarning(message);
throw new MappingNotFoundException(String.format(NO_MATCHING_SNR, Redactor.redactSnr(identifier)));
} else if (identifier.equals(sidInfo.fnr())) {
String message = String.format(INCORRECT_MATCHING_SNR, Redactor.redactSnr(identifier));
log.warn(message);
pseudoFuncOutput.addWarning(message);
PseudoFuncOutput output = PseudoFuncOutput.of(sidInfo.fnr());
output.addWarning(message);
return output;
}
return PseudoFuncOutput.of(sidInfo.fnr());
}
}

Expand Down
14 changes: 13 additions & 1 deletion src/main/java/no/ssb/dlp/pseudo/service/sid/local/SidCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import java.time.Instant;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

Expand Down Expand Up @@ -58,7 +59,18 @@ public Optional<String> getCurrentFnrForSnr(String snr) {

return Optional.ofNullable(currentFnr);
}

/**
 * Returns the values stored in {@code fnrToCurrentSnr} for the given fnrs.
 *
 * <p>Each requested fnr is looked up directly in the map, so the cost is proportional to the
 * size of the request instead of the size of the whole cache (the previous implementation
 * streamed every cache entry and ran {@code List.contains} on each one).
 *
 * <p>Fnrs without a cache entry are skipped, and a repeated fnr contributes a single result,
 * matching the previous behavior. Results follow the order of first occurrence in
 * {@code fnrs}; the previous implementation followed the map's iteration order.
 *
 * @param fnrs the fnrs to look up
 * @return an unmodifiable list with the mapped value of every fnr found in the cache;
 *         empty when none match
 */
public List<String> getCurrentSnrList(List<String> fnrs) {
    return fnrs.stream()
            // A key exists at most once in the map, so duplicates in the request must not duplicate results.
            .distinct()
            // containsKey (rather than a null check on get) reports an entry exactly when it exists.
            .filter(fnrToCurrentSnr::containsKey)
            .map(fnrToCurrentSnr::get)
            .toList();
}
/**
 * Returns the values stored in {@code snrToCurrentFnr} for the given snrs.
 *
 * <p>Each requested snr is looked up directly in the map, so the cost is proportional to the
 * size of the request instead of the size of the whole cache (the previous implementation
 * streamed every cache entry and ran {@code List.contains} on each one).
 *
 * <p>Snrs without a cache entry are skipped, and a repeated snr contributes a single result,
 * matching the previous behavior. Results follow the order of first occurrence in
 * {@code snrs}; the previous implementation followed the map's iteration order.
 *
 * @param snrs the snrs to look up
 * @return an unmodifiable list with the mapped value of every snr found in the cache;
 *         empty when none match
 */
public List<String> getCurrentFnrList(List<String> snrs) {
    return snrs.stream()
            // A key exists at most once in the map, so duplicates in the request must not duplicate results.
            .distinct()
            // containsKey (rather than a null check on get) reports an entry exactly when it exists.
            .filter(snrToCurrentFnr::containsKey)
            .map(snrToCurrentFnr::get)
            .toList();
}
private void validateCacheReady() throws SidIndexUnavailableException {
if (state != State.INITIALIZED) {
throw new SidIndexUnavailableException("SID index is not currently available. Wait a minute and retry. State=" + state);
Expand Down
13 changes: 8 additions & 5 deletions src/test/java/no/ssb/dlp/pseudo/service/sid/SidMapperTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import no.ssb.dapla.dlp.pseudo.func.PseudoFuncInput;
import no.ssb.dapla.dlp.pseudo.func.PseudoFuncOutput;
import no.ssb.dapla.dlp.pseudo.func.map.Mapper;
import no.ssb.dapla.dlp.pseudo.func.map.MappingNotFoundException;
import no.ssb.dlp.pseudo.service.Application;
import org.apache.groovy.util.Maps;
import org.junit.Assert;
Expand All @@ -21,6 +22,7 @@
import java.util.Optional;
import java.util.ServiceLoader;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

/**
Expand Down Expand Up @@ -182,9 +184,10 @@ public void testMapForFnrWithNoSnr(){
new RuntimeException("SidMapper class not found"));
mapper.setConfig(new HashMap<>());
mapper.init(PseudoFuncInput.of("11854898346"));
PseudoFuncOutput output = mapper.map(PseudoFuncInput.of("11854898346"));
assertThrows(MappingNotFoundException.class, () ->
mapper.map(PseudoFuncInput.of("11854898346"))
);

Assertions.assertEquals("No SID-mapping found for fnr 118548*****", output.getWarnings().getFirst());
verify(sidService, times(1)).lookupFnr(anyList(), eq(Optional.empty()));
assertLogsForFnrOrSnr("11854898346", "");
}
Expand Down Expand Up @@ -239,9 +242,9 @@ public void testMapForSnrWithNoFnr(){
new RuntimeException("SidMapper class not found"));
mapper.setConfig(new HashMap<>());
mapper.init(PseudoFuncInput.of("0001ha3"));
PseudoFuncOutput output = mapper.restore(PseudoFuncInput.of("0001ha3"));

Assertions.assertEquals("No SID-mapping found for snr 000****", output.getWarnings().getFirst());
assertThrows(MappingNotFoundException.class, () ->
mapper.restore(PseudoFuncInput.of("0001ha3"))
);
verify(sidService, times(1)).lookupSnr(anyList(), eq(Optional.empty()));
assertLogsForFnrOrSnr("11854898346", "0001ha3");
}
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/data/person_3_sid_deid.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"fodselsdato": "060970"
},
{
"fnr": "BP41lBL",
"fnr": "2TdM5b9",
"fornavn": "Kristoffer",
"etternavn": "Pedersen",
"kjonn": "M",
Expand Down

0 comments on commit 4d8970c

Please sign in to comment.