Skip to content

Commit

Permalink
Implement chunking for IcatClient.getDatafiles #50
Browse files Browse the repository at this point in the history
  • Loading branch information
patrick-austin committed Jan 14, 2025
1 parent f03c34f commit d0a7220
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 81 deletions.
4 changes: 4 additions & 0 deletions src/main/config/run.properties.example
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,7 @@ queue.maxActiveDownloads = 10
# Downloads based on their fileCount up to this limit. If a single Dataset has a fileCount
# greater than this limit, it will still be submitted in a part by itself.
queue.maxFileCount = 10000

# Configurable limit for the length of the GET URL used when requesting Datafiles by a list of
# file locations. Longer lists are split across several requests so that each stays under this limit.
# The exact limit may depend on the server.
getUrlLimit=1024
155 changes: 92 additions & 63 deletions src/main/java/org/icatproject/topcat/IcatClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import java.util.List;
import java.util.ListIterator;
import java.util.ArrayList;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

import org.icatproject.topcat.httpclient.*;
Expand All @@ -22,17 +22,17 @@ public class IcatClient {

private Logger logger = LoggerFactory.getLogger(IcatClient.class);

private HttpClient httpClient;
private String sessionId;
private HttpClient httpClient;
private String sessionId;

public IcatClient(String url) {
this.httpClient = new HttpClient(url + "/icat");
}
this.httpClient = new HttpClient(url + "/icat");
}

public IcatClient(String url, String sessionId) {
this(url);
this.sessionId = sessionId;
}
this(url);
this.sessionId = sessionId;
}

/**
* Login to create a session
Expand All @@ -44,29 +44,29 @@ public IcatClient(String url, String sessionId) {
* @throws BadRequestException
*/
public String login(String jsonString) throws BadRequestException {
try {
try {
Response response = httpClient.post("session", new HashMap<String, String>(), jsonString);
return response.toString();
} catch (Exception e) {
throw new BadRequestException(e.getMessage());
}
}

public String getUserName() throws TopcatException {
try {
Response response = httpClient.get("session/" + sessionId, new HashMap<String, String>());
if(response.getCode() == 404){
throw new NotFoundException("Could not run getUserName got a 404 response");
} else if(response.getCode() >= 400){
throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message"));
}
return Utils.parseJsonObject(response.toString()).getString("userName");
} catch (TopcatException e){
throw e;
} catch (Exception e){
throw new BadRequestException(e.getMessage());
}
}
/**
 * Fetch the user name associated with the current session.
 *
 * Issues a GET against "session/{sessionId}" and reads the "userName" field
 * from the JSON body of the response.
 *
 * @return the "userName" value from the session response
 * @throws TopcatException a NotFoundException on a 404 response, otherwise a
 *                         BadRequestException carrying either the "message" field of
 *                         an error response (code >= 400) or the message of any
 *                         unexpected exception
 */
public String getUserName() throws TopcatException {
    try {
        Response sessionResponse = httpClient.get("session/" + sessionId, new HashMap<String, String>());

        if (sessionResponse.getCode() == 404) {
            throw new NotFoundException("Could not run getUserName got a 404 response");
        }
        if (sessionResponse.getCode() >= 400) {
            // Error responses carry a JSON body with a "message" field
            String errorMessage = Utils.parseJsonObject(sessionResponse.toString()).getString("message");
            throw new BadRequestException(errorMessage);
        }

        String userName = Utils.parseJsonObject(sessionResponse.toString()).getString("userName");
        return userName;
    } catch (TopcatException topcatException) {
        // Already the right type, so pass it through untouched
        throw topcatException;
    } catch (Exception unexpected) {
        throw new BadRequestException(unexpected.getMessage());
    }
}

public Boolean isAdmin() throws TopcatException {
try {
Expand All @@ -92,29 +92,29 @@ public String getFullName() throws TopcatException {
try {
String query = "select user.fullName from User user where user.name = :user";
String url = "entityManager?sessionId=" + URLEncoder.encode(sessionId, "UTF8") + "&query=" + URLEncoder.encode(query, "UTF8");
Response response = httpClient.get(url, new HashMap<String, String>());
if(response.getCode() == 404){
logger.error("IcatClient.getFullName: got a 404 response");
throw new NotFoundException("Could not run getFullName got a 404 response");
} else if(response.getCode() >= 400){
String message = Utils.parseJsonObject(response.toString()).getString("message");
logger.error("IcatClient.getFullName: got a " + response.getCode() + " response: " + message);
throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message"));
}

JsonArray responseArray = Utils.parseJsonArray(response.toString());
if( responseArray.size() == 0 || responseArray.isNull(0) ){
logger.warn("IcatClient.getFullName: client returned no or null result, so returning userName");
return getUserName();
} else {
return responseArray.getString(0);
}
} catch (TopcatException e){
throw e;
} catch (Exception e){
throw new BadRequestException(e.getMessage());
}
Response response = httpClient.get(url, new HashMap<String, String>());
if(response.getCode() == 404){
logger.error("IcatClient.getFullName: got a 404 response");
throw new NotFoundException("Could not run getFullName got a 404 response");
} else if(response.getCode() >= 400){
String message = Utils.parseJsonObject(response.toString()).getString("message");
logger.error("IcatClient.getFullName: got a " + response.getCode() + " response: " + message);
throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message"));
}

JsonArray responseArray = Utils.parseJsonArray(response.toString());
if( responseArray.size() == 0 || responseArray.isNull(0) ){
logger.warn("IcatClient.getFullName: client returned no or null result, so returning userName");
return getUserName();
} else {
return responseArray.getString(0);
}
} catch (TopcatException e){
throw e;
} catch (Exception e){
throw new BadRequestException(e.getMessage());
}
}

/**
Expand All @@ -132,24 +132,53 @@ public JsonArray getDatasets(String visitId) throws TopcatException {
}

/**
* Get all Datafiles in the list of file locations.
* Get all Datafiles in the list of file locations, chunking to avoid a GET request
* which exceeds the configurable limit.
*
* @param files List of ICAT Datafile.locations
* @return JsonArray of Datafile ids.
* @return List of Datafile ids.
* @throws TopcatException
* @throws UnsupportedEncodingException
*/
public JsonArray getDatafiles(List<String> files) throws TopcatException {
StringBuilder stringBuilder = new StringBuilder();
ListIterator<String> fileIterator = files.listIterator();
stringBuilder.append("'" + fileIterator.next() + "'");
fileIterator.forEachRemaining(file -> {
stringBuilder.append(",");
stringBuilder.append("'" + file + "'");
});
String formattedFiles = stringBuilder.toString();
String query = "SELECT datafile.id from Datafile datafile";
query += " WHERE datafile.location in (" + formattedFiles + ") ORDER BY datafile.id";
return submitQuery(query);
/**
 * Get the ids of all Datafiles whose location is in the given list. The lookup is
 * split into several queries so that no single GET request exceeds the configurable
 * URL length limit (property "getUrlLimit", default 1024).
 *
 * Each location is embedded as a quoted string literal; any single quote inside a
 * location is doubled so that it cannot terminate the literal early.
 *
 * @param files List of ICAT Datafile.locations
 * @return List of Datafile ids, in the order the chunked queries returned them.
 *         Empty if files is empty.
 * @throws TopcatException              if submitting any of the queries fails
 * @throws UnsupportedEncodingException if the "UTF8" encoding is not supported
 */
public List<Long> getDatafiles(List<String> files) throws TopcatException, UnsupportedEncodingException {
    List<Long> datafileIds = new ArrayList<>();
    if (files.isEmpty()) {
        // Nothing to look up, and an empty "in ()" clause would not be a valid query
        return datafileIds;
    }

    // Fixed part of every request URL, which leaves (limit - 135) characters for the locations:
    //   "entityManager?sessionId="  24
    //   sessionId                   36 (assumes a 36 character session id)
    //   "&query="                    7
    //   encoded queryPrefix         51
    //   encoded querySuffix         17
    int getUrlLimit = Integer.parseInt(Properties.getInstance().getProperty("getUrlLimit", "1024"));
    int chunkLimit = getUrlLimit - 135;
    String queryPrefix = "SELECT d.id from Datafile d WHERE d.location in (";
    String querySuffix = ") ORDER BY d.id";

    StringBuilder chunk = new StringBuilder();
    int chunkSize = 0;
    for (String location : files) {
        // String.valueOf keeps the behaviour of plain concatenation for a null entry;
        // doubling single quotes keeps the value inside its string literal.
        String literal = "'" + String.valueOf(location).replace("'", "''") + "'";
        int encodedLength = URLEncoder.encode(literal, "UTF8").length();

        if (chunk.length() == 0) {
            // First location overall: always starts the first chunk
            chunk.append(literal);
            chunkSize = encodedLength;
        } else if (chunkSize + 3 + encodedLength > chunkLimit) {
            // Adding this location would exceed the limit: flush and start a new chunk.
            // A single over-long location is still submitted in a chunk by itself.
            collectDatafileIds(queryPrefix + chunk + querySuffix, datafileIds);
            chunk.setLength(0);
            chunk.append(literal);
            chunkSize = encodedLength;
        } else {
            chunk.append(',').append(literal);
            chunkSize += 3 + encodedLength; // 3 is the size of "," when encoded as %2C
        }
    }
    // Submit whatever remains in the final (possibly only) chunk
    collectDatafileIds(queryPrefix + chunk + querySuffix, datafileIds);

    return datafileIds;
}

/**
 * Submit a single query that returns numeric ids and append every id to the given list.
 *
 * @param query       Unencoded String query to submit
 * @param datafileIds List to which the returned ids are appended
 * @throws TopcatException if submitting the query fails
 */
private void collectDatafileIds(String query, List<Long> datafileIds) throws TopcatException {
    JsonArray jsonArray = submitQuery(query);
    for (JsonNumber datafileIdJsonNumber : jsonArray.getValuesAs(JsonNumber.class)) {
        datafileIds.add(datafileIdJsonNumber.longValueExact());
    }
}

/**
Expand All @@ -168,10 +197,10 @@ public long getDatasetFileCount(long datasetId) throws TopcatException {
}

/**
* Utility method for submitting an unformatted query to the entityManager
* Utility method for submitting an unencoded query to the entityManager
* endpoint, and returning the resultant JsonArray.
*
* @param query Unformatted String query to submit
* @param query Unencoded String query to submit
* @return JsonArray of results, contents will depend on the query.
* @throws TopcatException
*/
Expand Down
30 changes: 12 additions & 18 deletions src/main/java/org/icatproject/topcat/web/rest/UserResource.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.icatproject.topcat.web.rest;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.util.ArrayList;
Expand Down Expand Up @@ -864,7 +865,6 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName,
long downloadFileCount = 0L;
List<DownloadItem> downloadItems = new ArrayList<DownloadItem>();
List<Download> downloads = new ArrayList<Download>();
// String filename = formatQueuedFilename(facilityName, visitId, part);
Download newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email);

for (JsonValue dataset : datasets) {
Expand All @@ -879,13 +879,9 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName,
if (downloadFileCount > 0L && downloadFileCount + datasetFileCount > queueMaxFileCount) {
newDownload.setDownloadItems(downloadItems);
downloads.add(newDownload);
// downloadId = submitDownload(idsClient, download, DownloadStatus.PAUSED);
// jsonArrayBuilder.add(downloadId);

// part += 1L;
downloadFileCount = 0L;
downloadItems = new ArrayList<DownloadItem>();
// filename = formatQueuedFilename(facilityName, visitId, part);
newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email);
}

Expand All @@ -895,8 +891,7 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName,
}
newDownload.setDownloadItems(downloadItems);
downloads.add(newDownload);
// downloadId = submitDownload(idsClient, download, DownloadStatus.PAUSED);
// jsonArrayBuilder.add(downloadId);

int part = 1;
for (Download download : downloads) {
String filename = formatQueuedFilename(facilityName, visitId, part, downloads.size());
Expand All @@ -920,14 +915,15 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName,
* @param files ICAT Datafile.locations to download
* @return Array of Download ids
* @throws TopcatException
* @throws UnsupportedEncodingException
*/
@POST
@Path("/queue/{facilityName}/files")
public Response queueFiles(@PathParam("facilityName") String facilityName,
@Path("/queue/files")
public Response queueFiles(@FormParam("facilityName") String facilityName,
@FormParam("sessionId") String sessionId, @FormParam("transport") String transport,
@FormParam("email") String email, @FormParam("files") List<String> files) throws TopcatException {
@FormParam("email") String email, @FormParam("files") List<String> files) throws TopcatException, UnsupportedEncodingException {

logger.info("queueVisitId called");
logger.info("queueFiles called");
validateTransport(transport);
if (files.size() == 0) {
throw new BadRequestException("At least one Datafile.location required");
Expand All @@ -941,31 +937,29 @@ public Response queueFiles(@PathParam("facilityName") String facilityName,
// If we wanted to block the user, this is where we would do it
String userName = icatClient.getUserName();
String fullName = icatClient.getFullName();
JsonArray datafiles = icatClient.getDatafiles(files);
List<Long> datafileIds = icatClient.getDatafiles(files);

long downloadId;
JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder();

long downloadFileCount = 0;
long downloadFileCount = 0L;
List<DownloadItem> downloadItems = new ArrayList<DownloadItem>();
List<Download> downloads = new ArrayList<Download>();
Download newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email);

for (JsonNumber datafileIdJsonNumber : datafiles.getValuesAs(JsonNumber.class)) {
long datafileId = datafileIdJsonNumber.longValueExact();

for (long datafileId : datafileIds) {
if (downloadFileCount >= queueMaxFileCount) {
newDownload.setDownloadItems(downloadItems);
downloads.add(newDownload);

downloadFileCount = 0;
downloadFileCount = 0L;
downloadItems = new ArrayList<DownloadItem>();
newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email);
}

DownloadItem downloadItem = createDownloadItem(newDownload, datafileId, EntityType.datafile);
downloadItems.add(downloadItem);
downloadFileCount += 1;
downloadFileCount += 1L;
}
newDownload.setDownloadItems(downloadItems);
downloads.add(newDownload);
Expand Down
4 changes: 4 additions & 0 deletions src/test/resources/run.properties
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ ids.timeout=10s
test.disableDownloadStatusChecks = true
# Test data has 100 files per Dataset, set this to a small number to ensure coverage of the batching logic
queue.maxFileCount = 1

# Each GET request for Datafiles has a fixed overhead of 135 characters, and each of the 3 test locations adds ~25
# A value of 200 splits these into one chunk of 2 and a second chunk of 1, hitting both branches of the chunking code
getUrlLimit=200

0 comments on commit d0a7220

Please sign in to comment.