From d0a72208e2513bcee564f4d500e8607b74ab1dcd Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 14 Jan 2025 15:59:44 +0000 Subject: [PATCH] Implement chunking for IcatClient.getDatafiles #50 --- src/main/config/run.properties.example | 4 + .../org/icatproject/topcat/IcatClient.java | 155 +++++++++++------- .../topcat/web/rest/UserResource.java | 30 ++-- src/test/resources/run.properties | 4 + 4 files changed, 112 insertions(+), 81 deletions(-) diff --git a/src/main/config/run.properties.example b/src/main/config/run.properties.example index ee0c66c8..b5be98ea 100644 --- a/src/main/config/run.properties.example +++ b/src/main/config/run.properties.example @@ -99,3 +99,7 @@ queue.maxActiveDownloads = 10 # Downloads based on their fileCount up to this limit. If a single Dataset has a fileCount # greater than this limit, it will still be submitted in a part by itself. queue.maxFileCount = 10000 + +# Configurable limit for the length of the GET URL for requesting Datafiles by a list of file locations +# The exact limit may depend on the server +getUrlLimit=1024 diff --git a/src/main/java/org/icatproject/topcat/IcatClient.java b/src/main/java/org/icatproject/topcat/IcatClient.java index d01382d0..344d6cad 100644 --- a/src/main/java/org/icatproject/topcat/IcatClient.java +++ b/src/main/java/org/icatproject/topcat/IcatClient.java @@ -5,7 +5,7 @@ import java.util.List; import java.util.ListIterator; import java.util.ArrayList; - +import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import org.icatproject.topcat.httpclient.*; @@ -22,17 +22,17 @@ public class IcatClient { private Logger logger = LoggerFactory.getLogger(IcatClient.class); - private HttpClient httpClient; - private String sessionId; + private HttpClient httpClient; + private String sessionId; public IcatClient(String url) { - this.httpClient = new HttpClient(url + "/icat"); - } + this.httpClient = new HttpClient(url + "/icat"); + } public IcatClient(String url, String sessionId) { - this(url); - this.sessionId = sessionId; - } + this(url); + this.sessionId = sessionId; + } /** * Login to create a session @@ -44,7 +44,7 @@ public IcatClient(String url, String sessionId) { * @throws BadRequestException */ public String login(String jsonString) throws BadRequestException { - try { + try { Response response = httpClient.post("session", new HashMap(), jsonString); return response.toString(); } catch (Exception e) { @@ -52,21 +52,21 @@ public String login(String jsonString) throws BadRequestException { } } - public String getUserName() throws TopcatException { - try { - Response response = httpClient.get("session/" + sessionId, new HashMap()); - if(response.getCode() == 404){ - throw new NotFoundException("Could not run getUserName got a 404 response"); - } else if(response.getCode() >= 400){ - throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message")); - } - return Utils.parseJsonObject(response.toString()).getString("userName"); - } catch (TopcatException e){ - throw e; - } catch (Exception e){ - throw new BadRequestException(e.getMessage()); - } - } + public String getUserName() throws TopcatException { + try { + Response response = httpClient.get("session/" + sessionId, new HashMap()); + if(response.getCode() == 404){ + throw new NotFoundException("Could not run getUserName got a 404 response"); + } else if(response.getCode() >= 400){ + throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message")); + } + return Utils.parseJsonObject(response.toString()).getString("userName"); + } catch (TopcatException e){ + throw e; + } catch (Exception e){ + throw new BadRequestException(e.getMessage()); + } + } public Boolean isAdmin() throws TopcatException { try { @@ -92,29 +92,29 @@ public String getFullName() throws TopcatException { try { String query = "select user.fullName from User user where user.name = :user"; String url = "entityManager?sessionId=" + URLEncoder.encode(sessionId, "UTF8") + "&query=" + URLEncoder.encode(query, "UTF8"); - Response response = httpClient.get(url, new HashMap()); - - if(response.getCode() == 404){ - logger.error("IcatClient.getFullName: got a 404 response"); - throw new NotFoundException("Could not run getFullName got a 404 response"); - } else if(response.getCode() >= 400){ - String message = Utils.parseJsonObject(response.toString()).getString("message"); - logger.error("IcatClient.getFullName: got a " + response.getCode() + " response: " + message); - throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message")); - } - - JsonArray responseArray = Utils.parseJsonArray(response.toString()); - if( responseArray.size() == 0 || responseArray.isNull(0) ){ - logger.warn("IcatClient.getFullName: client returned no or null result, so returning userName"); - return getUserName(); - } else { - return responseArray.getString(0); - } - } catch (TopcatException e){ - throw e; - } catch (Exception e){ - throw new BadRequestException(e.getMessage()); - } + Response response = httpClient.get(url, new HashMap()); + + if(response.getCode() == 404){ + logger.error("IcatClient.getFullName: got a 404 response"); + throw new NotFoundException("Could not run getFullName got a 404 response"); + } else if(response.getCode() >= 400){ + String message = Utils.parseJsonObject(response.toString()).getString("message"); + logger.error("IcatClient.getFullName: got a " + response.getCode() + " response: " + message); + throw new BadRequestException(Utils.parseJsonObject(response.toString()).getString("message")); + } + + JsonArray responseArray = Utils.parseJsonArray(response.toString()); + if( responseArray.size() == 0 || responseArray.isNull(0) ){ + logger.warn("IcatClient.getFullName: client returned no or null result, so returning userName"); + return getUserName(); + } else { + return responseArray.getString(0); + } + } catch (TopcatException e){ + throw e; + } catch (Exception e){ + throw new BadRequestException(e.getMessage()); + } } /** @@ -132,24 +132,53 @@ public JsonArray getDatasets(String visitId) throws TopcatException { } /** - * Get all Datafiles in the list of file locations. + * Get all Datafiles in the list of file locations, chunking to avoid a GET request + * which exceeds the configurable limit. * * @param files List of ICAT Datafile.locations - * @return JsonArray of Datafile ids. + * @return List of Datafile ids. * @throws TopcatException + * @throws UnsupportedEncodingException */ - public JsonArray getDatafiles(List files) throws TopcatException { - StringBuilder stringBuilder = new StringBuilder(); - ListIterator fileIterator = files.listIterator(); - stringBuilder.append("'" + fileIterator.next() + "'"); - fileIterator.forEachRemaining(file -> { - stringBuilder.append(","); - stringBuilder.append("'" + file + "'"); - }); - String formattedFiles = stringBuilder.toString(); - String query = "SELECT datafile.id from Datafile datafile"; - query += " WHERE datafile.location in (" + formattedFiles + ") ORDER BY datafile.id"; - return submitQuery(query); + public List getDatafiles(List files) throws TopcatException, UnsupportedEncodingException { + List datafileIds = new ArrayList<>(); + if (files.size() == 0) { + // Ensure that we don't error when calling .next() below by returning early + return datafileIds; + } + + // Total limit - "entityManager?sessionId=" - `sessionId` - "?query=" - `queryPrefix` - `querySuffix + // Limit is 1024 - 24 - 36 - 7 - 51 - 17 + int getUrlLimit = Integer.parseInt(Properties.getInstance().getProperty("getUrlLimit", "1024")); + int chunkLimit = getUrlLimit - 135; + String queryPrefix = "SELECT d.id from Datafile d WHERE d.location in ("; + String querySuffix = ") ORDER BY d.id"; + ListIterator iterator = files.listIterator(); + + String chunkedFiles = "'" + iterator.next() + "'"; + int chunkSize = URLEncoder.encode(chunkedFiles, "UTF8").length(); + while (iterator.hasNext()) { + String file = "'" + iterator.next() + "'"; + int encodedFileLength = URLEncoder.encode(file, "UTF8").length(); + if (chunkSize + 3 + encodedFileLength > chunkLimit) { + JsonArray jsonArray = submitQuery(queryPrefix + chunkedFiles + querySuffix); + for (JsonNumber datafileIdJsonNumber : jsonArray.getValuesAs(JsonNumber.class)) { + datafileIds.add(datafileIdJsonNumber.longValueExact()); + } + + chunkedFiles = file; + chunkSize = encodedFileLength; + } else { + chunkedFiles += "," + file; + chunkSize += 3 + encodedFileLength; // 3 is size of , when encoded as %2C + } + } + JsonArray jsonArray = submitQuery(queryPrefix + chunkedFiles + querySuffix); + for (JsonNumber datafileIdJsonNumber : jsonArray.getValuesAs(JsonNumber.class)) { + datafileIds.add(datafileIdJsonNumber.longValueExact()); + } + + return datafileIds; } /** @@ -168,10 +197,10 @@ public long getDatasetFileCount(long datasetId) throws TopcatException { } /** - * Utility method for submitting an unformatted query to the entityManager + * Utility method for submitting an unencoded query to the entityManager * endpoint, and returning the resultant JsonArray. * - * @param query Unformatted String query to submit + * @param query Unencoded String query to submit * @return JsonArray of results, contents will depend on the query. * @throws TopcatException */ diff --git a/src/main/java/org/icatproject/topcat/web/rest/UserResource.java b/src/main/java/org/icatproject/topcat/web/rest/UserResource.java index 8dbdcf38..b1792fdf 100644 --- a/src/main/java/org/icatproject/topcat/web/rest/UserResource.java +++ b/src/main/java/org/icatproject/topcat/web/rest/UserResource.java @@ -1,5 +1,6 @@ package org.icatproject.topcat.web.rest; +import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.text.ParseException; import java.util.ArrayList; @@ -864,7 +865,6 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName, long downloadFileCount = 0L; List downloadItems = new ArrayList(); List downloads = new ArrayList(); - // String filename = formatQueuedFilename(facilityName, visitId, part); Download newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email); for (JsonValue dataset : datasets) { @@ -879,13 +879,9 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName, if (downloadFileCount > 0L && downloadFileCount + datasetFileCount > queueMaxFileCount) { newDownload.setDownloadItems(downloadItems); downloads.add(newDownload); - // downloadId = submitDownload(idsClient, download, DownloadStatus.PAUSED); - // jsonArrayBuilder.add(downloadId); - // part += 1L; downloadFileCount = 0L; downloadItems = new ArrayList(); - // filename = formatQueuedFilename(facilityName, visitId, part); newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email); } @@ -895,8 +891,7 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName, } newDownload.setDownloadItems(downloadItems); downloads.add(newDownload); - // downloadId = submitDownload(idsClient, download, DownloadStatus.PAUSED); - // jsonArrayBuilder.add(downloadId); + int part = 1; for (Download download : downloads) { String filename = formatQueuedFilename(facilityName, visitId, part, downloads.size()); @@ -920,14 +915,15 @@ public Response queueVisitId(@FormParam("facilityName") String facilityName, * @param files ICAT Datafile.locations to download * @return Array of Download ids * @throws TopcatException + * @throws UnsupportedEncodingException */ @POST - @Path("/queue/{facilityName}/files") - public Response queueFiles(@PathParam("facilityName") String facilityName, + @Path("/queue/files") + public Response queueFiles(@FormParam("facilityName") String facilityName, @FormParam("sessionId") String sessionId, @FormParam("transport") String transport, - @FormParam("email") String email, @FormParam("files") List files) throws TopcatException { + @FormParam("email") String email, @FormParam("files") List files) throws TopcatException, UnsupportedEncodingException { - logger.info("queueVisitId called"); + logger.info("queueFiles called"); validateTransport(transport); if (files.size() == 0) { throw new BadRequestException("At least one Datafile.location required"); @@ -941,31 +937,29 @@ public Response queueFiles(@PathParam("facilityName") String facilityName, // If we wanted to block the user, this is where we would do it String userName = icatClient.getUserName(); String fullName = icatClient.getFullName(); - JsonArray datafiles = icatClient.getDatafiles(files); + List datafileIds = icatClient.getDatafiles(files); long downloadId; JsonArrayBuilder jsonArrayBuilder = Json.createArrayBuilder(); - long downloadFileCount = 0; + long downloadFileCount = 0L; List downloadItems = new ArrayList(); List downloads = new ArrayList(); Download newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email); - for (JsonNumber datafileIdJsonNumber : datafiles.getValuesAs(JsonNumber.class)) { - long datafileId = datafileIdJsonNumber.longValueExact(); - + for (long datafileId : datafileIds) { if (downloadFileCount >= queueMaxFileCount) { newDownload.setDownloadItems(downloadItems); downloads.add(newDownload); - downloadFileCount = 0; + downloadFileCount = 0L; downloadItems = new ArrayList(); newDownload = createDownload(sessionId, facilityName, "", userName, fullName, transport, email); } DownloadItem downloadItem = createDownloadItem(newDownload, datafileId, EntityType.datafile); downloadItems.add(downloadItem); - downloadFileCount += 1; + downloadFileCount += 1L; } newDownload.setDownloadItems(downloadItems); downloads.add(newDownload); diff --git a/src/test/resources/run.properties b/src/test/resources/run.properties index 30c5b690..bc076a13 100644 --- a/src/test/resources/run.properties +++ b/src/test/resources/run.properties @@ -12,3 +12,7 @@ ids.timeout=10s test.disableDownloadStatusChecks = true # Test data has 100 files per Dataset, set this to a small number to ensure coverage of the batching logic queue.maxFileCount = 1 + +# Each get request for Datafiles has a minimum size of 135, each of 3 locations is ~25 +# A value of 200 allows us to chunk this into one chunk of 2, and a second chunk of 1, hitting both branches of the code +getUrlLimit=200