From fb1ff9c3d88a54fdc0acb2670ca25966b04fa95e Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 5 Sep 2024 08:41:39 -0700 Subject: [PATCH] indexer: fix parsing POST request when using Map for headers (#78) - add case-insensitive check for 'content-type' if headers are a Map, not Headers - remove unused properties left over on WARCRecord - bump to 2.3.1 --- package.json | 2 +- src/lib/indexer.ts | 2 -- src/lib/utils.ts | 24 ++++++++++++++++++++---- src/lib/warcrecord.ts | 2 -- 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/package.json b/package.json index fb4215a..643803b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "warcio", - "version": "2.3.0", + "version": "2.3.1", "keywords": [ "WARC", "web archiving" diff --git a/src/lib/indexer.ts b/src/lib/indexer.ts index 79d912d..8219547 100644 --- a/src/lib/indexer.ts +++ b/src/lib/indexer.ts @@ -304,8 +304,6 @@ export class CDXIndexer extends Indexer { if (postToGetUrl(request)) { requestBody = request.requestBody; - record.method = method; - record.requestBody = requestBody; url = request.url; } } diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 41d1615..7687923 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -62,7 +62,24 @@ export function postToGetUrl(request: Request) { return false; } - const requestMime = (headers.get("content-type") || "").split(";")[0]; + const getContentType = (headers: Headers | Map<string, string>) : string => { + const ct = headers.get("content-type"); + if (ct) { + return ct; + } + if (!(headers instanceof Headers)) { + for (const [key, value] of headers.entries()) { + if (key && key.toLowerCase() === "content-type") { + return value; + } + } + } + return ""; + } + + const contentType = getContentType(headers); + + const requestMime = contentType.split(";")[0]; function decodeIfNeeded( postData: Uint8Array | string | undefined | null, @@ -93,13 +110,12 @@ export function postToGetUrl(request: Request) { break; case "multipart/form-data": { - 
const content_type = headers.get("content-type"); - if (!content_type) { + if (!contentType) { throw new Error( "utils cannot call postToGetURL when missing content-type header", ); } - query = mfdToQueryString(decodeIfNeeded(postData), content_type); + query = mfdToQueryString(decodeIfNeeded(postData), contentType); break; } diff --git a/src/lib/warcrecord.ts b/src/lib/warcrecord.ts index 5546f9a..a8a3b37 100644 --- a/src/lib/warcrecord.ts +++ b/src/lib/warcrecord.ts @@ -176,8 +176,6 @@ export class WARCRecord extends BaseAsyncIterReader { _offset: number | undefined = 0; _length = 0; - method: string | undefined = ""; - requestBody = ""; _urlkey = ""; constructor({