From a50299a97a956ec5d106ea53232b6d4342affd98 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 13 Nov 2024 19:02:13 -0800 Subject: [PATCH] indexer: do case-insensitive search for index, in case Headers can not be converted - part of fix for webrecorder/browsertrix-crawler#722 - add warc-protocol to multi-value headers for now --- src/lib/indexer.ts | 14 +++++++++++--- src/lib/utils.ts | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/lib/indexer.ts b/src/lib/indexer.ts index 11971a9..6436319 100644 --- a/src/lib/indexer.ts +++ b/src/lib/indexer.ts @@ -140,11 +140,19 @@ abstract class BaseIndexer { if (field.startsWith("http:")) { if (record.httpHeaders) { - let headers: Headers | Map = record.httpHeaders.headers; + const headers: Headers | Map = record.httpHeaders.headers; + const name = field.slice(5); + let value = headers.get(name); + // just do lower-case search to avoid conversion in case there may be errors if (headers instanceof Map) { - headers = new Headers(Object.fromEntries(headers)); + const nameLower = name.toLowerCase(); + for (const keyName of headers.keys()) { + if (nameLower === keyName.toLowerCase()) { + value = headers.get(keyName); + } + } } - return headers.get(field.slice(5)); + return value; } return null; } diff --git a/src/lib/utils.ts b/src/lib/utils.ts index 52c30b4..51a995e 100644 --- a/src/lib/utils.ts +++ b/src/lib/utils.ts @@ -291,7 +291,7 @@ export function splitChunk( // =========================================================================== // headers multi map -const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to"]; +const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"]; // using something other than comma to reduce change of any collisions with actual data // in theory, collision still possible with arbitrary cookie value