Skip to content

Commit

Permalink
indexer: do case-insensitive search for index, in case Headers can not be converted
Browse files Browse the repository at this point in the history

- part of fix for webrecorder/browsertrix-crawler#722
- add warc-protocol to multi-value headers for now
  • Loading branch information
ikreymer committed Nov 14, 2024
1 parent 1883c33 commit a50299a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
14 changes: 11 additions & 3 deletions src/lib/indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,19 @@ abstract class BaseIndexer {

if (field.startsWith("http:")) {
if (record.httpHeaders) {
let headers: Headers | Map<string, string> = record.httpHeaders.headers;
const headers: Headers | Map<string, string> = record.httpHeaders.headers;
const name = field.slice(5);
let value = headers.get(name);
// just do lower-case search to avoid conversion in case there may be errors
if (headers instanceof Map) {
headers = new Headers(Object.fromEntries(headers));
const nameLower = name.toLowerCase();
for (const keyName of headers.keys()) {
if (nameLower === keyName.toLowerCase()) {
value = headers.get(keyName);
}
}
}
return headers.get(field.slice(5));
return value;
}
return null;
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ export function splitChunk(

// ===========================================================================
// headers multi map
const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to"];
const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"];

// using something other than comma to reduce chance of any collisions with actual data
// in theory, collision still possible with arbitrary cookie value
Expand Down

0 comments on commit a50299a

Please sign in to comment.