Skip to content

Commit

Permalink
support for configurable cookie / storage archiving, from #276
Browse files Browse the repository at this point in the history
fix for facebook archiving + replay, fixes #272 via wabac.js 2.20.4
also fixes a crash when archiving Facebook, fixes #273
  • Loading branch information
ikreymer committed Nov 20, 2024
1 parent 8d925d6 commit b534aef
Show file tree
Hide file tree
Showing 6 changed files with 212 additions and 74 deletions.
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
"dependencies": {
"@fortawesome/fontawesome-free": "^5.13.0",
"@ipld/car": "^5.3.1",
"@webrecorder/awp-sw": "^0.5.0",
"@webrecorder/wabac": "^2.20.3",
"@webrecorder/awp-sw": "^0.5.1",
"@webrecorder/wabac": "^2.20.4",
"auto-js-ipfs": "^2.3.0",
"browsertrix-behaviors": "^0.6.4",
"btoa": "^1.2.1",
Expand All @@ -31,7 +31,7 @@
"tsconfig-paths-webpack-plugin": "^4.1.0",
"unused-filename": "^4.0.1",
"uuid": "^8.3.2",
"warcio": "^2.3.1"
"warcio": "^2.4.2"
},
"devDependencies": {
"@typescript-eslint/eslint-plugin": "^6.15.0",
Expand Down Expand Up @@ -64,7 +64,7 @@
"webpack-extension-reloader": "^1.1.4"
},
"resolutions": {
"@webrecorder/wabac": "^2.20.3"
"@webrecorder/wabac": "^2.20.4"
},
"files": [
"src/",
Expand Down
126 changes: 102 additions & 24 deletions src/recorder.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { RequestResponseInfo } from "./requestresponseinfo";

import { getCustomRewriter, rewriteDASH, rewriteHLS } from "@webrecorder/wabac";
import {
getCustomRewriter,
rewriteDASH,
rewriteHLS,
removeRangeAsQuery,
} from "@webrecorder/wabac";

import { Buffer } from "buffer";

Expand All @@ -15,6 +20,7 @@ import {
BEHAVIOR_PAUSED,
BEHAVIOR_DONE,
} from "./consts";
import { getLocalOption } from "./localstorage";

// @ts-expect-error - TS2554 - Expected 0 arguments, but got 1.
const encoder = new TextEncoder("utf-8");
Expand All @@ -34,9 +40,26 @@ function sleep(time) {
return new Promise((resolve) => setTimeout(() => resolve(), time));
}

/**
 * One queued background-fetch request, as stored in Recorder._fetchQueue
 * and passed to doAsyncFetch() / attemptFetchRedirect().
 */
type FetchEntry = {
// URL handed to fetch(); doAsyncFetch() may overwrite it with the result of
// removeRangeAsQuery() when doRangeCheck is set.
url: string;
// Optional request headers used for the fetch; the "range" header is deleted
// from this object before the request is issued.
headers?: Headers;
// NOTE(review): not read or written in the visible parts of recorder.ts —
// confirm whether this field is still needed.
rangeReplaced?: boolean;
// Sessions argument given to doAsyncFetch(), stored on the entry before it is queued.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
sessions?: any[];
// Recorder's pageInfo at queue time; later passed to toDBRecord() and commitResource().
// eslint-disable-next-line @typescript-eslint/no-explicit-any
pageInfo?: any;

// True when removeRangeAsQuery() returned a changed URL for this request; the
// committed record then gets extraOpts["rangeRemoved"] = "1".
rangeRemoved?: boolean;
// Asks doAsyncFetch() to run removeRangeAsQuery() on url before de-duplicating and queueing.
doRangeCheck?: boolean;
// Checked by attemptFetchRedirect() together with an "opaqueredirect" response
// type; set by handleWindowOpen().
redirectOnly?: boolean;
};

// ===========================================================================
class Recorder {
recordStorage = true;
archiveStorage = false;
archiveCookies = false;

_fetchQueue: FetchEntry[] = [];

constructor() {
// @ts-expect-error - TS2339 - Property 'flatMode' does not exist on type 'Recorder'.
Expand Down Expand Up @@ -79,8 +102,7 @@ class Recorder {

// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
this._fetchPending = new Map();
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
this._fetchQueue = [];

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
this._fetchUrls = new Set();

Expand Down Expand Up @@ -128,6 +150,13 @@ class Recorder {
this.defaultFetchOpts = {
redirect: "manual",
};

this.initOpts();
}

/**
 * Load the cookie / storage archiving preferences via getLocalOption().
 * Each flag is turned on only when its stored value is exactly the string "1";
 * any other value leaves it false.
 */
async initOpts() {
  const cookiesOpt = await getLocalOption("archiveCookies");
  this.archiveCookies = cookiesOpt === "1";

  const storageOpt = await getLocalOption("archiveStorage");
  this.archiveStorage = storageOpt === "1";
}

// @ts-expect-error - TS7006 - Parameter 'autorun' implicitly has an 'any' type.
Expand Down Expand Up @@ -860,7 +889,7 @@ class Recorder {
// @ts-expect-error - TS7006 - Parameter 'url' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
handleWindowOpen(url, sessions) {
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
const headers = { Referer: this.pageInfo.url };
const headers = new Headers({ Referer: this.pageInfo.url });
this.doAsyncFetch({ url, headers, redirectOnly: true }, sessions);
}

Expand Down Expand Up @@ -1450,8 +1479,12 @@ class Recorder {
//this._fetchPending.set(requestId, pending);

try {
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
const data = reqresp.toDBRecord(reqresp.payload, this.pageInfo);
const data = reqresp.toDBRecord(
reqresp.payload,
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
this.pageInfo,
this.archiveCookies,
);

// top-level URL is a non-GET request
if (
Expand Down Expand Up @@ -1513,7 +1546,7 @@ class Recorder {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async getStorage(sessions: any) {
// check if recording storage is allowed
if (!this.recordStorage) {
if (!this.archiveStorage) {
return null;
}

Expand Down Expand Up @@ -1576,7 +1609,7 @@ class Recorder {

reqresp.fillResponseRedirect(params);
// @ts-expect-error - TS2339 - Property 'pageInfo' does not exist on type 'Recorder'.
data = reqresp.toDBRecord(null, this.pageInfo);
data = reqresp.toDBRecord(null, this.pageInfo, this.archiveCookies);
}

reqresp.fillRequest(params);
Expand Down Expand Up @@ -1629,14 +1662,14 @@ class Recorder {
for (const { value } of params.events) {
if (value.indexOf('"kLoad"') > 0) {
const { url } = JSON.parse(value);
this.doAsyncFetch({ url }, sessions);
this.doAsyncFetch({ url, doRangeCheck: true }, sessions);
break;
}
}
}

// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'resp' implicitly has an 'any' type.
async attemptFetchRedirect(request, resp) {
async attemptFetchRedirect(request: FetchEntry, resp) {
if (request.redirectOnly && resp.type === "opaqueredirect") {
const abort = new AbortController();
// @ts-expect-error - TS2345 - Argument of type '{ abort: AbortController; }' is not assignable to parameter of type 'RequestInit'.
Expand Down Expand Up @@ -1671,11 +1704,19 @@ class Recorder {
}

// @ts-expect-error - TS7006 - Parameter 'request' implicitly has an 'any' type. | TS7006 - Parameter 'sessions' implicitly has an 'any' type.
doAsyncFetch(request, sessions) {
doAsyncFetch(request: FetchEntry, sessions) {
if (!request || !this.isValidUrl(request.url)) {
return;
}

if (request.doRangeCheck) {
const url = removeRangeAsQuery(request.url);
if (url) {
request.url = url;
request.rangeRemoved = true;
}
}

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
if (this._fetchUrls.has(request.url)) {
console.log("Skipping, already fetching: " + request.url);
Expand All @@ -1686,15 +1727,13 @@ class Recorder {
request.pageInfo = this.pageInfo;
request.sessions = sessions;

// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
this._fetchQueue.push(request);

this.doBackgroundFetch();
}

async doBackgroundFetch() {
if (
// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
!this._fetchQueue.length ||
// @ts-expect-error - TS2339 - Property '_fetchPending' does not exist on type 'Recorder'.
this._fetchPending.size >= MAX_CONCURRENT_FETCH ||
Expand All @@ -1704,8 +1743,10 @@ class Recorder {
return;
}

// @ts-expect-error - TS2339 - Property '_fetchQueue' does not exist on type 'Recorder'.
const request = this._fetchQueue.shift();
if (!request) {
return;
}

// @ts-expect-error - TS2339 - Property '_fetchUrls' does not exist on type 'Recorder'.
if (this._fetchUrls.has(request.url)) {
Expand All @@ -1732,11 +1773,9 @@ class Recorder {
// @ts-expect-error - TS2339 - Property 'defaultFetchOpts' does not exist on type 'Recorder'.
const opts = { ...this.defaultFetchOpts };

if (request.getRequestHeadersDict) {
opts.headers = request.getRequestHeadersDict().headers;
opts.headers.delete("range");
} else if (request.headers) {
if (request.headers) {
opts.headers = request.headers;
opts.headers.delete("range");
}

let resp = await fetch(request.url, opts);
Expand Down Expand Up @@ -1779,8 +1818,16 @@ class Recorder {
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
reqresp.payload = new Uint8Array(payload);

// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
const data = reqresp.toDBRecord(reqresp.payload, request.pageInfo);
if (request.rangeRemoved) {
reqresp.extraOpts["rangeRemoved"] = "1";
}

const data = reqresp.toDBRecord(
// @ts-expect-error - TS2339 - Property 'payload' does not exist on type 'RequestResponseInfo'.
reqresp.payload,
request.pageInfo,
this.archiveCookies,
);

if (data) {
await this.commitResource(data, request.pageInfo);
Expand Down Expand Up @@ -1813,9 +1860,36 @@ class Recorder {
let payload;

if (reqresp.status === 206) {
sleep(500).then(() => this.doAsyncFetch(reqresp, sessions));
sleep(500).then(() =>
this.doAsyncFetch(
{
url: reqresp.url,
headers: reqresp.getRequestHeadersDict().headers,
},
sessions,
),
);
reqresp.payload = null;
return null;
} else {
const changedUrl = removeRangeAsQuery(reqresp.url);

if (changedUrl) {
reqresp.url = changedUrl;
this.removeReqResp(reqresp.requestId);
sleep(500).then(() =>
this.doAsyncFetch(
{
url: changedUrl,
headers: reqresp.getRequestHeadersDict().headers,
rangeRemoved: true,
},
sessions,
),
);
reqresp.payload = null;
return null;
}
}

if (!this.noResponseForStatus(reqresp.status)) {
Expand Down Expand Up @@ -1888,9 +1962,13 @@ class Recorder {
if (reqresp.payload) {
// @ts-expect-error - TS2571 - Object is of type 'unknown'.
console.log(`Committing Finished ${id} - ${reqresp.url}`);

// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
const data = reqresp.toDBRecord(reqresp.payload, pageInfo);
const data = reqresp.toDBRecord(
// @ts-expect-error - TS2571 - Object is of type 'unknown'. | TS2571 - Object is of type 'unknown'.
reqresp.payload,
pageInfo,
this.archiveCookies,
);

if (data) {
// @ts-expect-error - TS2554 - Expected 2 arguments, but got 1.
Expand Down
12 changes: 8 additions & 4 deletions src/requestresponseinfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ const encoder = new TextEncoder();

// ===========================================================================
class RequestResponseInfo {
extraOpts: Record<string, string>;

// @ts-expect-error - TS7006 - Parameter 'requestId' implicitly has an 'any' type.
constructor(requestId) {
// @ts-expect-error - TS2339 - Property '_created' does not exist on type 'RequestResponseInfo'.
Expand Down Expand Up @@ -70,7 +72,6 @@ class RequestResponseInfo {
// @ts-expect-error - TS2339 - Property 'resourceType' does not exist on type 'RequestResponseInfo'.
this.resourceType = null;

// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
this.extraOpts = {};
}

Expand Down Expand Up @@ -212,7 +213,7 @@ class RequestResponseInfo {
}

// @ts-expect-error - TS7006 - Parameter 'payload' implicitly has an 'any' type. | TS7006 - Parameter 'pageInfo' implicitly has an 'any' type.
toDBRecord(payload, pageInfo) {
toDBRecord(payload, pageInfo, allowCookies) {
// don't save 304 (todo: turn into 'revisit' style entry?)
// extra check for 206, should already be skipped
if (
Expand Down Expand Up @@ -257,7 +258,11 @@ class RequestResponseInfo {
const cookie = reqHeaders.headers.get("cookie");

if (cookie) {
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
if (allowCookies) {
respHeaders.headersDict["x-wabac-preset-cookie"] = cookie;
} else {
reqHeaders.headers.delete("cookie");
}
}

// @ts-expect-error - TS2339 - Property 'url' does not exist on type 'RequestResponseInfo'.
Expand Down Expand Up @@ -312,7 +317,6 @@ class RequestResponseInfo {
mime,
respHeaders: respHeaders.headersDict,
reqHeaders: reqHeaders.headersDict,
// @ts-expect-error - TS2339 - Property 'extraOpts' does not exist on type 'RequestResponseInfo'.
extraOpts: this.extraOpts,
};

Expand Down
4 changes: 2 additions & 2 deletions src/ui/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1426,14 +1426,14 @@ class ArchiveWebApp extends ReplayWebApp {
const archiveStorage = this.renderRoot.querySelector("#archiveStorage");

if (archiveCookies) {
self.localStorage.setItem(
await setLocalOption(
"archiveCookies",
(archiveCookies as HTMLInputElement).checked ? "1" : "0",
);
}

if (archiveStorage) {
self.localStorage.setItem(
await setLocalOption(
"archiveStorage",
(archiveStorage as HTMLInputElement).checked ? "1" : "0",
);
Expand Down
2 changes: 1 addition & 1 deletion webpack.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ function sharedBuild(
sw: "./src/sw/main.ts",
...entry,
},
devtool: argv.mode === "production" ? undefined : "source-map",
devtool: argv.mode === "production" ? undefined : "inline-source-map",
optimization: argv.mode === "production" ? optimization : undefined,
output: {
path: outputPath,
Expand Down
Loading

0 comments on commit b534aef

Please sign in to comment.