Skip to content

Commit

Permalink
Change resource table to be based on mime type (#1698)
Browse files Browse the repository at this point in the history
Follow-up to #1689 

Instead of just using resource types, using a combination of mime type, resource type and extension analysis.
Current logic is:
    - if resourceType is script, stylesheet, image, font, use that definitely
    - if resourceType is fetch/xhr, also check for extension, specifically for images
    - categorize favicon.ico separately as 'favicon'
    - also check for extension for json
    - also check for pdf by type + extension
    - categorize 'ping' as 'other'
    - by default, use the first part of mime as the type
  • Loading branch information
tw4l authored Apr 21, 2024
1 parent 4360e0c commit d31bdd2
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 45 deletions.
167 changes: 131 additions & 36 deletions frontend/src/pages/org/archived-item-qa/archived-item-qa.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,34 +46,39 @@ import { getLocale } from "@/utils/localization";

const DEFAULT_PAGE_SIZE = 100;

type PageResource = {
status?: number;
mime?: string;
type?: string;
};

// From https://developer.mozilla.org/en-US/docs/Web/Media/Formats/Image_types
const IMG_EXTS = [
"apng",
"avif",
"gif",
"jpg",
"jpeg",
"jfif",
"pjpeg",
"pjp",
"png",
"svg",
"webp",
"tif",
"tiff",
"bmp",
"ico",
"cur",
];

const tabToPrefix: Record<QATypes.QATab, string> = {
screenshots: "view",
text: "text",
resources: "pageinfo",
replay: "",
};

const resourceTypes = [
"document",
"image",
"media",
"stylesheet",
"font",
"script",
"xhr",
"fetch",
"prefetch",
"eventsource",
"websocket",
"manifest",
"ping",
"cspviolationreport",
"preflight",
"signedexchange",
"texttrack",
"other",
];

@localized()
@customElement("btrix-archived-item-qa")
export class ArchivedItemQA extends TailwindElement {
Expand Down Expand Up @@ -856,6 +861,73 @@ export class ArchivedItemQA extends TailwindElement {
}
}

private resolveType(url: string, { mime = "", type }: PageResource) {
if (type) {
type = type.toLowerCase();
}

// Map common mime types where important information would be lost
// if we only use first half to more descriptive resource types
if (type === "script" || mime.includes("javascript")) {
return "javascript";
}
if (type === "stylesheet" || mime.includes("css")) {
return "stylesheet";
}
if (type === "image") {
return "image";
}
if (type === "font") {
return "font";
}
if (type === "ping") {
return "other";
}

if (url.endsWith("favicon.ico")) {
return "favicon";
}

let path = "";

try {
path = new URL(url).pathname;
} catch (e) {
// ignore
}

const ext = path.slice(path.lastIndexOf(".") + 1);

if (type === "fetch" || type === "xhr") {
if (IMG_EXTS.includes(ext)) {
return "image";
}
}

if (mime.includes("json") || ext === "json") {
return "json";
}

if (mime.includes("pdf") || ext === "pdf") {
return "pdf";
}

if (
type === "document" ||
mime.includes("html") ||
ext === "html" ||
ext === "htm"
) {
return "html";
}

if (!mime) {
return "other";
}

return mime.split("/")[0];
}

private async fetchContentForTab({ qa } = { qa: false }): Promise<void> {
const page = this.page;
const tab = this.tab;
Expand All @@ -879,7 +951,7 @@ export class ArchivedItemQA extends TailwindElement {
const timestamp = page.ts?.split(".")[0].replace(/\D/g, "");
const pageUrl = page.url;

const doLoad = async () => {
const doLoad = async (isQA: boolean) => {
const urlPrefix = tabToPrefix[tab];
const urlPart = `${timestamp}mp_/${urlPrefix ? `urn:${urlPrefix}:` : ""}${pageUrl}`;
const url = `/replay/w/${sourceId}/${urlPart}`;
Expand Down Expand Up @@ -907,20 +979,30 @@ export class ArchivedItemQA extends TailwindElement {
// tab === "resources"

const json = (await resp.json()) as {
urls: { status?: number; type?: string }[];
urls: PageResource[];
};
// console.log(json);

const typeMap = new Map<string, QATypes.GoodBad>();
resourceTypes.forEach((x) => typeMap.set(x, { good: 0, bad: 0 }));
let good = 0,
bad = 0;

for (const entry of Object.values(json.urls)) {
const { type: resType_ = "", status = 0 } = entry;
const resType = resType_.toLowerCase();
for (const [url, entry] of Object.entries(json.urls)) {
const { status = 0, type, mime } = entry;
const resType = this.resolveType(url, entry);

// for debugging
logResource(isQA, resType, url, type, mime, status);

if (typeMap.has(resType)) {
if (!typeMap.has(resType)) {
if (status < 400) {
typeMap.set(resType, { good: 1, bad: 0 });
good++;
} else {
typeMap.set(resType, { good: 0, bad: 1 });
bad++;
}
} else {
const count = typeMap.get(resType);
if (status < 400) {
count!.good++;
Expand All @@ -933,13 +1015,6 @@ export class ArchivedItemQA extends TailwindElement {
}
}

// remove empty entries
resourceTypes.forEach((x) => {
if (!typeMap.get(x)) {
typeMap.delete(x);
}
});

typeMap.set("Total", { good, bad });

// const text = JSON.stringify(
Expand All @@ -954,7 +1029,7 @@ export class ArchivedItemQA extends TailwindElement {
};

try {
const content = await doLoad();
const content = await doLoad(qa);

if (qa) {
this.qaData = {
Expand Down Expand Up @@ -1076,3 +1151,23 @@ export class ArchivedItemQA extends TailwindElement {
}
}
}

// leaving here for further debugging of resources
function logResource(
_isQA: boolean,
_resType: string,
_url: string,
_type?: string,
_mime?: string,
_status = 0,
) {
// console.log(
// _isQA ? "replay" : "crawl",
// _status >= 400 ? "bad" : "good",
// _resType,
// _type,
// _mime,
// _status,
// _url,
// );
}
24 changes: 15 additions & 9 deletions frontend/src/pages/org/archived-item-qa/ui/resources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,29 @@ function renderDiff(
${qaResources[TOTAL].bad.toLocaleString()}
</span>`,
],
...Object.keys(crawlResources)
...Object.keys(qaResources)
.filter((key) => key !== TOTAL)
.map((key) => [
html`<span class=${tw`capitalize`}>${key}</span>`,
html`${crawlResources[key].good.toLocaleString()}`,
html`${crawlResources[key].bad.toLocaleString()}`,
html`${Object.prototype.hasOwnProperty.call(crawlResources, key)
? crawlResources[key].good.toLocaleString()
: 0}`,
html`${Object.prototype.hasOwnProperty.call(crawlResources, key)
? crawlResources[key].bad.toLocaleString()
: 0}`,
html`<span
class=${crawlResources[key].good !== qaResources[key].good
? tw`text-danger`
: tw`text-neutral-400`}
class=${Object.prototype.hasOwnProperty.call(crawlResources, key) &&
crawlResources[key].good === qaResources[key].good
? tw`text-neutral-400`
: tw`text-danger`}
>
${qaResources[key].good.toLocaleString()}
</span>`,
html`<span
class=${crawlResources[key].bad !== qaResources[key].bad
? tw`font-semibold text-danger`
: tw`text-neutral-400`}
class=${Object.prototype.hasOwnProperty.call(crawlResources, key) &&
crawlResources[key].bad === qaResources[key].bad
? tw`text-neutral-400`
: tw`font-semibold text-danger`}
>
${qaResources[key].bad.toLocaleString()}
</span>`,
Expand Down

0 comments on commit d31bdd2

Please sign in to comment.