Skip to content

Commit

Permalink
Limit URL list entry to maximum URLs (#1242)
Browse files Browse the repository at this point in the history
- Limits URL list entry to 1,000 URLs
- Limits additional URL list entry to 100 URLs
- Shows first invalid URL in list in error message
- Quick and dirty fix for long URLs wrapping: Show URLs in list on one line, with entire container scrolling
---------

Co-authored-by: Henry Wilkinson <[email protected]>
  • Loading branch information
SuaYoo and Shrinks99 authored Oct 4, 2023
1 parent 99ccdf2 commit 38efecc
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 42 deletions.
7 changes: 4 additions & 3 deletions frontend/src/components/config-details.ts
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,13 @@ export class ConfigDetails extends LiteElement {

private renderConfirmUrlListSettings = () => {
const crawlConfig = this.crawlConfig;

return html`
${this.renderSetting(
msg("List of URLs"),
html`
<ul>
${this.seeds?.map((seed: Seed) => html` <li>${seed.url}</li> `)}
<ul class="whitespace-nowrap overflow-x-auto overflow-y-hidden">
${this.seeds?.map((seed: Seed) => html`<li>${seed.url}</li>`)}
</ul>
`,
true
Expand Down Expand Up @@ -368,7 +369,7 @@ export class ConfigDetails extends LiteElement {
msg("List of Additional URLs"),
additionalUrlList?.length
? html`
<ul>
<ul class="whitespace-nowrap overflow-x-auto overflow-y-hidden">
${additionalUrlList.map(
(seed) =>
html`<li>${typeof seed === "string" ? seed : seed.url}</li>`
Expand Down
130 changes: 91 additions & 39 deletions frontend/src/pages/org/workflow-editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ function getLocalizedWeekDays() {
}

function validURL(url: string) {
return /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/.test(
return /((((https?):(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)/.test(
url
);
}
Expand All @@ -220,6 +220,7 @@ const DEFAULT_BEHAVIORS = [
"siteSpecific",
];
const BYTES_PER_GB = 1e9;
const URL_LIST_MAX_URLS = 1000;

@localized()
export class CrawlConfigEditor extends LiteElement {
Expand Down Expand Up @@ -910,6 +911,7 @@ export class CrawlConfigEditor extends LiteElement {
${this.renderFormCol(html`
<sl-textarea
name="urlList"
class="textarea-wrap"
label=${msg("List of URLs")}
rows="10"
autocomplete="off"
Expand All @@ -918,34 +920,42 @@ export class CrawlConfigEditor extends LiteElement {
placeholder=${`https://example.com
https://example.com/path`}
required
@sl-input=${async (e: Event) => {
@keyup=${async (e: KeyboardEvent) => {
if (e.key === "Enter") {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (!inputEl.value) return;
const { isValid, helpText } = this.validateUrlList(inputEl.value);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
}
}}
@sl-input=${(e: CustomEvent) => {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (
!inputEl.checkValidity() &&
!urlListToArray(inputEl.value).some((url) => !validURL(url))
) {
inputEl.setCustomValidity("");
inputEl.helpText = "";
if (!inputEl.value) {
inputEl.helpText = msg("At least 1 URL is required.");
}
}}
@sl-blur=${async (e: Event) => {
@sl-change=${async (e: CustomEvent) => {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (
inputEl.value &&
urlListToArray(inputEl.value).some((url) => !validURL(url))
) {
const text = msg("Please fix invalid URL in list.");
inputEl.helpText = text;
inputEl.setCustomValidity(text);
if (!inputEl.value) return;
const { isValid, helpText } = this.validateUrlList(inputEl.value);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
}}
></sl-textarea>
`)}
${this.renderHelpTextCol(
msg(`The crawler will visit and record each URL listed in the order
defined here.`)
msg(str`The crawler will visit and record each URL listed in the order
defined here. You can enter a maximum of ${URL_LIST_MAX_URLS.toLocaleString()} URLs, separated by a new line.`)
)}
${when(
isCustom,
Expand Down Expand Up @@ -1117,6 +1127,7 @@ https://example.com/path`}
}
const exclusions = trimArray(this.formState.exclusions || []);
const additionalUrlList = urlListToArray(this.formState.urlList);
const maxAdditionalURls = 100;

return html`
${this.renderFormCol(html`
Expand Down Expand Up @@ -1311,34 +1322,48 @@ https://example.net`}
value=${this.formState.urlList}
placeholder=${`https://webrecorder.net/blog
https://archiveweb.page/images/${"logo.svg"}`}
@sl-input=${async (e: Event) => {
@keyup=${async (e: KeyboardEvent) => {
if (e.key === "Enter") {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (!inputEl.value) return;
const { isValid, helpText } = this.validateUrlList(
inputEl.value,
maxAdditionalURls
);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
}
}}
@sl-input=${(e: CustomEvent) => {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (
!inputEl.checkValidity() &&
!urlListToArray(inputEl.value).some((url) => !validURL(url))
) {
inputEl.setCustomValidity("");
inputEl.helpText = "";
if (!inputEl.value) {
inputEl.helpText = msg("At least 1 URL is required.");
}
}}
@sl-blur=${async (e: Event) => {
@sl-change=${async (e: CustomEvent) => {
const inputEl = e.target as SlInput;
await inputEl.updateComplete;
if (
inputEl.value &&
urlListToArray(inputEl.value).some((url) => !validURL(url))
) {
const text = msg("Please fix invalid URL in list.");
inputEl.helpText = text;
inputEl.setCustomValidity(text);
if (!inputEl.value) return;
const { isValid, helpText } = this.validateUrlList(
inputEl.value,
maxAdditionalURls
);
inputEl.helpText = helpText;
if (isValid) {
inputEl.setCustomValidity("");
} else {
inputEl.setCustomValidity(helpText);
}
}}
></sl-textarea>
`)}
${this.renderHelpTextCol(
msg(`The crawler will visit and record each URL listed here. Other
links on these pages will not be crawled.`)
msg(str`The crawler will visit and record each URL listed here. Other
links on these pages will not be crawled. You can enter up to ${maxAdditionalURls.toLocaleString()} URLs.`)
)}
</div>
</btrix-details>
Expand Down Expand Up @@ -2232,6 +2257,33 @@ https://archiveweb.page/images/${"logo.svg"}`}
`;
}

/**
 * Checks a raw URL list value from a text input and produces the help text
 * to show alongside it.
 *
 * The value is split into entries with `urlListToArray`, then checked in
 * this order:
 *  1. More than `max` entries: invalid, asks the user to shorten the list.
 *  2. Any entry failing `validURL`: invalid, names the first such entry.
 *  3. Otherwise: valid, reports how many URLs were entered.
 *
 * @param value - Raw text of the URL list field.
 * @param max - Maximum number of entries allowed; defaults to
 *   `URL_LIST_MAX_URLS`.
 * @returns Whether the list is valid, and the localized help text (either
 *   the entry count or the reason the list was rejected).
 */
private validateUrlList(
  value: string,
  max = URL_LIST_MAX_URLS
): { isValid: boolean; helpText: string } {
  const urlList = urlListToArray(value);

  // The length limit takes priority: individual entries are not inspected
  // when the list is already too long.
  if (urlList.length > max) {
    return {
      isValid: false,
      helpText: msg(
        str`Please shorten list to ${max.toLocaleString()} or fewer URLs.`
      ),
    };
  }

  // Only the first offending entry is surfaced in the message.
  const invalidUrl = urlList.find((url) => !validURL(url));
  if (invalidUrl) {
    return {
      isValid: false,
      helpText: msg(
        str`Please remove or fix the following invalid URL: ${invalidUrl}`
      ),
    };
  }

  // Singular and plural are separate messages so each can be translated
  // independently.
  if (urlList.length === 1) {
    return {
      isValid: true,
      helpText: msg(str`${urlList.length.toLocaleString()} URL entered`),
    };
  }
  return {
    isValid: true,
    helpText: msg(str`${urlList.length.toLocaleString()} URLs entered`),
  };
}

private onTagInput = (e: TagInputEvent) => {
const { value } = e.detail;
if (!value) return;
Expand Down
5 changes: 5 additions & 0 deletions frontend/src/theme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,11 @@ const theme = css`
--help-text-align: right;
}
/* Prevent line wrapping in internal textarea input (keep each entry on one line), e.g. for URL lists */
.textarea-wrap::part(textarea) {
white-space: pre;
}
/* Aesthetically closer to monospaced font: */
.font-monostyle {
font-family: var(--font-monostyle-family);
Expand Down

0 comments on commit 38efecc

Please sign in to comment.