From fe2f5c418f37805a584a9d4a04d83c8b6327343d Mon Sep 17 00:00:00 2001
From: Jagoda Berry Rybacka
Date: Tue, 1 Aug 2023 19:50:14 +0200
Subject: [PATCH 1/7] Change current provider on timed-out RPC requests

If some RPC request is failing because of a timeout, we should switch to a
different RPC provider.
---
 .../services/chain/serial-fallback-provider.ts | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts
index bf080c56d9..7ab70b9e74 100644
--- a/background/services/chain/serial-fallback-provider.ts
+++ b/background/services/chain/serial-fallback-provider.ts
@@ -381,6 +381,8 @@
       return result
     }
 
+    // FIXME: currentProvider.send is not rethrowing rate limiting errors, they are being swallowed here
+    // and we have to wait for them to timeout to react and change current provider
     const result = await this.currentProvider.send(method, params)
     // If https://github.com/tc39/proposal-decorators ever gets out of Stage 3
     // cleaning up the messageToSend object seems like a great job for a decorator
@@ -390,6 +392,19 @@
       // Awful, but what can ya do.
       const stringifiedError = String(error)
 
+      // We are getting rate limited probably, let's try different provider without retrying
+      if (
+        stringifiedError.includes("Error: timeout") &&
+        this.currentProviderIndex + 1 < this.providerCreators.length
+      ) {
+        // If there is another provider to try - try to send the message on that provider
+        if (stringifiedError.includes(this.currentProvider.connection.url)) {
+          return await this.attemptToSendMessageOnNewProvider(messageId)
+        }
+        // We've already switched to a different provider, let's try again
+        return await this.routeRpcCall(messageId)
+      }
+
       if (
         /**
          * WebSocket is already in CLOSING - We are reconnecting

From e8104575c6816f05ee22c25f1f76dc4093b4c791 Mon Sep 17 00:00:00 2001
From: Jorge Luis <28708889+hyphenized@users.noreply.github.com>
Date: Tue, 1 Aug 2023 20:35:59 -0500
Subject: [PATCH 2/7] Set retry/timeout options on fallback providers

`fetchJson` defaults (12 attempts, 120s timeout) don't really make sense here,
as we want to fall back to another provider in case the current one is not
able to fulfill the request in a reasonable amount of time.
---
 .../services/chain/serial-fallback-provider.ts | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts
index 7ab70b9e74..2008a3c166 100644
--- a/background/services/chain/serial-fallback-provider.ts
+++ b/background/services/chain/serial-fallback-provider.ts
@@ -72,6 +72,9 @@ const WAIT_BEFORE_SEND_AGAIN = 100
 const BALANCE_TTL = 1 * SECOND
 // How often to cleanup our hasCode and balance caches.
 const CACHE_CLEANUP_INTERVAL = 10 * SECOND
+// How long to wait for a provider to respond before falling back to the next provider.
+const PROVIDER_REQUEST_TIMEOUT = 5 * SECOND
+
 /**
  * Wait the given number of ms, then run the provided function. Returns a
  * promise that will resolve after the delay has elapsed and the passed
@@ -1021,11 +1024,19 @@ function getProviderCreator(
     return new WebSocketProvider(rpcUrl)
   }
 
-  if (/rpc\.ankr\.com|1rpc\.io|polygon-rpc\.com/.test(url.hostname)) {
-    return new JsonRpcBatchProvider(rpcUrl)
+  if (/rpc\.ankr\.com|1rpc\.io|polygon-rpc\.com/.test(url.href)) {
+    return new JsonRpcBatchProvider({
+      url: rpcUrl,
+      throttleLimit: 1,
+      timeout: PROVIDER_REQUEST_TIMEOUT,
+    })
   }
 
-  return new JsonRpcProvider(rpcUrl)
+  return new JsonRpcProvider({
+    url: rpcUrl,
+    throttleLimit: 1,
+    timeout: PROVIDER_REQUEST_TIMEOUT,
+  })
 }
 
 export function makeFlashbotsProviderCreator(): ProviderCreator {

From 7236b7e447e8068f1de7263dfd55b0db80628a31 Mon Sep 17 00:00:00 2001
From: hyphenized <28708889+hyphenized@users.noreply.github.com>
Date: Tue, 1 Aug 2023 23:47:50 -0500
Subject: [PATCH 3/7] Drop retries for server/network errors

Fail over to the next provider rather than retrying after exponential
backoffs on timeouts/rate limits.

Because multiple services rely on the chain service, and hence on the
currently connected provider, parallel backoff retries end up resetting
limits on the connected node. For a request that was throttled/rate limited,
by the time the backoff delay has passed, a different request could have
been fulfilled instead. This results in a lot of wasted time and resources,
as we keep retrying (possibly several requests at the same time) only to end
up in the same bad state.
---
 .../chain/serial-fallback-provider.ts | 101 +++++++++---------
 1 file changed, 50 insertions(+), 51 deletions(-)

diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts
index 2008a3c166..e0d9d724fb 100644
--- a/background/services/chain/serial-fallback-provider.ts
+++ b/background/services/chain/serial-fallback-provider.ts
@@ -384,8 +384,6 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
       return result
     }
 
-    // FIXME: currentProvider.send is not rethrowing rate limiting errors, they are being swallowed here
-    // and we have to wait for them to timeout to react and change current provider
     const result = await this.currentProvider.send(method, params)
     // If https://github.com/tc39/proposal-decorators ever gets out of Stage 3
     // cleaning up the messageToSend object seems like a great job for a decorator
@@ -395,65 +393,66 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
       // Awful, but what can ya do.
const stringifiedError = String(error) - // We are getting rate limited probably, let's try different provider without retrying - if ( - stringifiedError.includes("Error: timeout") && - this.currentProviderIndex + 1 < this.providerCreators.length - ) { - // If there is another provider to try - try to send the message on that provider - if (stringifiedError.includes(this.currentProvider.connection.url)) { - return await this.attemptToSendMessageOnNewProvider(messageId) - } - // We've already switched to a different provider, let's try again - return await this.routeRpcCall(messageId) - } - if ( /** * WebSocket is already in CLOSING - We are reconnecting - * bad response - error on the endpoint provider's side - * missing response - we might be disconnected due to network instability - * we can't execute this request - ankr rate limit hit + * - bad response + * error on the endpoint provider's side + * - missing response + * We might be disconnected due to network instability + * - we can't execute this request + * ankr rate limit hit + * - failed response + * fetchJson default "fallback" error, generally thrown after 429s + * - TIMEOUT + * fetchJson timed out, we could retry but it's safer to just fail over + * - NETWORK_ERROR + * Any other network error, including no-network errors + * + * Note: We can't use ether's generic SERVER_ERROR because it's also + * used for invalid responses from the server, which we can retry on */ stringifiedError.match( - /WebSocket is already in CLOSING|bad response|missing response|we can't execute this request/ + /WebSocket is already in CLOSING|bad response|missing response|we can't execute this request|failed response|TIMEOUT|NETWORK_ERROR/ ) ) { - if (this.shouldSendMessageOnNextProvider(messageId)) { - // If there is another provider to try - try to send the message on that provider - if (this.currentProviderIndex + 1 < this.providerCreators.length) { - return await this.attemptToSendMessageOnNewProvider(messageId) - } + // If there is another provider to try - try to send the message on that provider + if (this.currentProviderIndex + 1 < this.providerCreators.length) { + return await this.attemptToSendMessageOnNewProvider(messageId) + } - // Otherwise, set us up for the next call, but fail the - // current one since we've gone through every available provider. Note - // that this may happen over time, but we still fail the request that - // hits the end of the list. - this.currentProviderIndex = 0 - - // Reconnect, but don't wait for the connection to go through. - this.reconnectProvider() - delete this.messagesToSend[messageId] - throw error - } else { - const backoff = this.backoffFor(messageId) - logger.debug( - "Backing off for", - backoff, - "and retrying: ", - method, - params - ) + // Otherwise, set us up for the next call, but fail the + // current one since we've gone through every available provider. Note + // that this may happen over time, but we still fail the request that + // hits the end of the list. + this.currentProviderIndex = 0 - return await waitAnd(backoff, async () => { - if (isClosedOrClosingWebSocketProvider(this.currentProvider)) { - await this.reconnectProvider() - } + // Reconnect, but don't wait for the connection to go through. 
+ this.reconnectProvider() + delete this.messagesToSend[messageId] + throw error + } else if ( + // If we received some bogus response, let's try again + stringifiedError.match(/bad result from backend/) && + this.shouldSendMessageOnNextProvider(messageId) + ) { + const backoff = this.backoffFor(messageId) + logger.debug( + "Backing off for", + backoff, + "and retrying: ", + method, + params + ) - logger.debug("Retrying", method, params) - return this.routeRpcCall(messageId) - }) - } + return await waitAnd(backoff, async () => { + if (isClosedOrClosingWebSocketProvider(this.currentProvider)) { + await this.reconnectProvider() + } + + logger.debug("Retrying", method, params) + return this.routeRpcCall(messageId) + }) } logger.debug( From c0d814ac6ed83dad24b824217c818dd128ab7e7b Mon Sep 17 00:00:00 2001 From: hyphenized <28708889+hyphenized@users.noreply.github.com> Date: Wed, 2 Aug 2023 00:09:09 -0500 Subject: [PATCH 4/7] Reset current provider after using alchemy as a fallback Should all providers fail, revert back to the first available generic provider after Alchemy has been used as a last resort. --- .../services/chain/serial-fallback-provider.ts | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts index e0d9d724fb..ab6e17baa7 100644 --- a/background/services/chain/serial-fallback-provider.ts +++ b/background/services/chain/serial-fallback-provider.ts @@ -583,7 +583,19 @@ export default class SerialFallbackProvider extends JsonRpcProvider { this.currentProviderIndex += 1 // Try again with the next provider. await this.reconnectProvider() - return this.routeRpcCall(messageId) + + const isAlchemyFallback = + this.alchemyProvider && this.currentProvider === this.alchemyProvider + + return this.routeRpcCall(messageId).finally(() => { + // If every other provider failed and we're on the alchemy provider, + // reconnect to the first provider once we've handled this request + // as we should limit relying on alchemy as a fallback + if (isAlchemyFallback) { + this.currentProviderIndex = 0 + this.reconnectProvider() + } + }) } /** From c20833c742dce89841f224f699ef21f7202a2837 Mon Sep 17 00:00:00 2001 From: hyphenized <28708889+hyphenized@users.noreply.github.com> Date: Wed, 2 Aug 2023 00:24:05 -0500 Subject: [PATCH 5/7] Cache created providers This eliminates redundant network checks performed by ethers whenever we experience a fail over and a provider must be re-created --- .../services/chain/serial-fallback-provider.ts | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts index ab6e17baa7..a949b77e68 100644 --- a/background/services/chain/serial-fallback-provider.ts +++ b/background/services/chain/serial-fallback-provider.ts @@ -208,6 +208,9 @@ export default class SerialFallbackProvider extends JsonRpcProvider { private customProviderSupportedMethods: string[] = [] + private cachedProvidersByIndex: Record = + {} + /** * This object holds all messages that are either being sent to a provider * and waiting for a response, or are in the process of being backed off due @@ -303,6 +306,7 @@ export default class SerialFallbackProvider extends JsonRpcProvider { super(firstProvider.connection, firstProvider.network) this.currentProvider = firstProvider + this.cachedProvidersByIndex[0] = firstProvider if (alchemyProviderCreator) { 
       this.supportsAlchemy = true
@@ -849,7 +853,14 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
       "..."
     )
 
-    this.currentProvider = this.providerCreators[this.currentProviderIndex]()
+    const cachedProvider =
+      this.cachedProvidersByIndex[this.currentProviderIndex] ??
+      this.providerCreators[this.currentProviderIndex]()
+
+    this.cachedProvidersByIndex[this.currentProviderIndex] = cachedProvider
+
+    this.currentProvider = cachedProvider
+
     await this.resubscribe(this.currentProvider)
 
     // TODO After a longer backoff, attempt to reset the current provider to 0.

From 7c88a8629088ce2e718e5cbc48571f99534f1a03 Mon Sep 17 00:00:00 2001
From: hyphenized <28708889+hyphenized@users.noreply.github.com>
Date: Wed, 2 Aug 2023 00:38:15 -0500
Subject: [PATCH 6/7] Add serial-fallback-provider reliability tester

This script creates a proxy server to test the reliability and failover
functionality of our serial fallback provider implementation. This can be
beneficial for e2e tests too.
---
 background/constants/networks.ts         |   2 +-
 scripts/unreliable-rpc-provider-utils.ts |  12 +++
 scripts/unreliable-rpc-provider.mjs      | 104 +++++++++++++++++++++++
 3 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 scripts/unreliable-rpc-provider-utils.ts
 create mode 100644 scripts/unreliable-rpc-provider.mjs

diff --git a/background/constants/networks.ts b/background/constants/networks.ts
index 465dcb1c78..0fae207948 100644
--- a/background/constants/networks.ts
+++ b/background/constants/networks.ts
@@ -183,7 +183,7 @@ export const FLASHBOTS_DOCS_URL =
   "https://docs.flashbots.net/flashbots-protect/rpc/mev-share"
 
 export const CHAIN_ID_TO_RPC_URLS: {
-  [chainId: string]: Array | undefined
+  [chainId: string]: string[]
 } = {
   [ROOTSTOCK.chainID]: ["https://public-node.rsk.co"],
   [POLYGON.chainID]: [
diff --git a/scripts/unreliable-rpc-provider-utils.ts b/scripts/unreliable-rpc-provider-utils.ts
new file mode 100644
index 0000000000..bd1b955f35
--- /dev/null
+++ b/scripts/unreliable-rpc-provider-utils.ts
@@ -0,0 +1,12 @@
+export const patchRPCURL = (url: string): string =>
+  `http://localhost:9000?rpc=${url}`
+
+export const patchRPCURLS = (
+  chainIDToRPCMap: Record
+): typeof chainIDToRPCMap =>
+  Object.fromEntries(
+    Object.entries(chainIDToRPCMap).map(([_, urls]) => [
+      _,
+      urls.map(patchRPCURL),
+    ])
+  )
diff --git a/scripts/unreliable-rpc-provider.mjs b/scripts/unreliable-rpc-provider.mjs
new file mode 100644
index 0000000000..0bcb029864
--- /dev/null
+++ b/scripts/unreliable-rpc-provider.mjs
@@ -0,0 +1,104 @@
+/* eslint-disable */
+/**
+ * This is a simple proxy server that will randomly throttle or time out
+ * requests and is used to test the reliability of the SerialFallbackProvider.
+ */
+import http from "http"
+import fetch from "node-fetch"
+import _ from "lodash"
+
+const PORT = 9000
+
+const throttleResponse = {
+  jsonrpc: "2.0",
+  id: null,
+  error: "Whomp, too many requests",
+}
+
+/**
+ * Returns a function that will return true `chance` percent of the time.
+ */
+function makeChance(chance) {
+  return () => Math.random() < chance / 100
+}
+
+const wait = (ms) => new Promise((r) => setTimeout(r, ms))
+
+const isThrottled = makeChance(20)
+const isTimeout = makeChance(20)
+
+const stats = { timeout: 0, throttle: 0, proxied: 0 }
+
+/**
+ * Time until the server will time out a request and close the socket.
+ */ +const RESPONSE_TIMEOUT = 20_000 + +const server = http.createServer(async (req, res) => { + const payload = JSON.stringify(throttleResponse) + + const url = new URL(req.url, `http://${req.headers.host}`) + + const targetURL = url.searchParams.get("rpc") + + if (!targetURL) { + res.writeHead(500).end("missing target url") + return + } + + if (isThrottled()) { + stats.throttle += 1 + + res + .writeHead(429, { + "content-type": "application/json", + "content-length": Buffer.byteLength(payload), + "Access-Control-Allow-Origin": "no-cors", + }) + .end(payload) + } else if (isTimeout()) { + stats.timeout += 1 + + await wait(RESPONSE_TIMEOUT) + res.destroy() + } else { + stats.proxied += 1 + + const data = await new Promise((resolve, rej) => { + let body = "" + req + .on("data", (chunk) => { + body += chunk + }) + .on("end", () => { + resolve(body) + }) + .on("error", () => { + rej() + req.destroy() + }) + }) + + fetch(targetURL, { + method: req.method, + ...(req.method === "POST" ? { body: data } : {}), + headers: _.omit(req.headers, "host"), + }) + .then(async (targetResponse) => { + const body = await targetResponse.text() + res + .writeHead(targetResponse.status, { + "Content-Type": "application/json", + }) + .end(body) + }) + .catch((err) => { + console.log("Error retrieving: ", targetURL, err) + res.writeHead(500).end() + }) + } +}) + +server.listen(PORT) + +setInterval(() => console.log(stats), 5000) From 8a90482834d7e7ffe361fe6511bfe3c71113d3de Mon Sep 17 00:00:00 2001 From: hyphenized <28708889+hyphenized@users.noreply.github.com> Date: Wed, 2 Aug 2023 01:42:13 -0500 Subject: [PATCH 7/7] Correctly fallback on bad responses Switch to the next provider if we exceed the number of retries for bad responses --- .../chain/serial-fallback-provider.ts | 12 +++++++-- ...rial-fallback-provider.integration.test.ts | 25 ++++++++++++++++--- background/tests/utils.ts | 2 ++ 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/background/services/chain/serial-fallback-provider.ts b/background/services/chain/serial-fallback-provider.ts index a949b77e68..ba7293ca75 100644 --- a/background/services/chain/serial-fallback-provider.ts +++ b/background/services/chain/serial-fallback-provider.ts @@ -437,9 +437,17 @@ export default class SerialFallbackProvider extends JsonRpcProvider { throw error } else if ( // If we received some bogus response, let's try again - stringifiedError.match(/bad result from backend/) && - this.shouldSendMessageOnNextProvider(messageId) + stringifiedError.match(/bad result from backend/) ) { + if ( + // If there is another provider to try and we have exceeded the + // number of retries try to send the message on that provider + this.currentProviderIndex + 1 < this.providerCreators.length && + this.shouldSendMessageOnNextProvider(messageId) + ) { + return await this.attemptToSendMessageOnNewProvider(messageId) + } + const backoff = this.backoffFor(messageId) logger.debug( "Backing off for", diff --git a/background/services/chain/tests/serial-fallback-provider.integration.test.ts b/background/services/chain/tests/serial-fallback-provider.integration.test.ts index f940e33f87..1b5cd11bf7 100644 --- a/background/services/chain/tests/serial-fallback-provider.integration.test.ts +++ b/background/services/chain/tests/serial-fallback-provider.integration.test.ts @@ -3,6 +3,7 @@ import Sinon, * as sinon from "sinon" import { ETHEREUM } from "../../../constants" import { wait } from "../../../lib/utils" import SerialFallbackProvider from "../serial-fallback-provider" +import { 
waitFor } from "../../../tests/utils" const sandbox = sinon.createSandbox() @@ -19,7 +20,9 @@ describe("Serial Fallback Provider", () => { .callsFake(async () => "success") alchemySendStub = sandbox .stub(mockAlchemyProvider, "send") - .callsFake(async () => "success") + .callsFake(async () => { + return "success" + }) fallbackProvider = new SerialFallbackProvider(ETHEREUM.chainID, [ { type: "generic", @@ -74,6 +77,9 @@ describe("Serial Fallback Provider", () => { genericSendStub.onCall(0).throws("bad response") genericSendStub.onCall(1).returns(ETHEREUM.chainID) genericSendStub.onCall(2).returns("success") + + await waitFor(() => expect(genericSendStub.called).toEqual(true)) + await expect( fallbackProvider.send("eth_getBalance", []) ).resolves.toEqual("success") @@ -87,6 +93,9 @@ describe("Serial Fallback Provider", () => { genericSendStub.onCall(0).throws("missing response") genericSendStub.onCall(1).returns(ETHEREUM.chainID) genericSendStub.onCall(2).returns("success") + + await waitFor(() => expect(genericSendStub.called).toEqual(true)) + await expect( fallbackProvider.send("eth_getBalance", []) ).resolves.toEqual("success") @@ -100,6 +109,9 @@ describe("Serial Fallback Provider", () => { genericSendStub.onCall(0).throws("we can't execute this request") genericSendStub.onCall(1).returns(ETHEREUM.chainID) genericSendStub.onCall(2).returns("success") + + await waitFor(() => expect(genericSendStub.called).toEqual(true)) + await expect( fallbackProvider.send("eth_getBalance", []) ).resolves.toEqual("success") @@ -110,14 +122,16 @@ describe("Serial Fallback Provider", () => { }) it("should switch to next provider after three bad responses", async () => { - genericSendStub.throws("bad response") + genericSendStub.throws("bad result from backend") alchemySendStub.onCall(0).returns(ETHEREUM.chainID) alchemySendStub.onCall(1).returns("success") + await waitFor(() => expect(genericSendStub.called).toEqual(true)) + await expect( fallbackProvider.send("eth_getBalance", []) ).resolves.toEqual("success") - // 1 try and 3 retries of eth_getBalance + expect( genericSendStub.args.filter((args) => args[0] === "eth_getBalance") .length @@ -204,12 +218,15 @@ describe("Serial Fallback Provider", () => { // Accessing private property // eslint-disable-next-line @typescript-eslint/no-explicit-any expect((fallbackProvider as any).currentProviderIndex).toEqual(0) + + await waitFor(() => expect(genericSendStub.called).toEqual(true)) + await expect( fallbackProvider.send("eth_getBalance", []) ).resolves.toEqual("success") // Accessing private property // eslint-disable-next-line @typescript-eslint/no-explicit-any - expect((fallbackProvider as any).currentProviderIndex).toEqual(1) + expect((fallbackProvider as any).currentProviderIndex).toEqual(0) }) }) }) diff --git a/background/tests/utils.ts b/background/tests/utils.ts index 0260aab98b..102b7368a5 100644 --- a/background/tests/utils.ts +++ b/background/tests/utils.ts @@ -1,4 +1,6 @@ import browser from "webextension-polyfill" +// eslint-disable-next-line import/no-extraneous-dependencies +export { waitFor } from "@testing-library/dom" type LocalStorageMock = Record>
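
For local testing, the proxy and URL helpers introduced in patch 6 are meant to sit between the wallet and its configured RPC endpoints. The wiring itself isn't shown in this series, so the sketch below is a hypothetical example of how a dev build might route CHAIN_ID_TO_RPC_URLS through the proxy started with `node scripts/unreliable-rpc-provider.mjs`; the import paths are assumptions.

// Hypothetical wiring, not part of this patch series: point every configured
// RPC URL at the local proxy so SerialFallbackProvider sees random 429s and
// socket timeouts and has to exercise its failover path.
// Assumes `node scripts/unreliable-rpc-provider.mjs` is already listening on port 9000.
import { CHAIN_ID_TO_RPC_URLS } from "../background/constants/networks"
import { patchRPCURLS } from "./unreliable-rpc-provider-utils"

// e.g. "https://polygon-rpc.com" -> "http://localhost:9000?rpc=https://polygon-rpc.com"
const UNRELIABLE_RPC_URLS = patchRPCURLS(CHAIN_ID_TO_RPC_URLS)

console.log(UNRELIABLE_RPC_URLS)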