Skip to content

Commit

Permalink
Change current provider on timeouted RPC requests (#3587)
Browse files Browse the repository at this point in the history
### What
If some RPC request fails because of a timeout we should change to a
different RPC provider. Tested on reproduced problem with `ankr` RPC

#### TODO:

- [x] `await this.currentProvider.send(method, params)` - is swallowing
429 request's error, can we rethrow it so we don't have to wait for
timeouts?
- [x] we should loop providers - if the last provider on the list has
been used let's set the first provider as current - lmk if you think it
is a good idea 🤔

## How to test
- Patch default providers
https://github.com/tahowallet/extension/blob/7c88a8629088ce2e718e5cbc48571f99534f1a03/background/constants/networks.ts#L185
using `patchRPCURLS` from
https://github.com/tahowallet/extension/blob/7c88a8629088ce2e718e5cbc48571f99534f1a03/scripts/unreliable-rpc-provider-utils.ts
- Run `node scripts/unreliable-rpc-provider.mjs`
- Reinstall the extension, open the background inspector and check that
providers are being switched on timeouts/server errors
- Onboard an account, perform a few actions. Verify the wallet is
handling errors gracefully

Latest build:
[extension-builds-3587](https://github.com/tahowallet/extension/suites/14751539578/artifacts/838585130)
(as of Wed, 02 Aug 2023 07:27:53 GMT).
  • Loading branch information
kkosiorowska authored Aug 2, 2023
2 parents f56ce73 + 7f4a557 commit bdb3317
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 46 deletions.
2 changes: 1 addition & 1 deletion background/constants/networks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ export const FLASHBOTS_DOCS_URL =
"https://docs.flashbots.net/flashbots-protect/rpc/mev-share"

export const CHAIN_ID_TO_RPC_URLS: {
[chainId: string]: Array<string> | undefined
[chainId: string]: string[]
} = {
[ROOTSTOCK.chainID]: ["https://public-node.rsk.co"],
[POLYGON.chainID]: [
Expand Down
138 changes: 97 additions & 41 deletions background/services/chain/serial-fallback-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ const WAIT_BEFORE_SEND_AGAIN = 100
const BALANCE_TTL = 1 * SECOND
// How often to cleanup our hasCode and balance caches.
const CACHE_CLEANUP_INTERVAL = 10 * SECOND
// How long to wait for a provider to respond before falling back to the next provider.
const PROVIDER_REQUEST_TIMEOUT = 5 * SECOND

/**
* Wait the given number of ms, then run the provided function. Returns a
* promise that will resolve after the delay has elapsed and the passed
Expand Down Expand Up @@ -205,6 +208,9 @@ export default class SerialFallbackProvider extends JsonRpcProvider {

private customProviderSupportedMethods: string[] = []

private cachedProvidersByIndex: Record<string, JsonRpcProvider | undefined> =
{}

/**
* This object holds all messages that are either being sent to a provider
* and waiting for a response, or are in the process of being backed off due
Expand Down Expand Up @@ -300,6 +306,7 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
super(firstProvider.connection, firstProvider.network)

this.currentProvider = firstProvider
this.cachedProvidersByIndex[0] = firstProvider

if (alchemyProviderCreator) {
this.supportsAlchemy = true
Expand Down Expand Up @@ -393,49 +400,71 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
if (
/**
* WebSocket is already in CLOSING - We are reconnecting
* bad response - error on the endpoint provider's side
* missing response - we might be disconnected due to network instability
* we can't execute this request - ankr rate limit hit
* - bad response
* error on the endpoint provider's side
* - missing response
* We might be disconnected due to network instability
* - we can't execute this request
* ankr rate limit hit
* - failed response
* fetchJson default "fallback" error, generally thrown after 429s
* - TIMEOUT
* fetchJson timed out, we could retry but it's safer to just fail over
* - NETWORK_ERROR
* Any other network error, including no-network errors
*
* Note: We can't use ether's generic SERVER_ERROR because it's also
* used for invalid responses from the server, which we can retry on
*/
stringifiedError.match(
/WebSocket is already in CLOSING|bad response|missing response|we can't execute this request/
/WebSocket is already in CLOSING|bad response|missing response|we can't execute this request|failed response|TIMEOUT|NETWORK_ERROR/
)
) {
if (this.shouldSendMessageOnNextProvider(messageId)) {
// If there is another provider to try - try to send the message on that provider
if (this.currentProviderIndex + 1 < this.providerCreators.length) {
return await this.attemptToSendMessageOnNewProvider(messageId)
}

// Otherwise, set us up for the next call, but fail the
// current one since we've gone through every available provider. Note
// that this may happen over time, but we still fail the request that
// hits the end of the list.
this.currentProviderIndex = 0

// Reconnect, but don't wait for the connection to go through.
this.reconnectProvider()
delete this.messagesToSend[messageId]
throw error
} else {
const backoff = this.backoffFor(messageId)
logger.debug(
"Backing off for",
backoff,
"and retrying: ",
method,
params
)
// If there is another provider to try - try to send the message on that provider
if (this.currentProviderIndex + 1 < this.providerCreators.length) {
return await this.attemptToSendMessageOnNewProvider(messageId)
}

return await waitAnd(backoff, async () => {
if (isClosedOrClosingWebSocketProvider(this.currentProvider)) {
await this.reconnectProvider()
}
// Otherwise, set us up for the next call, but fail the
// current one since we've gone through every available provider. Note
// that this may happen over time, but we still fail the request that
// hits the end of the list.
this.currentProviderIndex = 0

logger.debug("Retrying", method, params)
return this.routeRpcCall(messageId)
})
// Reconnect, but don't wait for the connection to go through.
this.reconnectProvider()
delete this.messagesToSend[messageId]
throw error
} else if (
// If we received some bogus response, let's try again
stringifiedError.match(/bad result from backend/)
) {
if (
// If there is another provider to try and we have exceeded the
// number of retries try to send the message on that provider
this.currentProviderIndex + 1 < this.providerCreators.length &&
this.shouldSendMessageOnNextProvider(messageId)
) {
return await this.attemptToSendMessageOnNewProvider(messageId)
}

const backoff = this.backoffFor(messageId)
logger.debug(
"Backing off for",
backoff,
"and retrying: ",
method,
params
)

return await waitAnd(backoff, async () => {
if (isClosedOrClosingWebSocketProvider(this.currentProvider)) {
await this.reconnectProvider()
}

logger.debug("Retrying", method, params)
return this.routeRpcCall(messageId)
})
}

logger.debug(
Expand Down Expand Up @@ -566,7 +595,19 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
this.currentProviderIndex += 1
// Try again with the next provider.
await this.reconnectProvider()
return this.routeRpcCall(messageId)

const isAlchemyFallback =
this.alchemyProvider && this.currentProvider === this.alchemyProvider

return this.routeRpcCall(messageId).finally(() => {
// If every other provider failed and we're on the alchemy provider,
// reconnect to the first provider once we've handled this request
// as we should limit relying on alchemy as a fallback
if (isAlchemyFallback) {
this.currentProviderIndex = 0
this.reconnectProvider()
}
})
}

/**
Expand Down Expand Up @@ -820,7 +861,14 @@ export default class SerialFallbackProvider extends JsonRpcProvider {
"..."
)

this.currentProvider = this.providerCreators[this.currentProviderIndex]()
const cachedProvider =
this.cachedProvidersByIndex[this.currentProviderIndex] ??
this.providerCreators[this.currentProviderIndex]()

this.cachedProvidersByIndex[this.currentProviderIndex] = cachedProvider

this.currentProvider = cachedProvider

await this.resubscribe(this.currentProvider)

// TODO After a longer backoff, attempt to reset the current provider to 0.
Expand Down Expand Up @@ -1006,11 +1054,19 @@ function getProviderCreator(
return new WebSocketProvider(rpcUrl)
}

if (/rpc\.ankr\.com|1rpc\.io|polygon-rpc\.com/.test(url.hostname)) {
return new JsonRpcBatchProvider(rpcUrl)
if (/rpc\.ankr\.com|1rpc\.io|polygon-rpc\.com/.test(url.href)) {
return new JsonRpcBatchProvider({
url: rpcUrl,
throttleLimit: 1,
timeout: PROVIDER_REQUEST_TIMEOUT,
})
}

return new JsonRpcProvider(rpcUrl)
return new JsonRpcProvider({
url: rpcUrl,
throttleLimit: 1,
timeout: PROVIDER_REQUEST_TIMEOUT,
})
}

export function makeFlashbotsProviderCreator(): ProviderCreator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import Sinon, * as sinon from "sinon"
import { ETHEREUM } from "../../../constants"
import { wait } from "../../../lib/utils"
import SerialFallbackProvider from "../serial-fallback-provider"
import { waitFor } from "../../../tests/utils"

const sandbox = sinon.createSandbox()

Expand All @@ -19,7 +20,9 @@ describe("Serial Fallback Provider", () => {
.callsFake(async () => "success")
alchemySendStub = sandbox
.stub(mockAlchemyProvider, "send")
.callsFake(async () => "success")
.callsFake(async () => {
return "success"
})
fallbackProvider = new SerialFallbackProvider(ETHEREUM.chainID, [
{
type: "generic",
Expand Down Expand Up @@ -74,6 +77,9 @@ describe("Serial Fallback Provider", () => {
genericSendStub.onCall(0).throws("bad response")
genericSendStub.onCall(1).returns(ETHEREUM.chainID)
genericSendStub.onCall(2).returns("success")

await waitFor(() => expect(genericSendStub.called).toEqual(true))

await expect(
fallbackProvider.send("eth_getBalance", [])
).resolves.toEqual("success")
Expand All @@ -87,6 +93,9 @@ describe("Serial Fallback Provider", () => {
genericSendStub.onCall(0).throws("missing response")
genericSendStub.onCall(1).returns(ETHEREUM.chainID)
genericSendStub.onCall(2).returns("success")

await waitFor(() => expect(genericSendStub.called).toEqual(true))

await expect(
fallbackProvider.send("eth_getBalance", [])
).resolves.toEqual("success")
Expand All @@ -100,6 +109,9 @@ describe("Serial Fallback Provider", () => {
genericSendStub.onCall(0).throws("we can't execute this request")
genericSendStub.onCall(1).returns(ETHEREUM.chainID)
genericSendStub.onCall(2).returns("success")

await waitFor(() => expect(genericSendStub.called).toEqual(true))

await expect(
fallbackProvider.send("eth_getBalance", [])
).resolves.toEqual("success")
Expand All @@ -110,14 +122,16 @@ describe("Serial Fallback Provider", () => {
})

it("should switch to next provider after three bad responses", async () => {
genericSendStub.throws("bad response")
genericSendStub.throws("bad result from backend")
alchemySendStub.onCall(0).returns(ETHEREUM.chainID)
alchemySendStub.onCall(1).returns("success")

await waitFor(() => expect(genericSendStub.called).toEqual(true))

await expect(
fallbackProvider.send("eth_getBalance", [])
).resolves.toEqual("success")
// 1 try and 3 retries of eth_getBalance

expect(
genericSendStub.args.filter((args) => args[0] === "eth_getBalance")
.length
Expand Down Expand Up @@ -204,12 +218,15 @@ describe("Serial Fallback Provider", () => {
// Accessing private property
// eslint-disable-next-line @typescript-eslint/no-explicit-any
expect((fallbackProvider as any).currentProviderIndex).toEqual(0)

await waitFor(() => expect(genericSendStub.called).toEqual(true))

await expect(
fallbackProvider.send("eth_getBalance", [])
).resolves.toEqual("success")
// Accessing private property
// eslint-disable-next-line @typescript-eslint/no-explicit-any
expect((fallbackProvider as any).currentProviderIndex).toEqual(1)
expect((fallbackProvider as any).currentProviderIndex).toEqual(0)
})
})
})
2 changes: 2 additions & 0 deletions background/tests/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import browser from "webextension-polyfill"
// eslint-disable-next-line import/no-extraneous-dependencies
export { waitFor } from "@testing-library/dom"

type LocalStorageMock = Record<string, Record<string, unknown>>

Expand Down
12 changes: 12 additions & 0 deletions scripts/unreliable-rpc-provider-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
export const patchRPCURL = (url: string): string =>
`http://localhost:9000?rpc=${url}`

export const patchRPCURLS = (
chainIDToRPCMap: Record<string, string[]>
): typeof chainIDToRPCMap =>
Object.fromEntries(
Object.entries(chainIDToRPCMap).map(([_, urls]) => [
_,
urls.map(patchRPCURL),
])
)
Loading

0 comments on commit bdb3317

Please sign in to comment.