diff --git a/package-lock.json b/package-lock.json index 2b9fceb..b6f7384 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,8 +1,325 @@ { "name": "rfcs", "version": "0.0.0", - "lockfileVersion": 1, + "lockfileVersion": 2, "requires": true, + "packages": { + "": { + "name": "rfcs", + "version": "0.0.0", + "dependencies": { + "markdownlint-cli": "^0.24.0" + }, + "devDependencies": {} + }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/balanced-match": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", + "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" + }, + "node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/commander": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.9.0.tgz", + "integrity": "sha1-nJkJQXbhIkDLItbFFGCYQA/g99Q=", + "dependencies": { + "graceful-readlink": ">= 1.0.0" + }, + "engines": { + "node": ">= 0.6.x" + } + }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" + }, + "node_modules/deep-extend": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.5.1.tgz", + "integrity": "sha512-N8vBdOa+DF7zkRrDCsaOXoCs/E2fJfx9B9MrKnnSiHNh4ws7eSys6YQE4KvT1cecKmOASYQBhbKjeuDD9lT81w==", + "engines": { + "iojs": ">=1.0.0", + 
"node": ">=0.10.0" + } + }, + "node_modules/entities": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.0.3.tgz", + "integrity": "sha512-MyoZ0jgnLvB2X3Lg5HqpFmn1kybDiIfEQmKzTb5apr51Rb+T3KdmMiqa70T+bhGnyv7bQ6WMj2QMHpGMmlrUYQ==" + }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" + }, + "node_modules/get-stdin": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/get-stdin/-/get-stdin-5.0.1.tgz", + "integrity": "sha1-Ei4WFZHiH/TFJTAwVpPyDmOTo5g=", + "engines": { + "node": ">=0.12.0" + } + }, + "node_modules/glob": { + "version": "7.1.6", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", + "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/graceful-readlink": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/graceful-readlink/-/graceful-readlink-1.0.1.tgz", + "integrity": "sha1-TK+tdrxi8C+gObL5Tpo906ORpyU=" + }, + "node_modules/ignore": { + "version": "5.1.8", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.1.8.tgz", + "integrity": "sha512-BMpfD7PpiETpBl/A6S498BaIJ6Y/ABT93ETbby2fP00v4EbvPBXWEoaR1UBPKs3iR53pJY7EtZk5KACI57i1Uw==", + 
"engines": { + "node": ">= 4" + } + }, + "node_modules/inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "dependencies": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/ini": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz", + "integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==", + "deprecated": "Please update to ini >=1.3.6 to avoid a prototype pollution issue", + "engines": { + "node": "*" + } + }, + "node_modules/js-yaml": { + "version": "3.13.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.13.1.tgz", + "integrity": "sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw==", + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/jsonc-parser": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-2.2.1.tgz", + "integrity": "sha512-o6/yDBYccGvTz1+QFevz6l6OBZ2+fMVu2JZ9CIhzsYRX4mjaK5IyX9eldUdCmga16zlgQxyrj5pt9kzuj2C02w==" + }, + "node_modules/linkify-it": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.2.tgz", + "integrity": "sha512-gDBO4aHNZS6coiZCKVhSNh43F9ioIL4JwRjLZPkoLIY4yZFwg264Y5lu2x6rb1Js42Gh6Yqm2f6L2AJcnkzinQ==", + "dependencies": { + "uc.micro": "^1.0.1" + } + }, + "node_modules/lodash.differencewith": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.differencewith/-/lodash.differencewith-4.5.0.tgz", + "integrity": "sha1-uvr7yRi1UVTheRdqALsK76rIVLc=" + 
}, + "node_modules/lodash.flatten": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz", + "integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8=" + }, + "node_modules/markdown-it": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-11.0.0.tgz", + "integrity": "sha512-+CvOnmbSubmQFSA9dKz1BRiaSMV7rhexl3sngKqFyXSagoA3fBdJQ8oZWtRy2knXdpDXaBw44euz37DeJQ9asg==", + "dependencies": { + "argparse": "^1.0.7", + "entities": "~2.0.0", + "linkify-it": "^3.0.1", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "bin": { + "markdown-it": "bin/markdown-it.js" + } + }, + "node_modules/markdownlint": { + "version": "0.21.1", + "resolved": "https://registry.npmjs.org/markdownlint/-/markdownlint-0.21.1.tgz", + "integrity": "sha512-8kc88w5dyEzlmOWIElp8J17qBgzouOQfJ0LhCcpBFrwgyYK6JTKvILsk4FCEkiNqHkTxwxopT2RS2DYb/10qqg==", + "dependencies": { + "markdown-it": "11.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/markdownlint-cli": { + "version": "0.24.0", + "resolved": "https://registry.npmjs.org/markdownlint-cli/-/markdownlint-cli-0.24.0.tgz", + "integrity": "sha512-AusUxaX4sFayUBFTCKeHc8+fq73KFqIUW+ZZZYyQ/BvY0MoGAnE2C/3xiawSE7WXmpmguaWzhrXRuY6IrOLX7A==", + "dependencies": { + "commander": "~2.9.0", + "deep-extend": "~0.5.1", + "get-stdin": "~5.0.1", + "glob": "~7.1.2", + "ignore": "~5.1.4", + "js-yaml": "~3.13.1", + "jsonc-parser": "~2.2.0", + "lodash.differencewith": "~4.5.0", + "lodash.flatten": "~4.4.0", + "markdownlint": "~0.21.0", + "markdownlint-rule-helpers": "~0.12.0", + "minimatch": "~3.0.4", + "minimist": "~1.2.5", + "rc": "~1.2.7" + }, + "bin": { + "markdownlint": "markdownlint.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/markdownlint-rule-helpers": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/markdownlint-rule-helpers/-/markdownlint-rule-helpers-0.12.0.tgz", + "integrity": 
"sha512-Q7qfAk+AJvx82ZY52OByC4yjoQYryOZt6D8TKrZJIwCfhZvcj8vCQNuwDqILushtDBTvGFmUPq+uhOb1KIMi6A==" + }, + "node_modules/mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", + "integrity": "sha1-/oWy7HWlkDfyrf7BAP1sYBdhFS4=" + }, + "node_modules/minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/minimist": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-FM9nNUYrRBAELZQT3xeZQ7fmMOBg6nWNmJKTcgsJeaLstP/UODVpGsr5OhXhhXg6f+qtJ8uiZ+PUxkDWcgIXLw==" + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/rc/node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "engines": { + "node": ">=4.0.0" + } 
+ }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw=" + }, + "node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha1-PFMZQukIwml8DsNEhYwobHygpgo=", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==" + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" + } + }, "dependencies": { "argparse": { "version": "1.0.10", diff --git a/text/2021-12-22-blockchaos.md b/text/2021-12-22-blockchaos.md new file mode 100644 index 0000000..1c5f069 --- /dev/null +++ b/text/2021-12-22-blockchaos.md @@ -0,0 +1,199 @@ +# Block schedule injection + +## Background + +Current `IOChaos` injection works at the file system layer. It sets up a FUSE +and injects delay / corruption into the request. This solution has its own +advantages. Meanwhile, its disadvantages also bother us a lot. + +Advantages: + +1. It never has a chance to break the host system, and is relatively safe. +2. It has knowledge of the file system, which means it can filter by path, + owner, or anything else that exists only at the file system level. + +Disadvantages: + +1. The performance impact is really big even without injecting any delay, which + makes it impossible to do quantitative analysis. +2. The target process will be paused at the beginning of the injection. The time + may vary from 1s to 4s depending on the opened files. 
+ +In this RFC, I will propose a new mechanism to inject delay, IOPS limitation (or +corruption, in the future) into a block device. The implementation lives in a +standalone repo: +[chaos-mesh/chaos-driver](https://github.com/chaos-mesh/chaos-driver). Chaos +Mesh will convert an injection request on a Pod into an injection request on a +block device and send it to the chaos-driver. + +## Proposal + +The implementation can be viewed in three parts: + +1. Chaos Mesh. It reconciles the `BlockChaos` resource, selects the related PV + and node, and sends the request to the corresponding chaos-daemon. +2. Chaos Daemon. It reads the gRPC request (for injection and recovery), and + then uses the client provided by chaos-driver to talk with `/dev/chaos`. +3. Chaos Driver. It handles the operations on `/dev/chaos`, and adds the + injection to the injection list. + +### Chaos Mesh + +The schema of `BlockChaos` generally looks like: + +```yaml +apiVersion: chaos-mesh.org/v1alpha1 +kind: BlockChaos +metadata: + name: block-chaos +spec: + mode: one + selector: + labelSelectors: + "app.kubernetes.io/component": "tikv" + volumeName: tikv + action: limit + iops: 5000 +status: + ... + ids: + default/tikv-0/tikv: 1 +``` + +Each record key in `ids` looks like `default/tikv-0/tikv`. The first part is the +namespace, the second part is the pod name, and the third part is the +volume name. + +To inject a chaos, the Chaos Mesh will read the `Pod`, find the corresponding +volume entry in the `Pod` spec, and then get the `PVC`. By reading +`PVC.spec.volumeName`, the controller can get the `PV`. + +If the source of the `PV` is `LocalVolume` or `HostPathVolume` (which means +either `LocalVolumeSource` or `HostPathVolumeSource` is not empty), this PV can +be injected. The controller can get a path, which represents a directory on host +or a block device on host. + +Then the controller can send this path and the injection spec to the +chaos-daemon on the corresponding node, and will receive an id from it. 
+ +To recover a chaos, the controller only needs to send the id to the related +chaos-daemon. + +#### Requirement + +1. The volume entry of the `Pod` must be a `PVC`. +2. The source of the `PV` must be `LocalVolume` or `HostPath`. `VolumeMode` must + be `Filesystem`. + +### Chaos Daemon + +The chaos daemon can use the Go client provided by chaos-driver directly. + +Before injecting the chaos, the chaos daemon should make sure the `chaos_driver` +module has been loaded. + +#### Get device from path + +The chaos daemon should get a block device from the path. + +If the path is a directory, then we can read the `/proc/mounts` and find the +mount source of the path. If the mount source is still a directory, which means +this mount is a bind mount, we can handle this situation recursively until we +get a block device. If it's neither a block device nor a directory, it's an +invalid value. + +This device can also be a partition: `sda1` or `nvme0n1p1`. If the device name +doesn't have a corresponding directory in `/sys/block`, it is a partition. We +can get the parent device of it through `basename /sys/class/block/%s/..`. For +example, the `/sys/class/block/nvme0n1p1` is a soft link to +`../../devices/pci0000:00/0000:00:01.1/0000:01:00.0/nvme/nvme0/nvme0n1/nvme0n1p1`, +so the `..` of it is `nvme0n1`. + +#### Inject + +After getting the block device, the chaos daemon should read +`/sys/block/%s/queue/scheduler` to make sure it is `ioem` or `ioem-mq`, then it +can be injected through `github.com/chaos-mesh/chaos-driver/pkg/client`: + +```go +id, err := c.InjectIOEMDelay(dev_path, op, pidNs, delay, corr, jitter) +``` + +The id should be returned to the controller. The `dev_path` is `/dev/xxx`, the +`pidNs` is the pid of the container, and `delay`, `corr`, and `jitter` are the +corresponding values in the chaos spec. 
+ +#### Recover + +Recover doesn't need the device path: + +```go +err = c.Recover(id) +``` + +### Chaos Driver + +Chaos Driver is designed for many kinds of injection. In this RFC, only +IOEM (which is the IO scheduler designed for injecting delay and limit) will be +discussed. + +The IOEM scheduler has two main structures: `ioem_data` and `irl`. The +`ioem_data` contains two data structures: one linked list (called +"waiting_queue") and an rb_tree. The `irl` means "IOEM Request Limit", which is +used to limit the IOPS. + +Every hardware queue will be bound to one `ioem_data`, and every hardware device +will be bound to one `irl`. For a multiqueue scheduler, one `irl` may be shared +by multiple `ioem_data`. + +#### IRL + +The design of IRL is quite simple: it contains two fields, one for `quota` and +one for `counter`, and it has one method: "dispatch". Every time this function +is called, the counter will increase by one, and if the counter is still smaller +than the `quota`, it will return successfully; otherwise it will return false. + +An hrtimer will be set up to reset the `counter` to 0 every period. + +#### IOEM DATA + +An IO scheduler has two main functions: insert request and dispatch request. + +##### Insert Request + +When a request reaches the scheduler, the scheduler will iterate through the +registered injections to see whether this request should be affected by the +limit and delay. + +If this request should be injected with a delay, then the `time_to_send` of this +request is set to `now + delay`; otherwise it is set to `now`. + +If this request should be affected by the limit, the `ioem_limit_should_affect` +will be set to `true`; otherwise `false`. + +Then this request will be inserted into the rbtree, sorted by +`time_to_send`. + +##### Dispatch + +The dispatch will be triggered by the hardware driver, a background task, the +scheduler itself, etc. When the dispatch is called, the scheduler can dispatch +one (or zero) request to the hardware. 
+ +The scheduler will check the first entry of `waiting_queue` to see whether there +are requests waiting for the limit quota, and whether the quota is enough to +send a request. If both quota and a request are available, return the request. + +If either the quota or request is unavailable, the scheduler will iterate the +`rbtree` until the `time_to_send` is greater than now. For any request whose +`time_to_send` is smaller than now, the scheduler can try to dispatch it through +`irl` (or directly, if it's not affected by the limit). + +If the quota is not enough to send a request from the rbtree, pop it out and +insert it into the waiting queue. + +If no request is sent in one dispatch, but there are still requests inside the +rb_tree or waiting queue, the scheduler should calculate the earliest time a +request could be sent. This time should be the next time when the counter in irl +is reset, or the smallest `time_to_send` in the rbtree. This time will be used +to set up a timer to trigger the dispatch.