-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path: serverless.yml
executable file
·260 lines (252 loc) · 8.28 KB
/
serverless.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# !!! NOTICE: Modify config/config.yml to customize deployment
custom:
  # Deployment-specific settings (organization, project, aws_region, ...)
  # are loaded from an external file so this template stays generic.
  config: ${file(config/config.yml)}
# Service name embeds organization + project so each deployment gets an
# isolated CloudFormation stack and uniquely named AWS resources.
service: improveai-${self:custom.config.organization}-${self:custom.config.project}
plugins:
  # Provides the top-level `stepFunctions` section used below to define
  # the GroomThenTrain state machine.
  - serverless-step-functions
provider:
  name: aws
  region: ${self:custom.config.aws_region}
  # Defaults for zip-packaged functions; container-image functions below
  # bring their own runtime.
  # NOTE(review): python3.8 is deprecated on Lambda — plan an upgrade.
  runtime: python3.8
  memorySize: 192
  timeout: 900 # Lambda maximum (seconds)
  ecr:
    # Container images built from local Dockerfiles and pushed to ECR by
    # Serverless at deploy time; referenced by name in `functions` below.
    images:
      ingest:
        path: ./src/ingest/
      trainer:
        path: ./src/trainer/
        # added for macOS <-> AWS build compatibility
        platform: linux/amd64
  # figure out how to handle --secrets
  iam:
    role:
      statements:
        # NOTE(review): "firehose:*" and "sagemaker:*" are broad action
        # wildcards — consider narrowing to the actions actually used.
        - Effect: Allow
          Action:
            - "firehose:*"
          Resource: { Fn::GetAtt: [Firehose, Arn] }
        - Effect: Allow
          Action:
            - "sagemaker:*"
          Resource: '*'
        # Lets dispatchTrainingJob hand the execution role to SageMaker.
        - Effect: Allow
          Action:
            - "iam:PassRole"
          Resource: { Fn::GetAtt: [ SagemakerExecutionRole, Arn ] }
        # Matches every bucket of this stack (all share the
        # "${service}-${stage}-" name prefix) and the objects inside them.
        - Effect: Allow
          Action:
            - 's3:*'
          Resource: "arn:aws:s3:::${self:service}-${opt:stage, self:provider.stage}-**"
  environment:
    # Shared by all functions; bucket names resolve from `resources` below.
    SERVICE: ${self:service}
    STAGE: ${opt:stage, self:provider.stage}
    FIREHOSE_BUCKET: !Ref FirehoseS3Bucket
    TRAIN_BUCKET: !Ref TrainS3Bucket
    MODELS_BUCKET: !Ref ModelsS3Bucket
package:
  # Exclude everything by default, then ship only source and config.
  patterns:
    - '!./**'
    - src/**
    - config/**
functions:
  track:
    description: Decision & Reward Tracker HTTPS API
    timeout: 6
    handler: src/track/http_api.track
    # Overrides the provider-level Python runtime for this one function.
    # NOTE(review): nodejs14.x is deprecated on Lambda — plan an upgrade.
    runtime: nodejs14.x
    url:
      cors: true
    environment:
      FIREHOSE_DELIVERY_STREAM_NAME: !Ref Firehose
  ingestFirehose: # if renamed, FirehoseBucket event notifications must be manually deleted
    description: Ingest New Firehose File to Train Bucket
    memorySize: 512
    image:
      name: ingest
    events:
      # Fires for every object Firehose delivers to the firehose bucket.
      - s3:
          bucket: !Ref FirehoseS3Bucket
          existing: true # created in resources
          event: s3:ObjectCreated:*
  filterPartitionsToGroom:
    description: Filter Groups of Rewarded Decision .parquet S3 Keys to Groom
    image:
      name: ingest
    # Image entrypoint override: same ingest image, different handler.
    command:
      - groom.filter_handler
  groomPartitions:
    description: Merge Rewarded Decision Partitions
    memorySize: 1024
    image:
      name: ingest
    command:
      - groom.groom_handler
  dispatchTrainingJob:
    description: Dispatch Decision Model Training Jobs
    timeout: 30
    handler: src/train/dispatch_job.lambda_handler
    environment:
      TRAINING_ROLE_ARN: { Fn::GetAtt: [ SagemakerExecutionRole, Arn ] }
      SERVICE_NAME: ${self:service}
      # ECR repo into which the trainer image was pushed
      REPOSITORY_NAME: serverless-${self:service}-${opt:stage, self:provider.stage}
      # trainer image tag
      IMAGE_TAG: trainer
  unpackModels:
    handler: src/train/unpack_models.unpack
    events:
      # Fires when a SageMaker training job writes its model archive.
      - s3:
          bucket: !Ref TrainS3Bucket
          existing: true # created in resources
          event: s3:ObjectCreated:*
          rules:
            - prefix: train_output/models/
            - suffix: model.tar.gz
  forceDockerPushTrainer: # Serverless wants the image to be used to deploy it
    description: Force Serverless to Push Docker Trainer Image to ECR
    image:
      name: trainer
stepFunctions:
  stateMachines:
    GroomThenTrain:
      name: ${self:service}-${opt:stage, self:provider.stage}-GroomThenTrainStateMachine
      # Schedule events generated at deploy time by predeploy.js.
      events: ${file(src/predeploy.js):trainingScheduleEvents}
      definition:
        # Loop: filter partitions -> groom each group in parallel -> re-filter,
        # until no groups remain, then dispatch the training job.
        StartAt: FilterPartitionsToGroom
        States:
          FilterPartitionsToGroom:
            Type: Task
            Resource:
              Fn::GetAtt: [filterPartitionsToGroom, Arn]
            # Task output lands at $.filter, preserving the rest of the input.
            ResultPath: $.filter
            Next: HasGroups
          HasGroups:
            Type: Choice
            Choices:
              # At least one group to groom -> run the Map; otherwise train.
              - Variable: $.filter.groom_groups[0]
                IsPresent: true
                Next: GroomMap
            Default: Train
          GroomMap:
            Type: Map
            MaxConcurrency: 64
            ItemsPath: $.filter.groom_groups
            Parameters:
              # presumably $.model_name is set by the schedule event input —
              # TODO(review) confirm against predeploy.js.
              model_name.$: $.model_name
              s3_keys.$: $$.Map.Item.Value
            Iterator:
              StartAt: GroomTask
              States:
                GroomTask:
                  Type: Task
                  Resource:
                    Fn::GetAtt: [groomPartitions, Arn]
                  Catch:
                    # Tolerate a failed group so one bad group does not
                    # abort the whole Map iteration.
                    - ErrorEquals:
                        - States.TaskFailed
                      Next: ErrorHandler
                      ResultPath: $.error
                  End: true
                ErrorHandler:
                  Type: Pass
                  End: true
            ResultPath: $.filter.groom_groups # clear filter.groom_groups with null result
            Next: FilterPartitionsToGroom # loop back to FilterPartitions
          Train:
            Type: Task
            Resource:
              Fn::GetAtt: [dispatchTrainingJob, Arn]
            End: true
      # validate: true
resources:
  Resources:
    # Role assumed by Kinesis Firehose to deliver records into S3.
    FirehoseToS3Role:
      Type: AWS::IAM::Role
      Properties:
        RoleName: ${self:service}-${opt:stage, self:provider.stage}-FirehoseToS3Role
        AssumeRolePolicyDocument:
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - firehose.amazonaws.com
              Action:
                - sts:AssumeRole
        Policies:
          - PolicyName: FirehoseToS3Policy
            PolicyDocument:
              Statement:
                # Matches every "${service}-${stage}-" bucket and its objects.
                # NOTE(review): could be narrowed to the firehose bucket only.
                - Effect: Allow
                  Action:
                    - s3:AbortMultipartUpload
                    - s3:GetBucketLocation
                    - s3:GetObject
                    - s3:ListBucket
                    - s3:ListBucketMultipartUploads
                    - s3:PutObject
                  Resource: "arn:aws:s3:::${self:service}-${opt:stage, self:provider.stage}-**"
    # Execution role handed to SageMaker training jobs by dispatchTrainingJob
    # (via iam:PassRole granted in the provider section).
    SagemakerExecutionRole:
      Type: AWS::IAM::Role
      Properties:
        RoleName: ${self:service}-${opt:stage, self:provider.stage}-SagemakerExecutionRole
        AssumeRolePolicyDocument:
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - sagemaker.amazonaws.com
              Action:
                - sts:AssumeRole
        ManagedPolicyArns:
          - arn:aws:iam::aws:policy/AmazonSageMakerFullAccess
        Policies:
          - PolicyName: SagemakerExecutionPolicy
            PolicyDocument:
              Statement:
                - Effect: Allow
                  Action:
                    - 's3:GetObject'
                    - 's3:GetBucketLocation'
                    - 's3:DeleteObject'
                    - 's3:ListBucket'
                    - 's3:PutObject'
                  Resource: "arn:aws:s3:::${self:service}-${opt:stage, self:provider.stage}-**"
    # Landing bucket for raw tracked events delivered by Firehose.
    FirehoseS3Bucket:
      Type: AWS::S3::Bucket
      DeletionPolicy: Retain # keep tracked data if the stack is deleted
      Properties:
        BucketName: ${self:service}-${opt:stage, self:provider.stage}-firehose
        IntelligentTieringConfigurations:
          - Id: '${self:service}-${opt:stage, self:provider.stage}-FirehoseIntelligentTiering'
            Status: Enabled
            Tierings: # After 30 days intelligent tiering automatically moves to infrequent access tier
              # Opt-in archive tiers for long-untouched objects.
              - AccessTier: ARCHIVE_ACCESS
                Days: 90
              - AccessTier: DEEP_ARCHIVE_ACCESS
                Days: 180
    Firehose:
      Type: AWS::KinesisFirehose::DeliveryStream
      Properties:
        DeliveryStreamName: ${self:service}-${opt:stage, self:provider.stage}-firehose
        S3DestinationConfiguration:
          # GetAtt yields the bucket ARN directly — consistent with the
          # Fn::GetAtt usage elsewhere in this file (no manual string join).
          BucketARN:
            Fn::GetAtt: [FirehoseS3Bucket, Arn]
          BufferingHints:
            IntervalInSeconds: 900 # max value is 900
            SizeInMBs: 128 # max value is 128.
          CompressionFormat: "GZIP"
          RoleARN: { Fn::GetAtt: [ FirehoseToS3Role, Arn ] }
    # Grooomed/merged rewarded-decision data and SageMaker train output.
    TrainS3Bucket:
      Type: AWS::S3::Bucket
      DeletionPolicy: Retain
      Properties:
        BucketName: ${self:service}-${opt:stage, self:provider.stage}-train
    # Final unpacked models served to clients.
    ModelsS3Bucket:
      Type: AWS::S3::Bucket
      DeletionPolicy: Retain
      Properties:
        BucketName: ${self:service}-${opt:stage, self:provider.stage}-models