Skip to content

Commit

Permalink
Updated to support FATE v1.7.1 (#506)
Browse files Browse the repository at this point in the history
* feat: support slim FATE

Signed-off-by: ChenLong Ma <[email protected]>
(cherry picked from commit 616b8cc9b483dec6f251fe095cac31a1cd5b8c93)

* update helm chart of FATE to support v1.7.1

Signed-off-by: ChenLong Ma <[email protected]>

* add slim FATE for docker-compose

Signed-off-by: ChenLong Ma <[email protected]>

* fix containers not restarting

Signed-off-by: ChenLong Ma <[email protected]>
  • Loading branch information
owlet42 authored Jan 12, 2022
1 parent 0e3a546 commit 946616c
Show file tree
Hide file tree
Showing 31 changed files with 767 additions and 39 deletions.
2 changes: 1 addition & 1 deletion docker-build/.env
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#PREFIX=federatedai
#IMG_TAG=1.5.1-release
#IMG_TAG=1.7.1-release
2 changes: 1 addition & 1 deletion docker-deploy/.env
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
RegistryURI=
TAG=1.7.0-release
TAG=1.7.1-release
SERVING_TAG=2.0.4-release

# PREFIX: namespace on the registry's server.
Expand Down
39 changes: 37 additions & 2 deletions docker-deploy/generate_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ GenerateConfig() {
cp -r training_template/backends/spark/rabbitmq confs-$party_id/confs/

cp training_template/docker-compose-spark.yml confs-$party_id/docker-compose.yml
sed -i '155,170d' confs-$party_id/docker-compose.yml
sed -i '157,173d' confs-$party_id/docker-compose.yml
fi

if [ "$backend" == "spark_pulsar" ]; then
Expand All @@ -98,7 +98,13 @@ GenerateConfig() {
cp -r training_template/backends/spark/pulsar confs-$party_id/confs/

cp training_template/docker-compose-spark.yml confs-$party_id/docker-compose.yml
sed -i '138,153d' confs-$party_id/docker-compose.yml
sed -i '139,155d' confs-$party_id/docker-compose.yml
fi

if [ "$backend" == "spark_local_pulsar" ]; then
cp -r training_template/backends/spark/nginx confs-$party_id/confs/
cp -r training_template/backends/spark/pulsar confs-$party_id/confs/
cp training_template/docker-compose-spark-slim.yml confs-$party_id/docker-compose.yml
fi

if [ "$backend" == "eggroll" ]; then
Expand Down Expand Up @@ -218,6 +224,14 @@ GenerateConfig() {
sed -i "s/ storage: eggroll/ storage: hdfs/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi

if [ "$backend" == "spark_local_pulsar" ]; then
sed -i 's/proxy: rollsite/proxy: nginx/g' ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml

sed -i "s/ computing: eggroll/ computing: spark/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/ federation: eggroll/ federation: pulsar/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
sed -i "s/ storage: eggroll/ storage: localfs/g" ./confs-$party_id/confs/fate_flow/conf/service_conf.yaml
fi

echo fate_flow module of $party_id done!

# now we handle the route table
Expand Down Expand Up @@ -346,6 +360,27 @@ ${party_id}:
port: 5672
EOF
fi

# spark_local_pulsar
if [[ "$backend" == "spark_local_pulsar" ]]; then
cat >./confs-$party_id/confs/fate_flow/conf/pulsar_route_table.yaml <<EOF
$(for ((j = 0; j < ${#party_list[*]}; j++)); do
if [ "${party_id}" == "${party_list[${j}]}" ]; then
continue
fi
echo "${party_list[${j}]}:
host: ${party_ip_list[${j}]}
port: 6650
"
done)
${party_id}:
host: pulsar
port: 6650
EOF

fi


echo proxy module of $party_id done!

# package of $party_id
Expand Down
2 changes: 1 addition & 1 deletion docker-deploy/parties.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ party_list=(10000 9999)
party_ip_list=(192.168.1.1 192.168.1.2)
serving_ip_list=(192.168.1.1 192.168.1.2)

# backend could be eggroll, spark_rabbitmq and spark_pulsar
# backend could be eggroll, spark_rabbitmq, spark_pulsar or spark_local_pulsar
backend=eggroll

# true if you need python-nn else false, the default value will be false
Expand Down
4 changes: 4 additions & 0 deletions docker-deploy/training_template/docker-compose-eggroll.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ volumes:
services:
rollsite:
image: "federatedai/eggroll:${TAG}"
restart: always
ports:
- "9370:9370"
environment:
Expand All @@ -54,6 +55,7 @@ services:

fateboard:
image: "federatedai/fateboard:${TAG}"
restart: always
ports:
- "8080:8080"
volumes:
Expand All @@ -66,6 +68,7 @@ services:

clustermanager:
image: "federatedai/eggroll:${TAG}"
restart: always
expose:
- 4670
volumes:
Expand All @@ -78,6 +81,7 @@ services:

nodemanager:
image: "federatedai/eggroll:${TAG}"
restart: always
expose:
- 4671
volumes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ version: '3'
services:
exchange:
image: "federatedai/eggroll:${TAG}"
restart: always
ports:
- "9371:9370"
volumes:
Expand Down
146 changes: 146 additions & 0 deletions docker-deploy/training_template/docker-compose-spark-slim.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Copyright 2019-2020 VMware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
version: "3"

# "Slim" FATE deployment: fateboard, fate-flow (python), mysql, nginx, pulsar
# and the client, all attached to one user-defined network.

networks:
  # Fixed subnet so the `python` service can be given a static address below.
  fate-network:
    ipam:
      config:
        - subnet: 192.167.0.0/16

volumes:
  # Named volumes using the default driver.
  fate_flow_logs:
  download_dir:
  # Bind-mounted host directories. `{/path/to/host/dir}` is a template
  # placeholder and is not valid YAML as written.
  # NOTE(review): presumably substituted by generate_config.sh before the file
  # is handed to docker-compose -- confirm.
  shared_dir_examples:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: {/path/to/host/dir}/examples
  shared_dir_federatedml:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: {/path/to/host/dir}/federatedml
  shared_dir_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: {/path/to/host/dir}/data

services:
  # Web UI; shares the fate-flow log volume with the `python` service.
  fateboard:
    image: "federatedai/fateboard:${TAG}"
    ports:
      - "8080:8080"
    volumes:
      - ./confs/fateboard/conf:/data/projects/fate/fateboard/conf
      - fate_flow_logs:/data/projects/fate/fateflow/logs
    restart: always
    networks:
      - fate-network
    depends_on:
      - python

  # fate-flow server.
  python:
    image: "federatedai/python-spark:${TAG}"
    restart: always
    # Port mappings are quoted so they are always read as strings, matching
    # the other services in this file.
    ports:
      - "9380:9380"
      - "9360:9360"
    volumes:
      - ./confs/spark/spark-defaults.conf:/data/projects/spark-2.4.1-bin-hadoop2.7/conf/spark-defaults.conf
      - ./confs/fate_flow/conf:/data/projects/fate/conf
      - shared_dir_federatedml:/data/projects/fate/fate/python/federatedml
      - shared_dir_examples:/data/projects/fate/examples
      - download_dir:/data/projects/fate/python/download_dir
      - fate_flow_logs:/data/projects/fate/fateflow/logs
      - ./shared_dir/data/model_local_cache:/data/projects/fate/fateflow/model_local_cache
    networks:
      fate-network:
        # Static address inside the subnet declared under `networks`.
        ipv4_address: 192.167.0.100
    command:
      - "/bin/bash"
      - "-c"
      # NOTE(review): the 5 second sleep presumably gives dependencies such as
      # mysql time to start -- confirm.
      - |
        set -x
        sleep 5 && python fateflow/python/fate_flow/fate_flow_server.py
    environment:
      # `$PYTHONPATH` is interpolated by Compose from the host environment
      # when the file is loaded, not expanded inside the container.
      PYTHONPATH: "$PYTHONPATH:/data/projects/fate/fate/python:/data/projects/fate/eggroll/python:/data/projects/fate/fateflow/python:/data/projects/fate/fate/python/fate_client"
      FATE_PROJECT_BASE: "/data/projects/fate"
      FATE_FLOW_UPLOAD_MAX_NUM: "1000000"
      FATE_FLOW_UPLOAD_MAX_BYTES: "104868093952"

  # Database; reachable only from other containers (`expose`, no host port).
  mysql:
    image: "mysql:8"
    expose:
      - 3306
    volumes:
      - ./confs/mysql/init:/docker-entrypoint-initdb.d/
      - ./shared_dir/data/mysql:/var/lib/mysql
    restart: always
    environment:
      MYSQL_ALLOW_EMPTY_PASSWORD: "yes"
    networks:
      - fate-network

  # Proxy; its route table and nginx.conf come from ./confs/nginx.
  nginx:
    image: "federatedai/nginx:${TAG}"
    ports:
      - "9300:9300"
      - "9310:9310"
    volumes:
      - ./confs/nginx/route_table.yaml:/data/projects/fate/proxy/nginx/conf/route_table.yaml
      - ./confs/nginx/nginx.conf:/data/projects/fate/proxy/nginx/conf/nginx.conf
    restart: always
    networks:
      - fate-network
    depends_on:
      - python

  # Standalone Pulsar broker.
  pulsar:
    image: "federatedai/pulsar:2.7.0"
    ports:
      - "6650:6650"
      - "6651:6651"
      # Host port 8001 maps to container port 8080; host port 8080 is already
      # taken by fateboard.
      - "8001:8080"
    # `deploy.restart_policy` only applies to swarm deployments, while
    # `restart` below is what plain docker-compose honours; both are kept.
    deploy:
      restart_policy:
        condition: any
    volumes:
      - ./confs/pulsar/standalone.conf:/pulsar/conf/standalone.conf
      - ./shared_dir/data/pulsar:/pulsar/data
    command:
      ["/bin/bash", "-c", "bin/pulsar standalone -nss"]
    restart: always
    networks:
      - fate-network

  # Client container, pointed at the fate-flow service via FATE_FLOW_IP/PORT.
  client:
    image: "federatedai/client:${TAG}"
    ports:
      - "20000:20000"
    restart: always
    environment:
      FATE_FLOW_IP: "python"
      FATE_FLOW_PORT: "9380"
      FATE_SERVING_HOST: "fate-serving:8059"
    volumes:
      - download_dir:/data/projects/fate/download_dir
      - shared_dir_examples:/data/projects/fate/examples
    depends_on:
      - python
    networks:
      - fate-network
5 changes: 4 additions & 1 deletion docker-deploy/training_template/docker-compose-spark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ services:
- fate_flow_logs:/data/projects/fate/fateflow/logs
networks:
- fate-network
restart: always
depends_on:
- python

Expand Down Expand Up @@ -149,11 +150,12 @@ services:
volumes:
- ./confs/rabbitmq/enabled_plugins:/etc/rabbitmq/enabled_plugins
- ./shared_dir/data/rabbitmq:/var/lib/rabbitmq
restart: always
networks:
- fate-network

pulsar:
image: "apachepulsar/pulsar:2.7.0"
image: "federatedai/pulsar:2.7.0"
ports:
- "6650:6650"
- "6651:6651"
Expand All @@ -166,6 +168,7 @@ services:
- ./shared_dir/data/pulsar:/pulsar/data
command:
["/bin/bash", "-c", "bin/pulsar standalone -nss"]
restart: always
networks:
- fate-network

Expand Down
4 changes: 2 additions & 2 deletions helm-charts/FATE-Exchange/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
apiVersion: v1
appVersion: v1.7.0
appVersion: v1.7.1
description: A Helm chart for fate exchange
name: fate-exchange
version: v1.7.0
version: v1.7.1
2 changes: 1 addition & 1 deletion helm-charts/FATE-Exchange/values-template-example.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: fate-exchange
namespace: fate-exchange
chartName: fate-exchange
chartVersion: v1.7.0
chartVersion: v1.7.1
partyId: 1
registry: ""
imageTag: ""
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/FATE-Exchange/values-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ partyName: {{ .name }}
image:
registry: {{ .registry | default "federatedai" }}
isThridParty: {{ empty .registry | ternary "false" "true" }}
tag: {{ .imageTag | default "1.6.1-release" }}
tag: {{ .imageTag | default "1.7.1-release" }}
pullPolicy: {{ .pullPolicy | default "IfNotPresent" }}
{{- with .imagePullSecrets }}
imagePullSecrets:
Expand Down
2 changes: 1 addition & 1 deletion helm-charts/FATE-Exchange/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ partyName: fate-exchange
image:
registry: federatedai
isThridParty:
tag: 1.6.1-release
tag: 1.7.1-release
pullPolicy: IfNotPresent
imagePullSecrets:
# - name:
Expand Down
4 changes: 2 additions & 2 deletions helm-charts/FATE/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
apiVersion: v1
appVersion: v1.7.0
appVersion: v1.7.1
description: A Helm chart for fate-training
name: fate
version: v1.7.0
version: v1.7.1
home: https://fate.fedai.org
icon: https://aisp-1251170195.cos.ap-hongkong.myqcloud.com/wp-content/uploads/sites/12/2019/09/logo.png
sources:
Expand Down
9 changes: 7 additions & 2 deletions helm-charts/FATE/templates/python-spark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ metadata:
{{ include "fate.labels" . | indent 4 }}
data:
spark-defaults.conf: |
spark.master {{ .Values.modules.python.spark.master | default "spark://spark-master:7077"}}
spark.master {{ if eq .Values.modules.python.backend "spark_local_pulsar" }}local[*]{{ else }}{{ .Values.modules.python.spark.master | default "spark://spark-master:7077"}}{{ end }}
spark.driver.host {{ .Values.modules.python.spark.driverHost | default "fateflow" }}
{{- if .Values.modules.python.spark.driverStartPort }}
spark.driver.port {{ .Values.modules.python.spark.driverStartPort }}
Expand Down Expand Up @@ -64,7 +64,7 @@ data:
# host: xx
# http_port: xx
# grpc_port: xx
{{- if eq .Values.modules.python.backend "spark_pulsar" "spark_rabbitmq" }}
{{- if eq .Values.modules.python.backend "spark_pulsar" "spark_rabbitmq" "spark_local_pulsar" }}
proxy: nginx
{{- else }}
proxy: rollsite
Expand Down Expand Up @@ -97,6 +97,10 @@ data:
computing: spark
federation: rabbitmq
storage: hdfs
{{- else if eq .Values.modules.python.backend "spark_local_pulsar" }}
computing: spark
federation: pulsar
storage: localfs
{{- else }}
computing: eggroll
federation: eggroll
Expand Down Expand Up @@ -544,6 +548,7 @@ spec:
targetPort: 9380
protocol: TCP
type: ClusterIP
clusterIP: None
selector:
fateMoudle: python
{{ include "fate.matchLabels" . | indent 4 }}
Expand Down
Loading

0 comments on commit 946616c

Please sign in to comment.