# .project-metadata.yaml

# Descriptive metadata for the prototype.
name: ML Churn Prototype
description: Prototype to demonstrate building a churn model on CML
author: Cloudera Inc.
# NOTE(review): both version values are unquoted, so a YAML parser loads them
# as floats (1.0, 2.0) rather than strings - confirm the consumer expects that.
specification_version: 1.0
prototype_version: 2.0
# Quoted so the value stays a string instead of being parsed as a date.
date: "2022-03-25"

# Configurable variables; each entry carries a default value and a description.
environment_variables:
  # Where the prototype's data files are stored (relative path).
  DATA_LOCATION:
    default: data/churn_prototype
    description: >-
      Relative path that will be used to store the data used for this
      prototype. This should be a location you have write access to, and which
      is suitable for non-production data.
  # Hive database holding the prototype's table; the default value is the
  # literal database name "default".
  HIVE_DATABASE:
    default: default
    description: >-
      Name of the Hive database that will be used to create the Hive table
      used for this prototype. This should be a Hive database you have write
      access to, and which is suitable for non-production data.
  # Hive table that receives the prototype's data.
  HIVE_TABLE:
    default: churn_prototype
    description: >-
      Name of the Hive table that will be created and populated with the data
      used for this prototype. If the table already exists, the prototype will
      assume it already contains the data for this prototype.

# Declares model_metrics as the only feature dependency.
# NOTE(review): presumably a platform feature that must be available - confirm.
feature_dependencies:
  - model_metrics

# A single runtime definition: editor, kernel, edition and one Spark add-on.
runtimes:
  - editor: PBJ Workbench
    kernel: Python 3.9
    edition: Standard
    addons:
      - "Spark 3.2.3 - CDE 1.22.0"

# Sequence of tasks; each entry is distinguished by its `type`.
tasks:
  # Session that runs the bootstrap script with 1 CPU and 2 units of memory.
  - type: run_session
    name: Install dependencies
    script: code/0_bootstrap.py
    short_summary: Install dependencies, set environment variables, and upload data
    cpu: 1
    memory: 2

  # Session that runs the data-ingest script.
  # STORAGE_MODE is not declared under environment_variables in this file;
  # NOTE(review): presumably it is set by an earlier step - confirm.
  - type: run_session
    name: Ingest data
    script: code/1_data_ingest.py
    short_summary: Ingest data into our Hive table
    long_summary: This script will only run if STORAGE_MODE is set to external
    cpu: 1
    memory: 2

  # Session that runs the model-training script.
  - type: run_session
    name: Train Churn Model
    short_summary: Train models
    script: code/4_train_models.py
    cpu: 1
    memory: 2

  # Creates the model endpoint entry. The entity_label telco_churn_model is the
  # same label used by the build_model and deploy_model tasks in this file.
  - type: create_model
    name: Churn Model API Endpoint
    entity_label: telco_churn_model
    description: This model API endpoint is used to predict churn
    short_summary: Create the churn model prediction api endpoint
    # Names the environment variable SHTM_ACCESS_KEY.
    # NOTE(review): presumably it receives the model's access key - confirm.
    access_key_environment_variable: SHTM_ACCESS_KEY
    # Default resources: 1 CPU and 2 units of memory.
    default_resources:
      cpu: 1
      memory: 2
    # Fixed replication with a single replica.
    default_replication_policy:
      type: fixed
      num_replicas: 1

  # Builds the model labelled telco_churn_model from the `explain` function in
  # code/5_model_serve_explainer.py.
  - type: build_model
    name: Build Telco Churn Model Endpoint
    entity_label: telco_churn_model
    comment: Build churn model
    # One example request; its response is an empty string.
    # String values stay quoted so that "No" is not read as a boolean.
    examples:
      - request:
          StreamingTV: "No"
          MonthlyCharges: 70.35
          PhoneService: "No"
          PaperlessBilling: "No"
          Partner: "No"
          OnlineBackup: "No"
          gender: "Female"
          Contract: "Month-to-month"
          TotalCharges: 1397.475
          StreamingMovies: "No"
          DeviceProtection: "No"
          PaymentMethod: "Bank transfer (automatic)"
          tenure: 29
          Dependents: "No"
          OnlineSecurity: "No"
          MultipleLines: "No"
          InternetService: "DSL"
          SeniorCitizen: "No"
          TechSupport: "No"
        response: ""
    target_file_path: code/5_model_serve_explainer.py
    target_function_name: explain

  # Deploys the model labelled telco_churn_model with 1 CPU and no GPU.
  # NOTE(review): no memory value is given here, unlike the other resource
  # specs in this file - confirm the platform default is acceptable.
  - type: deploy_model
    name: telco_churn_model
    entity_label: telco_churn_model
    cpu: 1
    gpu: 0

  # Starts the application script code/6_application.py under the subdomain
  # "churn".
  - type: start_application
    name: Application to Serve Churn UI
    subdomain: churn
    script: code/6_application.py