Skip to content

Commit

Permalink
fix monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
kachiann committed Aug 19, 2024
1 parent f42d55a commit 9340177
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 9 deletions.
Binary file added models/DecisionTreeRegressor.pkl
Binary file not shown.
Binary file added models/LinearRegression.pkl
Binary file not shown.
15 changes: 15 additions & 0 deletions monitoring/create_monitoring_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Create reference and production datasets for model monitoring.

Splits the raw hourly bike-sharing data chronologically: the older 70%
of rows becomes the reference (baseline) dataset and the newer 30%
becomes the production dataset used to simulate incoming traffic.
"""

import math

import pandas as pd


def split_reference_production(df, production_share=0.3):
    """Split *df* in row order into (reference, production) frames.

    Row order is preserved (no shuffling), so the split mimics an
    older-data / newer-data time split. The production slice holds the
    last ceil(len(df) * production_share) rows, which matches the sizing
    used by sklearn's train_test_split(test_size=..., shuffle=False),
    so the output is byte-identical to the previous implementation
    without needing the scikit-learn dependency.

    Returns:
        (reference, production): two DataFrames whose concatenation,
        in order, is *df*.
    """
    n_production = math.ceil(len(df) * production_share)
    cut = len(df) - n_production
    return df.iloc[:cut], df.iloc[cut:]


def main():
    """Load the source CSV, split it, and write the two monitoring CSVs."""
    df = pd.read_csv("../project-mlops/data/hour.csv")

    reference_data, production_data = split_reference_production(df)

    # index=False: row positions are meaningless once split; keep files clean.
    reference_data.to_csv("reference_data.csv", index=False)
    production_data.to_csv("production_data.csv", index=False)

    print("Reference and production datasets created successfully.")


if __name__ == "__main__":
    # Guarded so importing this module (e.g. from tests) performs no I/O.
    main()
28 changes: 19 additions & 9 deletions monitoring/evidently_metrics_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,20 @@
rand = random.Random()

create_table_statement = """
DROP TABLE IF EXISTS dummy_metrics;
CREATE TABLE dummy_metrics(
CREATE TABLE IF NOT EXISTS dummy_metrics (
timestamp TIMESTAMP,
prediction_drift FLOAT,
num_drifted_columns INTEGER,
share_missing_values FLOAT
)
"""

reference_data = pd.read_csv("../data/reference.csv")
with open("../models/dec_tre.bin", "rb") as f_in:
# Update the file path to the new location
reference_data = pd.read_csv("../project-mlops/data/reference.csv")
with open("../project-mlops/models/DecisionTreeRegressor.pkl", "rb") as f_in:
model = joblib.load(f_in)

raw_data = pd.read_csv("../data/hour.csv")
raw_data = pd.read_csv("../project-mlops/data/hour.csv")

features = [
"season",
Expand Down Expand Up @@ -70,15 +70,24 @@ def prep_db():
with psycopg.connect(
"host=localhost port=5432 user=postgres password=example", autocommit=True
) as conn:
# Check if the database exists
res = conn.execute("SELECT 1 FROM pg_database WHERE datname='test'")
if not res.fetchall():
logging.info("Database 'test' not found. Creating database.")
conn.execute("CREATE DATABASE test;")
else:
logging.info("Database 'test' already exists.")

# Connect to the database and create the table
with psycopg.connect(
"host=localhost port=5432 dbname=test user=postgres password=example"
) as conn:
conn.execute(create_table_statement)
logging.info("Table 'dummy_metrics' is ready.")
except psycopg.OperationalError as e:
logging.error("OperationalError: %s", str(e))
except Exception as e:
logging.error("Error preparing the database: %s", {e})
logging.error("Error preparing the database: %s", str(e))


@task
Expand Down Expand Up @@ -112,8 +121,9 @@ def calculate_metrics_postgresql(curr):
share_missing_values,
),
)
logging.info("Metrics inserted into database.")
except Exception as e:
logging.error("Error calculating metrics: %s", {e})
logging.error("Error calculating metrics: %s", str(e))


@flow
Expand All @@ -134,9 +144,9 @@ def batch_monitoring_backfill():
if seconds_elapsed < SEND_TIMEOUT:
time.sleep(SEND_TIMEOUT - seconds_elapsed)
last_send += datetime.timedelta(seconds=10)
logging.info("Data sent")
logging.info("Data sent. Waiting for the next iteration.")
except Exception as e:
logging.error("Error in batch monitoring: %s", e)
logging.error("Error in batch monitoring: %s", str(e))


if __name__ == "__main__":
Expand Down

0 comments on commit 9340177

Please sign in to comment.