-
Notifications
You must be signed in to change notification settings - Fork 434
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5278216
commit ba0ea5e
Showing
11 changed files
with
244 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
PlatformWithVersion=Python | ||
BuildCommands=conda env create --file environment.yml --prefix ./venv --quiet |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# MLflow project definition for the basic_cleaning step.
# The parameters declared here must match the command-line options that
# run.py registers with argparse; each one is forwarded to the script below.
name: basic_cleaning
conda_env: conda.yml

entry_points:
  main:
    parameters:

      input_artifact:
        description: Name of the W&B artifact that holds the raw CSV to clean
        type: string

      output_artifact:
        description: Name of the W&B artifact to create for the cleaned data
        type: string

      output_type:
        description: Type to assign to the output artifact
        type: string

      output_description:
        description: Description to attach to the output artifact
        type: string

      min_price:
        description: Lower price bound used when dropping outliers
        type: float

      max_price:
        description: Upper price bound used when dropping outliers
        type: float

    command: >-
      python run.py --input_artifact {input_artifact} --output_artifact {output_artifact} --output_type {output_type} --output_description {output_description} --min_price {min_price} --max_price {max_price}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Conda environment for the basic_cleaning step.
name: basic_cleaning
channels:
  - conda-forge
  - defaults
dependencies:
  - pip=20.3.3
  - numpy=1.20.1
  # run.py imports pandas (read_csv / to_datetime / to_csv), so it has to be
  # installed explicitly; it is not pulled in by the other packages listed here.
  - pandas=1.2.3
  - pip:
      - wandb==0.10.31
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Performs basic cleaning on the data and saves the results in Weights & Biases | ||
""" | ||
import argparse | ||
import logging | ||
import wandb | ||
import pandas as pd | ||
|
||
|
||
# Configure the root logger once for the whole script: INFO and above,
# each record prefixed with a timestamp.
logging.basicConfig(
    format="%(asctime)-15s %(message)s",
    level=logging.INFO,
)
# No name is passed, so this is the root logger itself.
logger = logging.getLogger()
|
||
|
||
def go(args):
    """
    Clean the input dataset and log the result to Weights & Biases.

    Steps, in order:
      1. Start a W&B run (job type "basic_cleaning") and record ``args`` in
         its config.
      2. Fetch the file behind ``args.input_artifact`` and load it as CSV.
      3. Keep only rows whose ``price`` lies between ``args.min_price`` and
         ``args.max_price``.
      4. Parse the ``last_review`` column as datetimes.
      5. Keep only rows with longitude between -74.25 and -73.50 and latitude
         between 40.5 and 41.2 (the bounding box used for NYC).
      6. Write the result to ``clean_sample.csv`` (no index column) and upload
         it as a new artifact named ``args.output_artifact``.

    Args:
        args: parsed command-line namespace providing ``input_artifact``,
            ``output_artifact``, ``output_type``, ``output_description``,
            ``min_price`` and ``max_price``.
    """
    wandb_run = wandb.init(job_type="basic_cleaning")
    wandb_run.config.update(args)

    # use_artifact() also records on the run that this artifact version was consumed.
    logger.info(f'Downloading artifact {args.input_artifact}')
    source_artifact = wandb_run.use_artifact(args.input_artifact)
    raw_csv_path = source_artifact.file()

    logger.info("Loading DataFrame")
    listings = pd.read_csv(raw_csv_path)

    # Price outliers go first; the date conversion then runs on the remaining rows only.
    logger.info('Drop outliers and convert datetime')
    price_in_range = listings['price'].between(args.min_price, args.max_price)
    listings = listings.loc[price_in_range].copy()
    listings['last_review'] = pd.to_datetime(listings['last_review'])

    # Discard listings whose coordinates fall outside the NYC bounding box.
    lon_in_box = listings['longitude'].between(-74.25, -73.50)
    lat_in_box = listings['latitude'].between(40.5, 41.2)
    listings = listings.loc[lon_in_box & lat_in_box].copy()

    logger.info(f'Saving Dataframe {args.output_artifact}')
    listings.to_csv('clean_sample.csv', index=False)

    # Wrap the CSV in an artifact and attach it to the run.
    cleaned_artifact = wandb.Artifact(
        args.output_artifact,
        type=args.output_type,
        description=args.output_description,
    )
    cleaned_artifact.add_file("clean_sample.csv")
    wandb_run.log_artifact(cleaned_artifact)
|
||
|
||
if __name__ == "__main__":

    # One (flag, type, help text) entry per command-line option.
    # Every option is mandatory and they are registered in this order.
    cli_options = (
        ("--input_artifact", str, "the input artifact name"),
        ("--output_artifact", str, "the output artifact name"),
        ("--output_type", str, "the output artifact type"),
        ("--output_description", str, "the output artifact description"),
        ("--min_price", float, "Outlier Min price"),
        ("--max_price", float, "Outlier Max price"),
    )

    parser = argparse.ArgumentParser(description=" A very basic data cleaning")
    for flag, value_type, help_text in cli_options:
        parser.add_argument(flag, type=value_type, help=help_text, required=True)

    # Parse the command line and hand the resulting namespace to the cleaning step.
    go(parser.parse_args())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters