Skip to content

Commit

Permalink
Merge pull request #826 from sean-morris/summaries
Browse files Browse the repository at this point in the history
Otter Grade Write Individual Notebook Results
  • Loading branch information
chrispyles authored Aug 28, 2024
2 parents db6ee4f + 22630d2 commit b76a733
Show file tree
Hide file tree
Showing 19 changed files with 1,318 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Changelog

**v5.6.0 (unreleased):**

* Updated Otter Grade to write grading summary for each notebook per [#814](https://github.com/ucbds-infra/otter-grader/issues/814)
* Updated Otter Grade CSV to indicate which notebooks timed out per [#813](https://github.com/ucbds-infra/otter-grader/issues/813)
* Updated Otter Grade CSV to include the number of points per question in the first row
* Updated Otter Grade CSV to include total points column
Expand Down
1 change: 1 addition & 0 deletions otter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def generate_cli(*args, **kwargs):
@click.option("-a", "--autograder", default=defaults["autograder"], help="Path to autograder zip file")
@click.option("-o", "--output-dir", default=defaults["output_dir"], help="Directory to which to write output")
@click.option("--ext", default=defaults["ext"], type=click.Choice(_ALLOWED_EXTENSIONS), help="The extension to glob for submissions")
@click.option("--summaries", is_flag=True, help="Whether to write the otter run results for each graded notebook")
@click.option("--pdfs", is_flag=True, help="Whether to copy notebook PDFs out of containers")
@click.option("--containers", default=defaults["containers"], type=click.INT, help="Specify number of containers to run in parallel")
@click.option("--image", default=defaults["image"], help="A Docker image tag to use as the base image")
Expand Down
13 changes: 13 additions & 0 deletions otter/grade/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .utils import (
merge_csv,
prune_images,
POINTS_POSSIBLE_LABEL,
SCORES_DICT_FILE_KEY,
SCORES_DICT_PERCENT_CORRECT_KEY,
SCORES_DICT_TOTAL_POINTS_KEY,
Expand All @@ -33,6 +34,7 @@ def main(
autograder: str = "./autograder.zip",
containers: int = 4,
ext: str = "ipynb",
summaries: bool = False,
no_kill: bool = False,
image: str = "ubuntu:22.04",
pdfs: bool = False,
Expand Down Expand Up @@ -155,6 +157,17 @@ def main(
# write to CSV file
output_df.to_csv(os.path.join(output_dir, "final_grades.csv"), index=False)

# Write each graded notebook's summary to "<output_dir>/grading-summaries/<file>.txt".
# Only runs when the caller passed summaries=True.
if summaries:
    grading_summary_path = os.path.join(output_dir, "grading-summaries")
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    os.makedirs(grading_summary_path, exist_ok=True)
    for df in grade_dfs:
        df_dict = df.to_dict()
        file_label = df_dict["file"][0]
        # Skip the entry labelled POINTS_POSSIBLE_LABEL (presumably the
        # points-possible row rather than a submission -- confirm in .utils).
        if file_label == POINTS_POSSIBLE_LABEL:
            continue
        summary_file = os.path.join(grading_summary_path, f"{file_label}.txt")
        # Summaries contain non-ASCII status markers (pass/fail emoji), so pin
        # the encoding instead of relying on the platform default, which can
        # raise UnicodeEncodeError on non-UTF-8 locales.
        with open(summary_file, mode="w", encoding="utf-8") as f:
            f.write(df_dict["summary"][0])

# return percentage if a single file was graded
if len(paths) == 1 and os.path.isfile(paths[0]):
return output_df[SCORES_DICT_PERCENT_CORRECT_KEY][1]
4 changes: 4 additions & 0 deletions test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,10 @@ def test_grade(mocked_grade, run_cli):
assert_cli_result(result, expect_error=False)
mocked_grade.assert_called_with(**{**std_kwargs, "ext": ext})

result = run_cli([*cmd_start, "--summaries"])
assert_cli_result(result, expect_error=False)
mocked_grade.assert_called_with(**{**std_kwargs, "summaries": True})

result = run_cli([*cmd_start, "--pdfs"])
assert_cli_result(result, expect_error=False)
mocked_grade.assert_called_with(**{**std_kwargs, "pdfs": True})
Expand Down
72 changes: 72 additions & 0 deletions test/test_grade/files/results/fails1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
q1 results:
q1 - 1 result:
❌ Test case failed
Trying:
square(3)
Expecting:
9
**********************************************************************
Line 2, in q1 0
Failed example:
square(3)
Expected:
9
Got:
27

q1 - 2 result:
❌ Test case failed
Trying:
square(2.5)
Expecting:
6.25
**********************************************************************
Line 2, in q1 1
Failed example:
square(2.5)
Expected:
6.25
Got:
15.625

q1 - 3 result:
❌ Test case failed
Trying:
square(6)
Expecting:
36
**********************************************************************
Line 2, in q1 2
Failed example:
square(6)
Expected:
36
Got:
216

q1 - 4 result:
❌ Test case failed
Trying:
square(1.5)
Expecting:
2.25
**********************************************************************
Line 2, in q1 3
Failed example:
square(1.5)
Expected:
2.25
Got:
3.375

q2b results: All test cases passed!

q2 results: All test cases passed!

q3 results: All test cases passed!

q4 results: All test cases passed!

q6 results: All test cases passed!

q7 results: All test cases passed!
48 changes: 48 additions & 0 deletions test/test_grade/files/results/fails1H.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
q1 results:
q1 - 1 result:
✅ Test case passed

q1 - 2 result:
✅ Test case passed

q1 - 3 result:
❌ Test case failed
Trying:
square(6)
Expecting:
36
**********************************************************************
Line 2, in q1 2
Failed example:
square(6)
Expected:
36
Got:
216

q1 - 4 result:
❌ Test case failed
Trying:
square(1.5)
Expecting:
2.25
**********************************************************************
Line 2, in q1 3
Failed example:
square(1.5)
Expected:
2.25
Got:
3.375

q2b results: All test cases passed!

q2 results: All test cases passed!

q3 results: All test cases passed!

q4 results: All test cases passed!

q6 results: All test cases passed!

q7 results: All test cases passed!
164 changes: 164 additions & 0 deletions test/test_grade/files/results/fails1and3and6.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
q1 results:
q1 - 1 result:
❌ Test case failed
Trying:
square(3)
Expecting:
9
**********************************************************************
Line 2, in q1 0
Failed example:
square(3)
Expected:
9
Got:
243

q1 - 2 result:
❌ Test case failed
Trying:
square(2.5)
Expecting:
6.25
**********************************************************************
Line 2, in q1 1
Failed example:
square(2.5)
Expected:
6.25
Got:
97.65625

q1 - 3 result:
❌ Test case failed
Trying:
square(6)
Expecting:
36
**********************************************************************
Line 2, in q1 2
Failed example:
square(6)
Expected:
36
Got:
7776

q1 - 4 result:
❌ Test case failed
Trying:
square(1.5)
Expecting:
2.25
**********************************************************************
Line 2, in q1 3
Failed example:
square(1.5)
Expected:
2.25
Got:
7.59375

q2b results: All test cases passed!

q2 results: All test cases passed!

q3 results:
q3 - 1 result:
❌ Test case failed
Trying:
x
Expecting:
True
**********************************************************************
Line 2, in q3 0
Failed example:
x
Exception raised:
Traceback (most recent call last):
File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
exec(compile(example.source, filename, "single",
File "<doctest q3 0[0]>", line 1, in <module>
x
NameError: name 'x' is not defined

q3 - 2 result:
❌ Test case failed
Trying:
if x:
print("yep")
else:
print("nope")
Expecting:
yep
**********************************************************************
Line 2, in q3 1
Failed example:
if x:
print("yep")
else:
print("nope")
Exception raised:
Traceback (most recent call last):
File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
exec(compile(example.source, filename, "single",
File "<doctest q3 1[0]>", line 1, in <module>
if x:
NameError: name 'x' is not defined

q4 results: All test cases passed!

q6 results:
q6 - 1 result:
❌ Test case failed
Trying:
fib = fiberator()
Expecting nothing
ok
Trying:
next(fib) == 0 and next(fib) == 1
Expecting:
True
**********************************************************************
Line 3, in q6 0
Failed example:
next(fib) == 0 and next(fib) == 1
Expected:
True
Got:
False

q6 - 2 result:
❌ Test case failed
Trying:
fib = fiberator()
Expecting nothing
ok
Trying:
for _ in range(10):
print(next(fib))
Expecting:
0
1
1
2
3
5
8
13
21
34
**********************************************************************
Line 3, in q6 1
Failed example:
for _ in range(10):
print(next(fib))
Exception raised:
Traceback (most recent call last):
File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
exec(compile(example.source, filename, "single",
File "<doctest q6 1[1]>", line 2, in <module>
print(next(fib))
StopIteration

q7 results: All test cases passed!
Loading

0 comments on commit b76a733

Please sign in to comment.