diff --git a/CHANGELOG.md b/CHANGELOG.md
index dfac84bf..1f1a0454 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 # Changelog
 
 **v5.6.0 (unreleased):**
-
+* Updated Otter Grade to write a grading summary for each notebook when the `--summaries` flag is passed, per [#814](https://github.com/ucbds-infra/otter-grader/issues/814)
 * Updated Otter Grade CSV to indicate which notebooks timeout per [#813](https://github.com/ucbds-infra/otter-grader/issues/813)
 * Updated Otter Grade CSV to include the number of points per question in the first row
 * Updated Otter Grade CSV to include total points column
diff --git a/otter/cli.py b/otter/cli.py
index 105568cc..2f09b9e3 100644
--- a/otter/cli.py
+++ b/otter/cli.py
@@ -132,6 +132,7 @@ def generate_cli(*args, **kwargs):
 @click.option("-a", "--autograder", default=defaults["autograder"], help="Path to autograder zip file")
 @click.option("-o", "--output-dir", default=defaults["output_dir"], help="Directory to which to write output")
 @click.option("--ext", default=defaults["ext"], type=click.Choice(_ALLOWED_EXTENSIONS), help="The extension to glob for submissions")
+@click.option("--summaries", is_flag=True, help="Whether to write the otter run results for each graded notebook")
 @click.option("--pdfs", is_flag=True, help="Whether to copy notebook PDFs out of containers")
 @click.option("--containers", default=defaults["containers"], type=click.INT, help="Specify number of containers to run in parallel")
 @click.option("--image", default=defaults["image"], help="A Docker image tag to use as the base image")
diff --git a/otter/grade/__init__.py b/otter/grade/__init__.py
index 6d45a28d..cee34ac3 100644
--- a/otter/grade/__init__.py
+++ b/otter/grade/__init__.py
@@ -10,6 +10,7 @@
 from .utils import (
     merge_csv,
     prune_images,
+    POINTS_POSSIBLE_LABEL,
     SCORES_DICT_FILE_KEY,
     SCORES_DICT_PERCENT_CORRECT_KEY,
     SCORES_DICT_TOTAL_POINTS_KEY,
@@ -33,6 +34,7 @@ def main(
     autograder: str = "./autograder.zip",
     containers: int = 4, 
     ext: str = "ipynb",
+    summaries: bool = False,
     no_kill: bool = False,
     image: str = "ubuntu:22.04", 
     pdfs: bool = False,
@@ -155,6 +157,17 @@ def main(
     # write to CSV file
     output_df.to_csv(os.path.join(output_dir, "final_grades.csv"), index=False)
 
+    # write score summaries to files
+    if summaries:
+        grading_summary_path = os.path.join(output_dir, "grading-summaries")
+        if not os.path.exists(grading_summary_path):
+            os.mkdir(grading_summary_path)
+        for df in grade_dfs:
+            df_dict = df.to_dict()
+            if df_dict['file'][0] != POINTS_POSSIBLE_LABEL:
+                with open(os.path.join(grading_summary_path, f"{df_dict['file'][0]}.txt"), mode="w") as f:
+                    f.write(df_dict["summary"][0])
+
     # return percentage if a single file was graded
     if len(paths) == 1 and os.path.isfile(paths[0]):
         return output_df[SCORES_DICT_PERCENT_CORRECT_KEY][1]
diff --git a/test/test_cli.py b/test/test_cli.py
index 44b2ebb5..75a317b5 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -479,6 +479,10 @@ def test_grade(mocked_grade, run_cli):
         assert_cli_result(result, expect_error=False)
         mocked_grade.assert_called_with(**{**std_kwargs, "ext": ext})
 
+    result = run_cli([*cmd_start, "--summaries"])
+    assert_cli_result(result, expect_error=False)
+    mocked_grade.assert_called_with(**{**std_kwargs, "summaries": True})
+
     result = run_cli([*cmd_start, "--pdfs"])
     assert_cli_result(result, expect_error=False)
     mocked_grade.assert_called_with(**{**std_kwargs, "pdfs": True})
diff --git a/test/test_grade/files/results/fails1.txt b/test/test_grade/files/results/fails1.txt
new file mode 100644
index 00000000..4e00aa7f
--- /dev/null
+++ b/test/test_grade/files/results/fails1.txt
@@ -0,0 +1,72 @@
+q1 results:
+    q1 - 1 result:
+        ❌ Test case failed
+        Trying:
+            square(3)
+        Expecting:
+            9
+        **********************************************************************
+        Line 2, in q1 0
+        Failed example:
+            square(3)
+        Expected:
+            9
+        Got:
+            27
+
+    q1 - 2 result:
+        ❌ Test case failed
+        Trying:
+            square(2.5)
+        Expecting:
+            6.25
+        **********************************************************************
+        Line 2, in q1 1
+        Failed example:
+            square(2.5)
+        Expected:
+            6.25
+        Got:
+            15.625
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            216
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            3.375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails1H.txt b/test/test_grade/files/results/fails1H.txt
new file mode 100644
index 00000000..90799964
--- /dev/null
+++ b/test/test_grade/files/results/fails1H.txt
@@ -0,0 +1,48 @@
+q1 results:
+    q1 - 1 result:
+        ✅ Test case passed
+
+    q1 - 2 result:
+        ✅ Test case passed
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            216
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            3.375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails1and3and6.txt b/test/test_grade/files/results/fails1and3and6.txt
new file mode 100644
index 00000000..1ca70df7
--- /dev/null
+++ b/test/test_grade/files/results/fails1and3and6.txt
@@ -0,0 +1,164 @@
+q1 results:
+    q1 - 1 result:
+        ❌ Test case failed
+        Trying:
+            square(3)
+        Expecting:
+            9
+        **********************************************************************
+        Line 2, in q1 0
+        Failed example:
+            square(3)
+        Expected:
+            9
+        Got:
+            243
+
+    q1 - 2 result:
+        ❌ Test case failed
+        Trying:
+            square(2.5)
+        Expecting:
+            6.25
+        **********************************************************************
+        Line 2, in q1 1
+        Failed example:
+            square(2.5)
+        Expected:
+            6.25
+        Got:
+            97.65625
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            7776
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            7.59375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results:
+    q3 - 1 result:
+        ❌ Test case failed
+        Trying:
+            x
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q3 0
+        Failed example:
+            x
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q3 0[0]>", line 1, in <module>
+                x
+            NameError: name 'x' is not defined
+
+    q3 - 2 result:
+        ❌ Test case failed
+        Trying:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expecting:
+            yep
+        **********************************************************************
+        Line 2, in q3 1
+        Failed example:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q3 1[0]>", line 1, in <module>
+                if x:
+            NameError: name 'x' is not defined
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            next(fib) == 0 and next(fib) == 1
+        Expecting:
+            True
+        **********************************************************************
+        Line 3, in q6 0
+        Failed example:
+            next(fib) == 0 and next(fib) == 1
+        Expected:
+            True
+        Got:
+            False
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q6 1[1]>", line 2, in <module>
+                print(next(fib))
+            StopIteration
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails1and4.txt b/test/test_grade/files/results/fails1and4.txt
new file mode 100644
index 00000000..1959bbe8
--- /dev/null
+++ b/test/test_grade/files/results/fails1and4.txt
@@ -0,0 +1,86 @@
+q1 results:
+    q1 - 1 result:
+        ❌ Test case failed
+        Trying:
+            square(3)
+        Expecting:
+            9
+        **********************************************************************
+        Line 2, in q1 0
+        Failed example:
+            square(3)
+        Expected:
+            9
+        Got:
+            27
+
+    q1 - 2 result:
+        ❌ Test case failed
+        Trying:
+            square(2.5)
+        Expecting:
+            6.25
+        **********************************************************************
+        Line 2, in q1 1
+        Failed example:
+            square(2.5)
+        Expected:
+            6.25
+        Got:
+            15.625
+
+    q1 - 3 result:
+        ❌ Test case failed
+        Trying:
+            square(6)
+        Expecting:
+            36
+        **********************************************************************
+        Line 2, in q1 2
+        Failed example:
+            square(6)
+        Expected:
+            36
+        Got:
+            216
+
+    q1 - 4 result:
+        ❌ Test case failed
+        Trying:
+            square(1.5)
+        Expecting:
+            2.25
+        **********************************************************************
+        Line 2, in q1 3
+        Failed example:
+            square(1.5)
+        Expected:
+            2.25
+        Got:
+            3.375
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results:
+    q4 - 1 result:
+        ❌ Test case failed
+        Trying:
+            np.isclose(x, 39.0625)
+        Expecting:
+            np.True_
+        **********************************************************************
+        Line 2, in q4 0
+        Failed example:
+            np.isclose(x, 39.0625)
+        Expected:
+            np.True_
+        Got:
+            np.False_
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails2.txt b/test/test_grade/files/results/fails2.txt
new file mode 100644
index 00000000..4c31d970
--- /dev/null
+++ b/test/test_grade/files/results/fails2.txt
@@ -0,0 +1,116 @@
+q1 results: All test cases passed!
+
+q2b results:
+    q2b - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate([])
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 0
+        Failed example:
+            negate([])
+        Expected:
+            True
+        Got:
+            []
+
+    q2b - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate({})
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 1
+        Failed example:
+            negate({})
+        Expected:
+            True
+        Got:
+            {}
+
+    q2b - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate([1])
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2b 2
+        Failed example:
+            negate([1])
+        Expected:
+            False
+        Got:
+            [1]
+
+q2 results:
+    q2 - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate(True)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 0
+        Failed example:
+            negate(True)
+        Expected:
+            False
+        Got:
+            True
+
+    q2 - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate(False)
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 1
+        Failed example:
+            negate(False)
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate("")
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 2
+        Failed example:
+            negate("")
+        Expected:
+            True
+        Got:
+            ''
+
+    q2 - 4 result:
+        ❌ Test case failed
+        Trying:
+            negate(1)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 3
+        Failed example:
+            negate(1)
+        Expected:
+            False
+        Got:
+            1
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails2and3and6H.txt b/test/test_grade/files/results/fails2and3and6H.txt
new file mode 100644
index 00000000..d8304809
--- /dev/null
+++ b/test/test_grade/files/results/fails2and3and6H.txt
@@ -0,0 +1,201 @@
+q1 results: All test cases passed!
+
+q2b results:
+    q2b - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate([])
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 0
+        Failed example:
+            negate([])
+        Expected:
+            True
+        Got:
+            False
+
+    q2b - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate({})
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 1
+        Failed example:
+            negate({})
+        Expected:
+            True
+        Got:
+            False
+
+    q2b - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate([1])
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2b 2
+        Failed example:
+            negate([1])
+        Expected:
+            False
+        Got:
+            True
+
+q2 results:
+    q2 - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate(True)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 0
+        Failed example:
+            negate(True)
+        Expected:
+            False
+        Got:
+            True
+
+    q2 - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate(False)
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 1
+        Failed example:
+            negate(False)
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate("")
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 2
+        Failed example:
+            negate("")
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 4 result:
+        ❌ Test case failed
+        Trying:
+            negate(1)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 3
+        Failed example:
+            negate(1)
+        Expected:
+            False
+        Got:
+            True
+
+q3 results:
+    q3 - 1 result:
+        ❌ Test case failed
+        Trying:
+            x
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q3 0
+        Failed example:
+            x
+        Expected:
+            True
+        Got:
+            False
+
+    q3 - 2 result:
+        ❌ Test case failed
+        Trying:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expecting:
+            yep
+        **********************************************************************
+        Line 2, in q3 1
+        Failed example:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expected:
+            yep
+        Got:
+            nope
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ✅ Test case passed
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Expected:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        Got:
+            0
+            1
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails2and4.txt b/test/test_grade/files/results/fails2and4.txt
new file mode 100644
index 00000000..5a40db41
--- /dev/null
+++ b/test/test_grade/files/results/fails2and4.txt
@@ -0,0 +1,130 @@
+q1 results: All test cases passed!
+
+q2b results:
+    q2b - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate([])
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 0
+        Failed example:
+            negate([])
+        Expected:
+            True
+        Got:
+            False
+
+    q2b - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate({})
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 1
+        Failed example:
+            negate({})
+        Expected:
+            True
+        Got:
+            False
+
+    q2b - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate([1])
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2b 2
+        Failed example:
+            negate([1])
+        Expected:
+            False
+        Got:
+            True
+
+q2 results:
+    q2 - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate(True)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 0
+        Failed example:
+            negate(True)
+        Expected:
+            False
+        Got:
+            True
+
+    q2 - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate(False)
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 1
+        Failed example:
+            negate(False)
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate("")
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 2
+        Failed example:
+            negate("")
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 4 result:
+        ❌ Test case failed
+        Trying:
+            negate(1)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 3
+        Failed example:
+            negate(1)
+        Expected:
+            False
+        Got:
+            True
+
+q3 results: All test cases passed!
+
+q4 results:
+    q4 - 1 result:
+        ❌ Test case failed
+        Trying:
+            np.isclose(x, 39.0625)
+        Expecting:
+            np.True_
+        **********************************************************************
+        Line 2, in q4 0
+        Failed example:
+            np.isclose(x, 39.0625)
+        Expected:
+            np.True_
+        Got:
+            np.False_
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails2and6H.txt b/test/test_grade/files/results/fails2and6H.txt
new file mode 100644
index 00000000..c5a01b53
--- /dev/null
+++ b/test/test_grade/files/results/fails2and6H.txt
@@ -0,0 +1,166 @@
+q1 results: All test cases passed!
+
+q2b results:
+    q2b - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate([])
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 0
+        Failed example:
+            negate([])
+        Expected:
+            True
+        Got:
+            []
+
+    q2b - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate({})
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2b 1
+        Failed example:
+            negate({})
+        Expected:
+            True
+        Got:
+            {}
+
+    q2b - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate([1])
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2b 2
+        Failed example:
+            negate([1])
+        Expected:
+            False
+        Got:
+            [1]
+
+q2 results:
+    q2 - 1 result:
+        ❌ Test case failed
+        Trying:
+            negate(True)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 0
+        Failed example:
+            negate(True)
+        Expected:
+            False
+        Got:
+            True
+
+    q2 - 2 result:
+        ❌ Test case failed
+        Trying:
+            negate(False)
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 1
+        Failed example:
+            negate(False)
+        Expected:
+            True
+        Got:
+            False
+
+    q2 - 3 result:
+        ❌ Test case failed
+        Trying:
+            negate("")
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q2 2
+        Failed example:
+            negate("")
+        Expected:
+            True
+        Got:
+            ''
+
+    q2 - 4 result:
+        ❌ Test case failed
+        Trying:
+            negate(1)
+        Expecting:
+            False
+        **********************************************************************
+        Line 2, in q2 3
+        Failed example:
+            negate(1)
+        Expected:
+            False
+        Got:
+            1
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ✅ Test case passed
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Expected:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        Got:
+            0
+            1
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails3.txt b/test/test_grade/files/results/fails3.txt
new file mode 100644
index 00000000..e8873104
--- /dev/null
+++ b/test/test_grade/files/results/fails3.txt
@@ -0,0 +1,48 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results:
+    q3 - 1 result:
+        ❌ Test case failed
+        Trying:
+            x
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q3 0
+        Failed example:
+            x
+        Expected:
+            True
+        Got:
+            False
+
+    q3 - 2 result:
+        ❌ Test case failed
+        Trying:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expecting:
+            yep
+        **********************************************************************
+        Line 2, in q3 1
+        Failed example:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expected:
+            yep
+        Got:
+            nope
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails3and4.txt b/test/test_grade/files/results/fails3and4.txt
new file mode 100644
index 00000000..d6eb8295
--- /dev/null
+++ b/test/test_grade/files/results/fails3and4.txt
@@ -0,0 +1,62 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results:
+    q3 - 1 result:
+        ❌ Test case failed
+        Trying:
+            x
+        Expecting:
+            True
+        **********************************************************************
+        Line 2, in q3 0
+        Failed example:
+            x
+        Expected:
+            True
+        Got:
+            False
+
+    q3 - 2 result:
+        ❌ Test case failed
+        Trying:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expecting:
+            yep
+        **********************************************************************
+        Line 2, in q3 1
+        Failed example:
+            if x:
+                print("yep")
+            else:
+                print("nope")
+        Expected:
+            yep
+        Got:
+            nope
+
+q4 results:
+    q4 - 1 result:
+        ❌ Test case failed
+        Trying:
+            np.isclose(x, 39.0625)
+        Expecting:
+            np.True_
+        **********************************************************************
+        Line 2, in q4 0
+        Failed example:
+            np.isclose(x, 39.0625)
+        Expected:
+            np.True_
+        Got:
+            np.False_
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails4.txt b/test/test_grade/files/results/fails4.txt
new file mode 100644
index 00000000..7e529a8b
--- /dev/null
+++ b/test/test_grade/files/results/fails4.txt
@@ -0,0 +1,27 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results:
+    q4 - 1 result:
+        ❌ Test case failed
+        Trying:
+            np.isclose(x, 39.0625)
+        Expecting:
+            np.True_
+        **********************************************************************
+        Line 2, in q4 0
+        Failed example:
+            np.isclose(x, 39.0625)
+        Expected:
+            np.True_
+        Got:
+            np.False_
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails6.txt b/test/test_grade/files/results/fails6.txt
new file mode 100644
index 00000000..c1f12759
--- /dev/null
+++ b/test/test_grade/files/results/fails6.txt
@@ -0,0 +1,64 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            next(fib) == 0 and next(fib) == 1
+        Expecting:
+            True
+        **********************************************************************
+        Line 3, in q6 0
+        Failed example:
+            next(fib) == 0 and next(fib) == 1
+        Expected:
+            True
+        Got:
+            False
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Exception raised:
+            Traceback (most recent call last):
+              File "/root/mambaforge/envs/otter-env/lib/python3.9/doctest.py", line 1334, in __run
+                exec(compile(example.source, filename, "single",
+              File "<doctest q6 1[1]>", line 2, in <module>
+                print(next(fib))
+            StopIteration
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/fails6H.txt b/test/test_grade/files/results/fails6H.txt
new file mode 100644
index 00000000..bc9ae463
--- /dev/null
+++ b/test/test_grade/files/results/fails6H.txt
@@ -0,0 +1,63 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results:
+    q6 - 1 result:
+        ✅ Test case passed
+
+    q6 - 2 result:
+        ❌ Test case failed
+        Trying:
+            fib = fiberator()
+        Expecting nothing
+        ok
+        Trying:
+            for _ in range(10):
+                print(next(fib))
+        Expecting:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        **********************************************************************
+        Line 3, in q6 1
+        Failed example:
+            for _ in range(10):
+                print(next(fib))
+        Expected:
+            0
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+            34
+        Got:
+            0
+            1
+            1
+            1
+            2
+            3
+            5
+            8
+            13
+            21
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/files/results/passesAll.txt b/test/test_grade/files/results/passesAll.txt
new file mode 100644
index 00000000..d42468fb
--- /dev/null
+++ b/test/test_grade/files/results/passesAll.txt
@@ -0,0 +1,13 @@
+q1 results: All test cases passed!
+
+q2b results: All test cases passed!
+
+q2 results: All test cases passed!
+
+q3 results: All test cases passed!
+
+q4 results: All test cases passed!
+
+q6 results: All test cases passed!
+
+q7 results: All test cases passed!
\ No newline at end of file
diff --git a/test/test_grade/test_integration.py b/test/test_grade/test_integration.py
index fc8e1700..8877aacd 100644
--- a/test/test_grade/test_integration.py
+++ b/test/test_grade/test_integration.py
@@ -37,6 +37,8 @@ def cleanup_output(cleanup_enabled):
             shutil.rmtree("test/submission_pdfs")
         if os.path.exists(ZIP_SUBM_PATH):
             os.remove(ZIP_SUBM_PATH)
+        if os.path.exists("test/grading-summaries"):
+            shutil.rmtree("test/grading-summaries")
 
 
 @pytest.fixture(autouse=True, scope="module")
@@ -362,3 +364,40 @@ def test_config_overrides_integration():
     got = got.reindex(sorted(got.columns), axis=1)
     want = want.reindex(sorted(want.columns), axis=1)
     assert got.equals(want)
+
+
+@mock.patch("otter.grade.launch_containers")
+def test_grade_summaries(mocked_launch_grade):
+    """Check that ``grade(summaries=True)`` writes each notebook's grading summary to disk."""
+    # Stand in for the containers: one single-row frame per expected-summary fixture.
+    mock_dfs = []
+    for filename in os.listdir(FILE_MANAGER.get_path("results")):
+        filename = os.path.splitext(filename)[0]
+        test_file_path = os.path.join("test/test_grade/files/results", f"{filename}.txt")
+        with open(test_file_path, 'r') as test_summary_file:
+            mock_dfs.append(pd.DataFrame([{
+                "percent_correct": 1.0,
+                "total_points_earned": 15.0,
+                "file": f"{filename}",
+                "summary": test_summary_file.read(),
+                "grading_status": "Completed"
+            }]))
+    mocked_launch_grade.return_value = mock_dfs
+
+    notebook_path = FILE_MANAGER.get_path("notebooks")
+    grade(
+        name = ASSIGNMENT_NAME,
+        paths = [notebook_path],
+        output_dir = "test/",
+        autograder = AG_ZIP_PATH,
+        summaries = True
+    )
+    # Guard on the fixture, not the output, so a summary that was never written fails the test.
+    for filename in os.listdir(notebook_path):
+        filename = os.path.splitext(filename)[0]
+        file_path = os.path.join("test/grading-summaries", f"{filename}.txt")
+        test_file_path = os.path.join("test/test_grade/files/results", f"{filename}.txt")
+        if os.path.isfile(test_file_path):
+            assert os.path.isfile(file_path), f"missing grading summary: {file_path}"
+            with open(file_path, 'r') as summary_file, open(test_file_path, 'r') as test_summary_file:
+                assert summary_file.read() == test_summary_file.read()