From 4a156d4e43d328e20a1db2e51b0efcf305e61137 Mon Sep 17 00:00:00 2001 From: Myles Scolnick Date: Thu, 12 Dec 2024 13:28:45 -0500 Subject: [PATCH] experimental: smarted detection of edited code --- frontend/package.json | 2 + frontend/pnpm-lock.yaml | 21 +- frontend/src/core/cells/cells.ts | 10 +- .../core/codemirror/__tests__/edited.test.ts | 259 ++++++++++++++++++ frontend/src/core/codemirror/edited.ts | 107 ++++++++ 5 files changed, 391 insertions(+), 8 deletions(-) create mode 100644 frontend/src/core/codemirror/__tests__/edited.test.ts create mode 100644 frontend/src/core/codemirror/edited.ts diff --git a/frontend/package.json b/frontend/package.json index c82a9bf6386..0b346dd1456 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -37,6 +37,7 @@ "@lezer/common": "^1.2.1", "@lezer/highlight": "^1.2.1", "@lezer/lr": "^1.4.2", + "@lezer/python": "^1.1.15", "@marimo-team/marimo-api": "file:../openapi", "@marimo-team/react-slotz": "^0.1.8", "@open-rpc/client-js": "^1.8.1", @@ -107,6 +108,7 @@ "js-cookie": "^3.0.5", "katex": "^0.16.11", "lodash-es": "^4.17.21", + "lru-cache": "^11.0.2", "lucide-react": "^0.428.0", "lz-string": "^1.5.0", "mermaid": "^11.3.0", diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 385352284f8..d07452a465d 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -87,6 +87,9 @@ importers: '@lezer/lr': specifier: ^1.4.2 version: 1.4.2 + '@lezer/python': + specifier: ^1.1.15 + version: 1.1.15 '@marimo-team/marimo-api': specifier: file:../openapi version: file:../openapi @@ -297,6 +300,9 @@ importers: lodash-es: specifier: ^4.17.21 version: 4.17.21 + lru-cache: + specifier: ^11.0.2 + version: 11.0.2 lucide-react: specifier: ^0.428.0 version: 0.428.0(react@18.3.1) @@ -1469,8 +1475,8 @@ packages: '@lezer/php@1.0.1': resolution: {integrity: sha512-aqdCQJOXJ66De22vzdwnuC502hIaG9EnPK2rSi+ebXyUd+j7GAX1mRjWZOVOmf3GST1YUfUCu6WXDiEgDGOVwA==} - '@lezer/python@1.1.7': - resolution: {integrity: sha512-RbhKQ9+Y/r/Xv6OcJmETEM5tBFdpdAJRqrgi3akJkWBLCuiAaLP/jKdYzu+ICljaSXPCQeznrv+r9HUEnjq3HQ==} + '@lezer/python@1.1.15': + resolution: {integrity: sha512-aVQ43m2zk4FZYedCqL0KHPEUsqZOrmAvRhkhHlVPnDD1HODDyyQv5BRIuod4DadkgBEZd53vQOtXTonNbEgjrQ==} '@lezer/rust@1.0.1': resolution: {integrity: sha512-j+ToFKM6Wpglv3OQ4ebHYdYIMT2dh0ziCCV0rTf47AWiHOVhR0WjaKrBq+yuvDQNEhr5sxPxVI7+naJIgpqcsQ==} @@ -6289,6 +6295,10 @@ packages: loupe@3.1.2: resolution: {integrity: sha512-23I4pFZHmAemUnz8WZXbYRSKYj801VDaNv9ETuMh7IrMc7VuVVSo+Z9iLE3ni30+U48iDWfi30d3twAXBYmnCg==} + lru-cache@11.0.2: + resolution: {integrity: sha512-123qHRfJBmo2jXDbo/a5YOQrJoHF/GNQTLzQ5+IdK5pWpceK17yRc6ozlWd25FxvGKQbIUs91fDFkXmDHTKcyA==} + engines: {node: 20 || >=22} + lru-cache@5.1.1: resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} @@ -9458,7 +9468,7 @@ snapshots: '@codemirror/language': 6.10.3 '@codemirror/state': 6.4.1 '@lezer/common': 1.2.1 - '@lezer/python': 1.1.7 + '@lezer/python': 1.1.15 transitivePeerDependencies: - '@codemirror/view' @@ -10061,8 +10071,9 @@ snapshots: '@lezer/highlight': 1.2.1 '@lezer/lr': 1.4.2 - '@lezer/python@1.1.7': + '@lezer/python@1.1.15': dependencies: + '@lezer/common': 1.2.1 '@lezer/highlight': 1.2.1 '@lezer/lr': 1.4.2 @@ -15958,6 +15969,8 @@ snapshots: loupe@3.1.2: {} + lru-cache@11.0.2: {} + lru-cache@5.1.1: dependencies: yallist: 3.1.1 diff --git a/frontend/src/core/cells/cells.ts b/frontend/src/core/cells/cells.ts index 15a2a1587b6..b9e90e396d1 100644 --- a/frontend/src/core/cells/cells.ts +++ b/frontend/src/core/cells/cells.ts @@ -51,6 +51,7 @@ import { } from "@/utils/id-tree"; import { isEqual } from "lodash-es"; import { isErrorMime } from "../mime"; +import { areLogicallyDifferent } from "../codemirror/edited"; export const SCRATCH_CELL_ID = "__scratch__" as CellId; @@ -246,7 +247,7 @@ const { code, lastCodeRun, lastExecutionTime, - edited: Boolean(code) && code !== lastCodeRun, + edited: Boolean(code) && areLogicallyDifferent(code, lastCodeRun), }), }, cellRuntime: { @@ -640,7 +641,7 @@ const { : { ...cell, code: code, - edited: code.trim() !== cell.lastCodeRun, + edited: areLogicallyDifferent(code, cell.lastCodeRun), }; }); }, @@ -987,7 +988,7 @@ const { code: beforeCursorCode, edited: Boolean(beforeCursorCode) && - beforeCursorCode.trim() !== cell.lastCodeRun?.trim(), + areLogicallyDifferent(beforeCursorCode, cell.lastCodeRun), }, [newCellId]: createCell({ id: newCellId, @@ -1035,7 +1036,8 @@ const { ...cell, code: snapshot, edited: - Boolean(snapshot) && snapshot?.trim() !== cell.lastCodeRun?.trim(), + Boolean(snapshot) && + areLogicallyDifferent(snapshot, cell.lastCodeRun), }, }, cellRuntime: { diff --git a/frontend/src/core/codemirror/__tests__/edited.test.ts b/frontend/src/core/codemirror/__tests__/edited.test.ts new file mode 100644 index 00000000000..cf081b1efb3 --- /dev/null +++ b/frontend/src/core/codemirror/__tests__/edited.test.ts @@ -0,0 +1,259 @@ +/* Copyright 2024 Marimo. All rights reserved. */ +import { describe, expect, test } from "vitest"; +import { areLogicallyDifferent } from "../edited"; + +describe("areLogicallyDifferent - same code", () => { + test("identical code returns false", () => { + const code = "def foo(): return 42"; + expect(areLogicallyDifferent(code, code)).toBe(false); + }); + + test("different whitespace returns false", () => { + const code1 = "def foo():\n\n\n return 42"; + const code2 = "def foo(): return 42"; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + }); + + test("different comments return false", () => { + const code1 = "def foo(): # first comment\n return 42"; + const code2 = "def foo(): # different comment\n return 42"; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + }); + + test("new lines before and after return statements are ignored", () => { + const code1 = "def foo(): return 42\n"; + const code2 = "def foo(): return 42\n\n"; + const code3 = "\n\ndef foo(): return 42"; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + expect(areLogicallyDifferent(code1, code3)).toBe(false); + expect(areLogicallyDifferent(code2, code3)).toBe(false); + }); + + test("handles unicode characters", () => { + const code1 = 'print("Hello 世界")'; + const code2 = 'print("Hello 世界")'; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + }); + + test("complex class definitions", () => { + const code1 = ` + class MyClass: + def __init__(self): + self.x = 42 + + def method(self): + return self.x + `; + const code2 = ` + class MyClass: + def __init__(self): # Initialize + self.x = 42 # Set x + + def method(self): # Get x + return self.x # Return it + `; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + }); + + test("function with types", () => { + expect( + areLogicallyDifferent( + "def foo(x: int) -> int: return x", + "def foo(x: int) -> int: return x", + ), + ).toBe(false); + }); +}); + +describe("areLogicallyDifferent - different code", () => { + test("multiline comments return true", () => { + const code1 = `""" + This is a long docstring + with multiple lines + """ + def foo(): return 42`; + const code2 = `"""Different docstring""" + def foo(): return 42`; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different function names return true", () => { + const code1 = "def foo(): return 42"; + const code2 = "def bar(): return 42"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("function with parsing error", () => { + const code1 = "def foo(): return 42"; + const code2 = "def foo():\nreturn 42"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different string literals return true", () => { + const code1 = 'print("hello")'; + const code2 = 'print("world")'; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different number literals return true", () => { + const code1 = "x = 42"; + const code2 = "x = 43"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different operators return true", () => { + const code1 = "x + y"; + const code2 = "x - y"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different control flow returns true", () => { + const code1 = "if x: pass"; + const code2 = "while x: pass"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different indentation structure returns true", () => { + const code1 = "if x:\n if y: pass"; + const code2 = "if x:\n pass\nif y: pass"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("handles invalid Python syntax", () => { + const code1 = "this is not python"; + const code2 = "also not python"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("decorators are preserved", () => { + const code1 = "@decorator\ndef func(): pass"; + const code2 = "def func(): pass"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("type hints affect comparison", () => { + const code1 = "def func(x: int) -> int: return x"; + const code2 = "def func(x): return x"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("different string quote styles return true", () => { + const code1 = 'x = "hello"'; + const code2 = "x = 'hello'"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); +}); + +describe("areLogicallyDifferent - advanced Python features", () => { + test("lambda functions", () => { + expect( + areLogicallyDifferent("x = lambda a: a + 1", "x = lambda b: b + 1"), + ).toBe(true); + + expect( + areLogicallyDifferent("x = lambda a: a + 1", "x = lambda a: a - 1"), + ).toBe(true); + }); + + test("list comprehensions", () => { + expect( + areLogicallyDifferent( + "[x for x in range(10) if x % 2 == 0]", + "[x for x in range(10) if x % 2 == 1]", + ), + ).toBe(true); + }); + + test("try/except blocks", () => { + const code1 = ` + try: + raise ValueError() + except ValueError: + pass + `; + const code2 = ` + try: + raise ValueError() + except Exception: + pass + `; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("multiline strings with different line endings", () => { + expect( + areLogicallyDifferent( + 'x = """hello\nworld\n"""', + 'x = """hello\r\nworld\r\n"""', + ), + ).toBe(true); + }); + + test("nested function definitions", () => { + const code1 = ` + def outer(): + def inner(): pass + return inner + `; + const code2 = ` + def outer(): + def different(): pass + return different + `; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("number literals in different bases", () => { + expect(areLogicallyDifferent("x = 0xFF", "x = 255")).toBe(true); + + expect(areLogicallyDifferent("x = 0o10", "x = 8")).toBe(true); + }); + + test("f-strings vs regular strings", () => { + expect(areLogicallyDifferent('x = f"hello {42}"', 'x = "hello 42"')).toBe( + true, + ); + }); + + test("async functions", () => { + const code1 = "async def foo(): pass"; + const code2 = "def foo(): pass"; + expect(areLogicallyDifferent(code1, code2)).toBe(true); + }); + + test("multiple assignments", () => { + expect(areLogicallyDifferent("a = b = c = 1", "a, b, c = 1, 1, 1")).toBe( + true, + ); + }); + + test("walrus operator", () => { + expect( + areLogicallyDifferent("if (x := 42) > 0: pass", "x = 42\nif x > 0: pass"), + ).toBe(true); + }); + + test("function with types", () => { + expect( + areLogicallyDifferent( + "def foo(x: int) -> int: return x", + "def foo(x: float) -> int: return x", + ), + ).toBe(true); + }); +}); + +test("handles empty strings", () => { + expect(areLogicallyDifferent("", "")).toBe(false); + expect(areLogicallyDifferent("pass", "")).toBe(true); +}); + +test("handles invalid python code", () => { + const code1 = "x = 1 +"; + const code2 = "x = 1 +"; + expect(areLogicallyDifferent(code1, code2)).toBe(false); + + const code3 = "x = 1 +"; + const code4 = "x = 1 -"; + expect(areLogicallyDifferent(code3, code4)).toBe(true); +}); diff --git a/frontend/src/core/codemirror/edited.ts b/frontend/src/core/codemirror/edited.ts new file mode 100644 index 00000000000..848f7f19e9a --- /dev/null +++ b/frontend/src/core/codemirror/edited.ts @@ -0,0 +1,107 @@ +/* Copyright 2024 Marimo. All rights reserved. */ +import type { Tree } from "@lezer/common"; +import { parser } from "@lezer/python"; +import { LRUCache } from "lru-cache"; +import { Logger } from "@/utils/Logger"; + +const IGNORE_NODES = new Set(["Comment", "LineComment", "BlockComment", " "]); + +// Helper function to get a simplified string representation of the AST +function getASTString(tree: Tree, code: string): string { + const result: string[] = []; + const cursor = tree.cursor(); + + do { + if (IGNORE_NODES.has(cursor.name)) { + continue; + } + + if (cursor.type.isError) { + result.push(code.slice(cursor.from, cursor.to) || "ERROR"); + continue; + } + + switch (cursor.name) { + case "VariableName": + case "AssignOp": + case "Number": + case "String": + case "Identifier": + case "ArithOp": + case "CompareOp": + case "BinaryOp": + case "UnaryOp": + result.push(`${cursor.name}(${code.slice(cursor.from, cursor.to)})`); + break; + default: + if (cursor.name) { + result.push(cursor.name); + } + } + } while (cursor.next()); + + return result.join(":"); +} + +// LRU cache for parsed trees +const treeCache = new LRUCache({ + max: 100, +}); + +function parseAndCache(code: string): Tree { + const cached = treeCache.get(code); + if (cached) { + return cached; + } + + const tree = parser.parse(code); + treeCache.set(code, tree); + return tree; +} + +/** + * Compares two Python code snippets for logical differences, + * ignoring whitespace, comments, and formatting differences. + * + * This function should return as fast as possible. + * + * Optimizations that won't work: + * - Quick comparison after stripping whitespace, because whitespace can + * exist in strings. + * - Quick node count comparison, since they may contain comments or other + * ignored nodes. + */ +export function areLogicallyDifferent( + code1: string | undefined | null, + code2: string | undefined | null, +): boolean { + if (code1 == null || code2 == null) { + return code1 !== code2; + } + + code1 = code1.trim(); + code2 = code2.trim(); + + // Quick equality check before parsing + if (code1 === code2) { + return false; + } + + // If either code is empty (after trimming), we consider them different + if (code1 === "" || code2 === "") { + return true; + } + + try { + const tree1 = parseAndCache(code1); + const tree2 = parseAndCache(code2); + + const ast1String = getASTString(tree1, code1); + const ast2String = getASTString(tree2, code2); + + return ast1String !== ast2String; + } catch { + Logger.error("Failed to parse code", { code1, code2 }); + return code1 !== code2; + } +}