Skip to content

Commit

Permalink
Make Slice Syntax Pythonic (#80)
Browse files Browse the repository at this point in the history
Fixed slicing semantics to be Pythonic in how they treat negative numbers.

Also, for naming specific tests, respect C4M_TEST_DIR when there isn't
an absolute path provided.
viega authored Jul 5, 2024
1 parent 49d539a commit c6a36f2
Showing 13 changed files with 166 additions and 24 deletions.
1 change: 1 addition & 0 deletions include/compiler/datatypes/parse.h
Original file line number Diff line number Diff line change
@@ -79,6 +79,7 @@ typedef enum {
c4m_nt_field_prop,
c4m_nt_expression,
c4m_nt_extern_box,
c4m_nt_elided,
#ifdef C4M_DEV
c4m_nt_print,
#endif
16 changes: 15 additions & 1 deletion include/con4m/path.h
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@ c4m_utf8_t *c4m_get_user_dir(c4m_utf8_t *);
c4m_utf8_t *c4m_get_current_directory(c4m_utf8_t *);
c4m_utf8_t *c4m_path_join(c4m_list_t *);
c4m_file_kind c4m_get_file_kind(c4m_utf8_t *);
c4m_list_t *_c4m_path_walk(c4m_utf8_t *, ...);
c4m_list_t *_c4m_path_walk(c4m_utf8_t *, ...);

#define c4m_path_walk(x, ...) _c4m_path_walk(x, C4M_VA(__VA_ARGS__))

@@ -40,3 +40,17 @@ c4m_get_user_name()

return c4m_new_utf8(pw->pw_name);
}

/**
 * Join two path components, unless the second one should stand alone.
 *
 * If `p2` begins with the string returned by c4m_get_slash_const()
 * (i.e., it looks like an absolute path), `p2` is returned untouched
 * and `p1` is ignored. Otherwise, both components are placed in a
 * fresh two-element list and handed to c4m_path_join().
 *
 * @param p1 Leading component (e.g., a base directory).
 * @param p2 Trailing component; returned as-is when it starts with the
 *           slash constant.
 * @return `p2` itself, or the result of c4m_path_join() on [p1, p2].
 */
static inline c4m_utf8_t *
c4m_path_simple_join(c4m_utf8_t *p1, c4m_utf8_t *p2)
{
    if (!c4m_str_starts_with(p2, c4m_get_slash_const())) {
        c4m_list_t *parts = c4m_list(c4m_type_utf8());

        c4m_list_append(parts, p1);
        c4m_list_append(parts, p2);

        return c4m_path_join(parts);
    }

    // p2 already starts at the root; there is nothing to prepend.
    return p2;
}
20 changes: 20 additions & 0 deletions src/con4m/compiler/check_pass.c
Original file line number Diff line number Diff line change
@@ -741,6 +741,21 @@ lookup_or_add(pass2_ctx *ctx, c4m_utf8_t *name)
return result;
}

/**
 * Type-check an elided node (an omitted expression).
 *
 * The only parent kind handled here is c4m_nt_range, in which case the
 * elided node is given the int type. Any other parent kind lands on
 * c4m_unreachable().
 *
 * @param ctx Check-pass context; ctx->node is the elided node.
 */
static void
handle_elision(pass2_ctx *ctx)
{
    c4m_pnode_t *elided = c4m_get_pnode(ctx->node);
    c4m_pnode_t *owner  = c4m_get_pnode(ctx->node->parent);

    if (owner->kind == c4m_nt_range) {
        // An omitted slice bound is typed as an int, like a bound
        // that was written out.
        elided->type = c4m_type_int();
        return;
    }

    // No other parent kind is handled for elided nodes.
    c4m_unreachable();
}

static void
handle_index(pass2_ctx *ctx)
{
@@ -756,6 +771,7 @@ handle_index(pass2_ctx *ctx)

use_context_enter(ctx);
process_child(ctx, 1);

ix1_type = c4m_type_resolve(get_pnode_type(ctx->node->children[1]));
pnode->type = node_type;

@@ -2270,6 +2286,10 @@ base_check_pass_dispatch(pass2_ctx *ctx)
handle_return(ctx);
break;

case c4m_nt_elided:
handle_elision(ctx);
break;

#ifdef C4M_DEV
case c4m_nt_print:
c4m_list_append(ctx->file_ctx->print_nodes, ctx->node);
66 changes: 64 additions & 2 deletions src/con4m/compiler/codegen.c
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ typedef enum {
assign_to_mem_slot,
assign_via_index_set_call,
assign_via_slice_set_call,
assign_via_len_then_slice_set_call,
} assign_type_t;

typedef struct {
@@ -1650,6 +1651,28 @@ gen_assign(gen_ctx *ctx)
case assign_via_slice_set_call:
gen_tcall(ctx, C4M_BI_SLICE_SET, ctx->cur_pnode->type);
break;
case assign_via_len_then_slice_set_call:
// Need to call len() on the object for the 2nd slice
// param. The 2nd slice parameter is supposed to get
// pushed on first though.
//
// Stash the value in R0.
emit(ctx, C4M_ZPopToR0);
// stash start ix
emit(ctx, C4M_ZPopToR1);
// Dupe the container.
emit(ctx, C4M_ZDupTop);
// Call len on the non-popped version.
gen_tcall(ctx, C4M_BI_LEN, ctx->cur_pnode->type);
// Push the index back.
emit(ctx, C4M_ZPushFromR1);
// Swap the two indices to be in the proper order.
emit(ctx, C4M_ZSwap);
// Push the value back.
emit(ctx, C4M_ZPushFromR0);
// Slice!
gen_tcall(ctx, C4M_BI_SLICE_SET, ctx->cur_pnode->type);
break;
case assign_via_index_set_call:
emit(ctx, C4M_ZSwap);
gen_tcall(ctx, C4M_BI_INDEX_SET, ctx->cur_pnode->type);
@@ -1739,23 +1762,59 @@ gen_index_or_slice(gen_ctx *ctx)

if (lvalue) {
if (slice) {
ctx->assign_method = assign_via_slice_set_call;
if (pnode->extra_info == (void *)1) {
ctx->assign_method = assign_via_len_then_slice_set_call;
}
else {
ctx->assign_method = assign_via_slice_set_call;
}
}
else {
ctx->assign_method = assign_via_index_set_call;
}
ctx->lvalue = true;
return;
}

if (slice) {
if (pnode->extra_info == (void *)1) {
// Need to call len() on the object for the 2nd slice
// param. The 2nd slice parameter is supposed to get
// pushed on first though.
//
// Stash the other index.
emit(ctx, C4M_ZPopToR0);
// Dupe the container.
emit(ctx, C4M_ZDupTop);
// Call len on the dupe.
gen_tcall(ctx, C4M_BI_LEN, ctx->cur_pnode->type);
// Push the index back.
emit(ctx, C4M_ZPushFromR0);
// Swap positions.
emit(ctx, C4M_ZSwap);
}
gen_tcall(ctx, C4M_BI_SLICE_GET, ctx->cur_pnode->type);
}
else {
gen_tcall(ctx, C4M_BI_INDEX_GET, ctx->cur_pnode->type);
}
}

/**
 * Generate code for an elided node.
 *
 * Right now, elision only shows up for the indexes of a slice, so the
 * parent is the range node. Two cases:
 *
 *  - The elided node is the parent's first child (the left-hand side
 *    of the range): we simply load an immediate 0.
 *
 *  - Otherwise: nothing is emitted here. Instead, we set the range
 *    pnode's extra_info to (void *)1 as a signal for
 *    gen_index_or_slice.
 *
 * @param ctx Code generation context; ctx->cur_node is the elided node.
 */
static inline void
gen_elision(gen_ctx *ctx)
{
    if (ctx->cur_node->parent->children[0] != ctx->cur_node) {
        // We're not the first child, so we cheat a little bit:
        // flag the range pnode and let gen_index_or_slice deal
        // with it.
        c4m_pnode_t *owner = c4m_get_pnode(ctx->cur_node->parent);

        owner->extra_info = (void *)1;
        return;
    }

    // We're on the left-hand side of the range; life is easy, we
    // just emit an actual 0.
    gen_load_immediate(ctx, 0);
}

static inline void
gen_sym_decl(gen_ctx *ctx)
{
@@ -1932,6 +1991,9 @@ gen_one_node(gen_ctx *ctx)
case c4m_nt_use:
gen_use(ctx);
break;
case c4m_nt_elided:
gen_elision(ctx);
break;
// The following list is still TODO:
case c4m_nt_varargs_param:
// These should always be passthrough.
20 changes: 18 additions & 2 deletions src/con4m/compiler/parse.c
Original file line number Diff line number Diff line change
@@ -229,6 +229,7 @@ static const node_type_info_t node_type_info[] = {
{ "nt_field_prop", 1, 0, 0, 0, 0, },
{ "nt_expression", 0, 0, 0, 0, 0, },
{ "nt_extern_box", 0, 0, 0, 0, 0, },
{ "nt_elided", 0, 0, 0, 0, 0, },
#ifdef C4M_DEV
{ "nt_print", 0, 0, 0, 0, 0, },
#endif
@@ -1602,7 +1603,13 @@ optional_range(parse_ctx *ctx, c4m_tree_node_t *lhs)
start_node(ctx, c4m_nt_range, true);
adopt_kid(ctx, lhs);

adopt_kid(ctx, expression(ctx));
if (tok_kind(ctx) == c4m_tt_rbracket) {
start_node(ctx, c4m_nt_elided, false);
end_node(ctx);
}
else {
adopt_kid(ctx, expression(ctx));
}
end_node(ctx);
return true;
}
@@ -3116,7 +3123,16 @@ index_expr(parse_ctx *ctx, c4m_tree_node_t *lhs)
temporary_tree(ctx, c4m_nt_index);
adopt_kid(ctx, lhs);
expect(ctx, c4m_tt_lbracket);
c4m_tree_node_t *item = expression(ctx);

c4m_tree_node_t *item;

if (tok_kind(ctx) == c4m_tt_colon) {
temporary_tree(ctx, c4m_nt_elided);
item = restore_tree(ctx);
}
else {
item = expression(ctx);
}

if (!optional_range(ctx, item)) {
adopt_kid(ctx, item);
1 change: 0 additions & 1 deletion src/con4m/grid.c
Original file line number Diff line number Diff line change
@@ -57,7 +57,6 @@ styled_repeat(c4m_codepoint_t c, uint32_t width, c4m_style_t style)
// Produce padding: returns whatever styled_repeat() yields for a
// space character repeated `width` times with `style`.
static inline c4m_utf32_t *
get_styled_pad(uint32_t width, c4m_style_t style)
{
// NOTE(review): the diff summary for this file reports one deletion
// and no additions, so this assert looks like the line being removed
// by the commit -- confirm before relying on the width < 200 limit.
assert(width < 200);
return styled_repeat(' ', width, style);
}

4 changes: 2 additions & 2 deletions src/con4m/hatlists.c
Original file line number Diff line number Diff line change
@@ -272,7 +272,7 @@ c4m_flexarray_get_slice(flexarray_t *list, int64_t start, int64_t end)
}
}
if (end < 0) {
end += len + 1;
end += len;
}
else {
if (end > len) {
@@ -315,7 +315,7 @@ c4m_flexarray_set_slice(flexarray_t *list, int64_t start, int64_t end, flexarray
}
}
if (end < 0) {
end += len1 + 1;
end += len1;
}
else {
if (end > len1) {
6 changes: 2 additions & 4 deletions src/con4m/list.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// "Exclusive" array, meaning not shared across threads. It's dynamic,
// and supports resizing.
#include "con4m.h"

static void
@@ -469,7 +467,7 @@ c4m_list_get_slice(c4m_list_t *list, int64_t start, int64_t end)
}
}
if (end < 0) {
end += len + 1;
end += len;
}
else {
if (end > len) {
@@ -516,7 +514,7 @@ c4m_list_set_slice(c4m_list_t *list,
}
}
if (end < 0) {
end += len1 + 1;
end += len1;
}
else {
if (end > len1) {
4 changes: 2 additions & 2 deletions src/con4m/string.c
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ c4m_str_slice(const c4m_str_t *instr, int64_t start, int64_t end)
}
}
if (end < 0) {
end += len + 1;
end += len;
}
else {
if (end > len) {
@@ -909,7 +909,7 @@ c4m_str_ends_with(const c4m_str_t *s1, const c4m_str_t *s2)
c4m_utf32_t *u1 = c4m_to_utf32(s1);
c4m_utf32_t *u2 = c4m_to_utf32(s2);

u1 = c4m_str_slice(u1, l1 - l2, -1);
u1 = c4m_str_slice(u1, l1 - l2, l1);

return c4m_str_eq(u1, u2);
}
4 changes: 2 additions & 2 deletions src/con4m/vm.c
Original file line number Diff line number Diff line change
@@ -532,9 +532,9 @@ c4m_vm_tcall(c4m_vmthread_t *tstate, c4m_zinstruction_t *i)
return;
case C4M_BI_SLICE_SET:
STACK_REQUIRE_VALUES(4);
// endIx = sp[3]
// container = sp[3]
// endIx = sp[2]
// startIx = sp[1]
// container = sp[2]
// value = sp[0]

c4m_slice_set(tstate->sp[3].rvalue.obj,
13 changes: 8 additions & 5 deletions src/tests/test.c
Original file line number Diff line number Diff line change
@@ -77,7 +77,7 @@ c4m_parse_kat(c4m_str_t *path, c4m_str_t *s)
err_basic_usage(path);
return NULL;
}
extract_errors(result, s, 9, -1);
extract_errors(result, s, 9, c4m_str_codepoint_len(s));
result->ignore_output = 1;
return result;
}
@@ -87,7 +87,7 @@ c4m_parse_kat(c4m_str_t *path, c4m_str_t *s)
err_basic_usage(path);
return NULL;
}
extract_output(result, s, 9, -1);
extract_output(result, s, 9, c4m_str_codepoint_len(s));
return result;
}

@@ -98,11 +98,11 @@ c4m_parse_kat(c4m_str_t *path, c4m_str_t *s)

if (errix != 0) {
extract_output(result, s, 9, errix);
extract_errors(result, s, errix + 9, -1);
extract_errors(result, s, errix + 9, c4m_str_codepoint_len(s));
}
else {
extract_errors(result, s, 9, outix);
extract_output(result, s, outix + 9, -1);
extract_output(result, s, outix + 9, c4m_str_codepoint_len(s));
}

return result;
@@ -230,7 +230,8 @@ build_file_list()

for (int i = 0; i < n; i++) {
c4m_utf8_t *s = c4m_to_utf8(c4m_list_get(argv, i, NULL));
s = c4m_resolve_path(s);
s = c4m_resolve_path(c4m_path_simple_join(test_dir, s));

switch (c4m_get_file_kind(s)) {
case C4M_FK_IS_REG_FILE:
case C4M_FK_IS_FLINK:
@@ -259,6 +260,7 @@ build_file_list()
}

n = c4m_list_len(to_recurse);

for (int i = 0; i < n; i++) {
int num_hits = 0;
c4m_utf8_t *path = c4m_list_get(to_recurse, i, NULL);
@@ -269,6 +271,7 @@ build_file_list()
int walk_len = c4m_list_len(files);
for (int j = 0; j < walk_len; j++) {
c4m_utf8_t *one = c4m_list_get(files, j, NULL);

if (c4m_str_ends_with(one, ext)) {
kat = c4m_extract_kat(one);
// When scanning dirs, if we have test cases that span
6 changes: 3 additions & 3 deletions tests/list.c4m
Original file line number Diff line number Diff line change
@@ -26,9 +26,9 @@ $output:
30
40
40
90
[10, 100]
110
50
[10, 100, 40]
150
"""

x = [1, 2, 3, 4]
29 changes: 29 additions & 0 deletions tests/slice.c4m
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Ensure slice semantics match Python's.
"""
"""
$output:
[0, 1, 2, 3, 4, 5]
[0, 1, 2]
[0, 1, 2]
[3, 4, 5]
[3, 4]
[0, 1, 2, 3]
[4, 3, 2, 1, 5]
foob

"""
x = [0, 1, 2, 3, 4, 5]

print(x[:])
print(x[0:3])
print(x[:3])
print(x[3:])
print(x[-3:-1])
print(x[:-2])
x[:-1] = [4, 3, 2, 1]
print(x)


s = "foobar"
print(s[:-2])

0 comments on commit c6a36f2

Please sign in to comment.