diff --git a/.gitignore b/.gitignore index 3b2042ce..268fa4ce 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ subprojects/*/ .meson_last .vscode .cache +deps/local diff --git a/deps/Darwin-arm64/libffi.a b/deps/Darwin-arm64/libffi.a new file mode 100644 index 00000000..d0b7331e Binary files /dev/null and b/deps/Darwin-arm64/libffi.a differ diff --git a/deps/linux-amd64/libffi.a b/deps/linux-amd64/libffi.a new file mode 100644 index 00000000..8811b7ab Binary files /dev/null and b/deps/linux-amd64/libffi.a differ diff --git a/deps/linux-arm64/libffi.a b/deps/linux-arm64/libffi.a new file mode 100644 index 00000000..b0112f1a Binary files /dev/null and b/deps/linux-arm64/libffi.a differ diff --git a/dev b/dev index 3fa74c1d..15277e60 100755 --- a/dev +++ b/dev @@ -20,6 +20,9 @@ function color { } +ARCH=$(uname -m 2>/dev/null) +PWD=$(pwd) +# The OS probe must stay directly above the $? check that follows it. OS=$(uname -o 2>/dev/null) if [[ $? -ne 0 ]] ; then # Older macOS/OSX versions of uname don't support -o OS=$(uname -s) @@ -32,6 +35,8 @@ function log { function meson_build { echo ${1} > .meson_last + rm -f deps/local 2>/dev/null + ln -s "${PWD}/deps/${OS}-${ARCH}" "${PWD}/deps/local" if [[ ! -d ${1} ]]; then diff --git a/include/compiler/datatypes/error.h b/include/compiler/datatypes/error.h index 989cee89..3fb1c482 100644 --- a/include/compiler/datatypes/error.h +++ b/include/compiler/datatypes/error.h @@ -173,6 +173,9 @@ typedef enum { c4m_err_augmented_assign_to_slice, c4m_warn_cant_export, c4m_err_assigned_void, + c4m_err_callback_no_match, + c4m_err_callback_bad_target, + c4m_err_callback_type_mismatch, c4m_err_last, } c4m_compile_error_t; diff --git a/include/compiler/datatypes/file.h b/include/compiler/datatypes/file.h index 3684a189..25d45b65 100644 --- a/include/compiler/datatypes/file.h +++ b/include/compiler/datatypes/file.h @@ -46,6 +46,8 @@ typedef struct c4m_file_compile_ctx { c4m_xlist_t *fn_def_syms; // Cache of fns defined. 
c4m_zmodule_info_t *module_object; c4m_xlist_t *call_patch_locs; + c4m_xlist_t *callback_literals; + c4m_xlist_t *extern_decls; int32_t static_size; uint32_t num_params; uint16_t local_module_id; diff --git a/include/compiler/datatypes/nodeinfo.h b/include/compiler/datatypes/nodeinfo.h index 942d2981..f5798f3a 100644 --- a/include/compiler/datatypes/nodeinfo.h +++ b/include/compiler/datatypes/nodeinfo.h @@ -1,69 +1,6 @@ #pragma once #include "con4m.h" -typedef struct { - c4m_utf8_t *litmod; - c4m_lit_syntax_t st; - c4m_type_t *cast_to; - c4m_builtin_t base_type; // only set for containers from here down. - int num_items; - c4m_type_t *type; -} c4m_lit_info_t; - -typedef struct { - c4m_type_t *full_type; - c4m_scope_t *fn_scope; - c4m_scope_t *formals; - c4m_fn_param_info_t *param_info; - c4m_fn_param_info_t return_info; - int num_params; - unsigned int pure : 1; - unsigned int void_return : 1; -} c4m_sig_info_t; - -typedef struct { - c4m_utf8_t *short_doc; - c4m_utf8_t *long_doc; - c4m_sig_info_t *signature_info; - struct c4m_cfg_node_t *cfg; - int32_t frame_size; - // sc = 'short circuit' - // If we are a 'once' function, this is the offset into static data, - // where we will place: - // - // - A boolean. - // - A pthread_mutex_t - // - A void * - // - // The idea is, if the boolean is true, we only ever read and - // return the cached (memoized) result, stored in the void *. If - // it's false, we grab the lock, check the boolean a second time, - // run thecm function, set the memo and the boolean, and then - // unlock. 
- int32_t sc_lock_offset; - int32_t sc_bool_offset; - int32_t sc_memo_offset; - int32_t local_id; - int32_t offset; - int32_t module_id; - - unsigned int private : 1; - unsigned int once : 1; -} c4m_fn_decl_t; - -typedef struct { - c4m_utf8_t *short_doc; - c4m_utf8_t *long_doc; - c4m_utf8_t *local_name; - c4m_sig_info_t *local_params; - int num_params; - c4m_utf8_t *external_name; - uint8_t *external_params; - uint8_t external_return_type; - int holds; - int allocs; -} c4m_ffi_decl_t; - // This data structure is the first bytes of the extra_info field for anything // that might be a jump target, including loops, conditionals, case statements, // etc. diff --git a/include/compiler/datatypes/scope.h b/include/compiler/datatypes/scope.h index 02237fc6..88e0f798 100644 --- a/include/compiler/datatypes/scope.h +++ b/include/compiler/datatypes/scope.h @@ -51,16 +51,6 @@ typedef struct { c4m_utf8_t *specified_uri; } c4m_module_info_t; -// For extern entries, the data structure will be in the `value` -// field. 
- -typedef struct { - c4m_utf8_t *name; - c4m_type_t *type; - unsigned int ffi_holds : 1; - unsigned int ffi_allocs : 1; -} c4m_fn_param_info_t; - typedef struct c4m_scope_entry_t { // The `value` field gets the proper value for vars and enums, but // for other types, it gets a pointer to one of the specific data diff --git a/include/compiler/parse.h b/include/compiler/parse.h index eee7c1a8..f04d1a52 100644 --- a/include/compiler/parse.h +++ b/include/compiler/parse.h @@ -109,6 +109,7 @@ typedef struct pass1_ctx { c4m_file_compile_ctx *file_ctx; c4m_scope_t *static_scope; bool in_func; + c4m_xlist_t *extern_decls; } pass1_ctx; static inline c4m_tree_node_t * diff --git a/include/con4m.h b/include/con4m.h index e2451c1f..d929c758 100644 --- a/include/con4m.h +++ b/include/con4m.h @@ -103,3 +103,5 @@ #include "compiler/codegen.h" #include "con4m/set.h" + +#include "con4m/ffi.h" diff --git a/include/con4m/datatypes.h b/include/con4m/datatypes.h index 9b85f312..2c86d8f3 100644 --- a/include/con4m/datatypes.h +++ b/include/con4m/datatypes.h @@ -5,8 +5,8 @@ typedef struct hatrack_set_st c4m_set_t; #include "con4m/datatypes/box.h" #include "con4m/datatypes/memory.h" #include "con4m/datatypes/kargs.h" -#include "con4m/datatypes/literals.h" #include "con4m/datatypes/objects.h" +#include "con4m/datatypes/literals.h" #include "con4m/datatypes/colors.h" #include "con4m/datatypes/codepoints.h" #include "con4m/datatypes/styles.h" @@ -23,14 +23,16 @@ typedef struct hatrack_set_st c4m_set_t; #include "con4m/datatypes/exceptions.h" #include "con4m/datatypes/mixed.h" #include "con4m/datatypes/tuples.h" -#include "con4m/datatypes/callbacks.h" #include "con4m/datatypes/streams.h" #include "con4m/datatypes/format.h" -#include "con4m/datatypes/vm.h" #include "compiler/datatypes/lex.h" #include "compiler/datatypes/error.h" #include "compiler/datatypes/parse.h" #include "compiler/datatypes/scope.h" +#include "con4m/datatypes/ffi.h" +#include "con4m/datatypes/ufi.h" +#include 
"con4m/datatypes/vm.h" +#include "con4m/datatypes/callbacks.h" #include "compiler/datatypes/nodeinfo.h" #include "compiler/datatypes/spec.h" #include "compiler/datatypes/cfg.h" diff --git a/include/con4m/datatypes/callbacks.h b/include/con4m/datatypes/callbacks.h index 993aa70c..e8eea3ec 100644 --- a/include/con4m/datatypes/callbacks.h +++ b/include/con4m/datatypes/callbacks.h @@ -1,54 +1,11 @@ #pragma once - -typedef enum { - FFI_ABI_0 = 0, - FFI_ABI_1 = 1, - FFI_ABI_2 = 2, -} c4m_ffi_abi; - -typedef struct _c4m_ffi_type { - size_t size; - unsigned short alignment; - unsigned short type; - struct _c4m_ffi_type **elements; -} c4m_ffi_type; - -typedef enum { - FFI_OK = 0, - FFI_BAD_TYPEDEF, - FFI_BAD_ABI -} c4m_ffi_status; - -typedef struct { - c4m_ffi_abi abi; - unsigned nargs; - c4m_ffi_type **arg_types; - c4m_ffi_type *rtype; - unsigned bytes; - unsigned flags; -} c4m_ffi_cif; - #include "con4m.h" typedef struct { - c4m_ffi_cif call_interface; - c4m_ffi_abi abi; - c4m_ffi_type return_type; - unsigned int fixedargs; - c4m_ffi_type *arg_types; -} c4m_ffi_info_t; - -typedef struct { - void *fn; // Can point into the VM or to ELF fn - c4m_ffi_info_t *ffi; - c4m_type_t *type; - char *name; - uint8_t flags; -} c4m_funcinfo_t; - -typedef struct { - c4m_funcinfo_t *info; // Shared when possible. - bool bound; + c4m_utf8_t *target_symbol_name; + c4m_type_t *target_type; + c4m_funcinfo_t binding; + c4m_tree_node_t *decl_loc; } c4m_callback_t; #define C4M_CB_FLAG_FFI 1 diff --git a/include/con4m/datatypes/ffi.h b/include/con4m/datatypes/ffi.h new file mode 100644 index 00000000..7a84e0ca --- /dev/null +++ b/include/con4m/datatypes/ffi.h @@ -0,0 +1,98 @@ +#pragma once +#include "con4m.h" + +// For the foreign function interface, it's easier to redeclare +// libffi's structures than to deal w/ ensuring we have the right .h +// on each architecture. 
The only thing we need that might be +// different on different platforms is the ABI; hopefully +// FFI_DEFAULT_ABI is the same everywhere, but if it isn't, that's +// easier to deal with than the header file situation. +// NOTE(review): libffi's x86-64 default appears to be FFI_UNIX64 (2), not 1 -- confirm per target. + +typedef enum { + C4M_FFI_FIRST_ABI = 0, + C4M_FFI_DEFAULT_ABI, + C4M_FFI_LAST_ABI, +} c4m_ffi_abi; + +// This is libffi's `ffi_type`. +typedef struct c4m_ffi_type { + size_t size; + unsigned short alignment; + unsigned short ffitype; + struct c4m_ffi_type **elements; +} c4m_ffi_type; + +// This is libffi's `ffi_cif` type. +typedef struct { + c4m_ffi_abi abi; + unsigned nargs; + c4m_ffi_type **arg_types; + c4m_ffi_type *rtype; + unsigned bytes; + unsigned flags; + // Currently, no platform in libffi takes more than two 'unsigned int's + // worth of space, so only one of these should be necessary, but + // adding an extra one just in case; enough platforms take two fields + // that I can see there eventually being a platform w/ 2 64-bit slots. + // We alloc ourselves based on this size, so no worries there. 
+ uint64_t extra_cif1; + uint64_t extra_cif2; +} c4m_ffi_cif; + +typedef struct { + void *fptr; + c4m_utf8_t *local_name; + c4m_utf8_t *extern_name; + uint64_t str_convert; + uint64_t hold_info; + uint64_t alloc_info; + c4m_ffi_cif cif; + c4m_ffi_type **args; + c4m_ffi_type *ret; +} c4m_zffi_cif; + +typedef enum { + C4M_FFI_OK = 0, + C4M_FFI_BAD_TYPEDEF, + C4M_FFI_BAD_ABI, + C4M_FFI_BAD_ARGTYPE +} c4m_ffi_status; + +typedef struct c4m_ffi_decl_t { + c4m_utf8_t *short_doc; + c4m_utf8_t *long_doc; + c4m_utf8_t *local_name; + struct c4m_sig_info_t *local_params; + int num_ext_params; + int global_ffi_call_ix; + c4m_utf8_t *external_name; + uint8_t *external_params; + uint8_t external_return_type; + c4m_xlist_t *dll_list; + c4m_zffi_cif cif; +} c4m_ffi_decl_t; + +extern c4m_ffi_type ffi_type_void; +extern c4m_ffi_type ffi_type_uint8; +extern c4m_ffi_type ffi_type_sint8; +extern c4m_ffi_type ffi_type_uint16; +extern c4m_ffi_type ffi_type_sint16; +extern c4m_ffi_type ffi_type_uint32; +extern c4m_ffi_type ffi_type_sint32; +extern c4m_ffi_type ffi_type_uint64; +extern c4m_ffi_type ffi_type_sint64; +extern c4m_ffi_type ffi_type_float; +extern c4m_ffi_type ffi_type_double; +extern c4m_ffi_type ffi_type_pointer; + +#define ffi_type_uchar ffi_type_uint8 +#define ffi_type_schar ffi_type_sint8 +#define ffi_type_ushort ffi_type_uint16 +#define ffi_type_sshort ffi_type_sint16 +#define ffi_type_ushort ffi_type_uint16 +#define ffi_type_sshort ffi_type_sint16 +#define ffi_type_uint ffi_type_uint32 +#define ffi_type_sint ffi_type_sint32 +#define ffi_type_ulong ffi_type_uint64 +#define ffi_type_slong ffi_type_sint64 diff --git a/include/con4m/datatypes/literals.h b/include/con4m/datatypes/literals.h index ebafb7ff..dc239963 100644 --- a/include/con4m/datatypes/literals.h +++ b/include/con4m/datatypes/literals.h @@ -13,3 +13,12 @@ typedef enum { ST_Tuple = 8, ST_MAX = 9 } c4m_lit_syntax_t; + +typedef struct { + struct c4m_str_t *litmod; + c4m_lit_syntax_t st; + struct c4m_type_t 
*cast_to; + c4m_builtin_t base_type; // only set for containers from here down. + struct c4m_type_t *type; + int num_items; +} c4m_lit_info_t; diff --git a/include/con4m/datatypes/memory.h b/include/con4m/datatypes/memory.h index 243a3de2..872f0069 100644 --- a/include/con4m/datatypes/memory.h +++ b/include/con4m/datatypes/memory.h @@ -74,6 +74,7 @@ typedef struct c4m_finalizer_info_t { typedef struct c4m_arena_t { c4m_alloc_hdr *next_alloc; c4m_dict_t *roots; + c4m_set_t *external_holds; // queue_t *late_mutations; uint64_t *heap_end; c4m_finalizer_info_t *to_finalize; diff --git a/include/con4m/datatypes/strings.h b/include/con4m/datatypes/strings.h index 476de97b..bf688283 100644 --- a/include/con4m/datatypes/strings.h +++ b/include/con4m/datatypes/strings.h @@ -6,7 +6,7 @@ ** NOT to distinguish whether strings are UTF-8 (the high bit will ** always be 0 with UTF-8). **/ -typedef struct { +typedef struct c4m_str_t { // clang-format off alignas(8) int32_t codepoints; diff --git a/include/con4m/datatypes/ufi.h b/include/con4m/datatypes/ufi.h new file mode 100644 index 00000000..92f6f808 --- /dev/null +++ b/include/con4m/datatypes/ufi.h @@ -0,0 +1,60 @@ +#pragma once +#include "con4m.h" + +typedef struct { + c4m_utf8_t *name; + c4m_type_t *type; + unsigned int ffi_holds : 1; + unsigned int ffi_allocs : 1; +} c4m_fn_param_info_t; + +typedef struct c4m_sig_info_t { + c4m_type_t *full_type; + c4m_fn_param_info_t *param_info; + c4m_fn_param_info_t return_info; + int num_params; + unsigned int pure : 1; + unsigned int void_return : 1; + c4m_scope_t *fn_scope; + c4m_scope_t *formals; +} c4m_sig_info_t; + +typedef struct { + c4m_utf8_t *short_doc; + c4m_utf8_t *long_doc; + c4m_sig_info_t *signature_info; + struct c4m_cfg_node_t *cfg; + int32_t frame_size; + // sc = 'short circuit' + // If we are a 'once' function, this is the offset into static data, + // where we will place: + // + // - A boolean. 
+ // - A pthread_mutex_t + // - A void * + // + // The idea is, if the boolean is true, we only ever read and + // return the cached (memoized) result, stored in the void *. If + // it's false, we grab the lock, check the boolean a second time, + // run the function, set the memo and the boolean, and then + // unlock. + int32_t sc_lock_offset; + int32_t sc_bool_offset; + int32_t sc_memo_offset; + int32_t local_id; + int32_t offset; + int32_t module_id; + + unsigned int private : 1; + unsigned int once : 1; +} c4m_fn_decl_t; + +typedef struct c4m_funcinfo_t { + union { + c4m_ffi_decl_t *ffi_interface; + c4m_fn_decl_t *local_interface; + } implementation; + + unsigned int ffi : 1; + unsigned int va : 1; +} c4m_funcinfo_t; diff --git a/include/con4m/datatypes/vm.h b/include/con4m/datatypes/vm.h index b597d7cd..71d78539 100644 --- a/include/con4m/datatypes/vm.h +++ b/include/con4m/datatypes/vm.h @@ -407,36 +407,15 @@ typedef union c4m_stack_value_t { void *vptr; uint64_t uint; int64_t sint; // signed int values. + c4m_box_t box; double dbl; bool boolean; char *cptr; union c4m_stack_value_t *fp; // saved fp } c4m_stack_value_t; -typedef struct { - // whether passing a pointer to the thing causes it to hold the pointer, - // in which case decref must be explicit. - bool held; - // this passes a value back that was allocated in the FFI. - bool alloced; - // an index into the CTypeNames data structure in ffi.nim. - int16_t arg_type; - // To look up any FFI processing we do for the type. - int32_t our_type; - c4m_str_t *name; -} c4m_zffi_arg_info_t; - -typedef struct { - int64_t nameoffset; - int64_t localname; - int32_t mid; // module_id - c4m_type_t *tid; - bool va; - c4m_xlist_t *dlls; // int64_t - c4m_xlist_t *arg_info; // tspec_ref: c4m_zffi_arg_info_t - c4m_str_t *shortdoc; - c4m_str_t *longdoc; -} c4m_zffi_info_t; +// Might want to trim a bit out of it, but for right now, am not going to. 
+typedef struct c4m_ffi_decl_t c4m_zffi_info_t; typedef struct { int64_t offset; @@ -555,6 +534,8 @@ typedef struct { c4m_dict_t *attrs; // string, c4m_attr_contents_t (tspec_ref) c4m_set_t *all_sections; // string c4m_dict_t *section_docs; // string, c4m_docs_container_t (tspec_ref) + c4m_xlist_t *ffi_info; + int ffi_info_entries; bool using_attrs; } c4m_vm_t; diff --git a/include/con4m/ffi.h b/include/con4m/ffi.h new file mode 100644 index 00000000..92fde203 --- /dev/null +++ b/include/con4m/ffi.h @@ -0,0 +1,22 @@ +#pragma once +#include "con4m.h" + +extern void c4m_add_static_function(c4m_utf8_t *, void *); +extern void *c4m_ffi_find_symbol(c4m_utf8_t *, c4m_xlist_t *); +extern int64_t c4m_lookup_ctype_id(char *); +extern c4m_ffi_type *c4m_ffi_arg_type_map(uint8_t); +extern void *c4m_ref_via_ffi_type(c4m_box_t *, c4m_ffi_type *); +extern c4m_ffi_status ffi_prep_cif(c4m_ffi_cif *, + c4m_ffi_abi, + unsigned int, + c4m_ffi_type *, + c4m_ffi_type **); +extern c4m_ffi_status ffi_prep_cif_var(c4m_ffi_cif *, + c4m_ffi_abi, + unsigned int, + unsigned int, + c4m_ffi_type *, + c4m_ffi_type **); +extern void ffi_call(c4m_ffi_cif *, void *, void *, void **); + +#define C4M_CSTR_CTYPE_CONST 24 diff --git a/include/con4m/gc.h b/include/con4m/gc.h index 16517c22..b4a90b75 100644 --- a/include/con4m/gc.h +++ b/include/con4m/gc.h @@ -181,6 +181,8 @@ c4m_gc_malloc(size_t len) extern void c4m_get_stack_scan_region(uint64_t *top, uint64_t *bottom); extern void c4m_initialize_gc(); extern void c4m_gc_heap_stats(uint64_t *, uint64_t *, uint64_t *); +extern void c4m_gc_add_hold(c4m_obj_t); +extern void c4m_gc_remove_hold(c4m_obj_t); extern c4m_arena_t *c4m_internal_stash_heap(); extern void c4m_internal_unstash_heap(); extern void c4m_internal_set_heap(c4m_arena_t *); diff --git a/include/con4m/math.h b/include/con4m/math.h index ffdfd67b..ca5c64a5 100644 --- a/include/con4m/math.h +++ b/include/con4m/math.h @@ -1,6 +1,8 @@ // Random math stuff #include "con4m.h" +extern uint64_t 
c4m_clz(uint64_t); + static inline uint64_t c4m_int_log2(uint64_t n) { diff --git a/include/con4m/stream.h b/include/con4m/stream.h index 4e58618e..f493d630 100644 --- a/include/con4m/stream.h +++ b/include/con4m/stream.h @@ -159,6 +159,7 @@ c4m_file_iostream(c4m_str_t *filename, bool no_create) c4m_stream_t *c4m_get_stdin(); c4m_stream_t *c4m_get_stdout(); c4m_stream_t *c4m_get_stderr(); +void c4m_init_std_streams(); static inline bool c4m_stream_using_cookie(c4m_stream_t *s) diff --git a/meson.build b/meson.build index bf3336c1..c654c759 100644 --- a/meson.build +++ b/meson.build @@ -94,6 +94,7 @@ c4m_src = ['src/con4m/style.c', 'src/con4m/path.c', 'src/con4m/flags.c', 'src/con4m/box.c', + 'src/con4m/ffi.c', 'src/con4m/crypto/sha.c', 'src/con4m/compiler/compile.c', 'src/con4m/compiler/lex.c', @@ -165,7 +166,7 @@ test_src = ['src/tests/test.c'] threads = dependency('threads') math = cc.find_library('m', required : false) -ffi = cc.find_library('ffi') +ffi = cc.find_library('ffi', required : true, dirs: meson.current_source_dir() + '/deps/local/') crypto = cc.find_library('crypto') ssl = cc.find_library('ssl') diff --git a/src/con4m/callback.c b/src/con4m/callback.c index 2be8ee77..a6f439e0 100644 --- a/src/con4m/callback.c +++ b/src/con4m/callback.c @@ -1,73 +1,24 @@ #include "con4m.h" -static c4m_dict_t *bound_functions = NULL; +// At least for the time being, we will statically ensure that there +// is a function in the compilation context with the right name and +// signature. For extern stuff, we will not attempt to bind until +// runtime. +// +// Eventually (when we go the REPL) we might want to revise this +// depending on how hot reloading is handled for bindings. +// +// To that end, all callback objects should currently be statically +// bound and unmarshaled from const space. 
static void callback_init(c4m_callback_t *cb, va_list args) { - c4m_type_t *type = NULL; - void *address = NULL; - c4m_str_t *symbol_name = NULL; - c4m_xlist_t *libraries = NULL; // of c4m_str_t - int32_t static_link = 0; - bool ffi = false; - // bool bind_now = false; + c4m_str_t *symbol_name = va_arg(args, c4m_utf8_t *); + c4m_type_t *type = va_arg(args, c4m_type_t *); - c4m_karg_va_init(args); - c4m_kw_ptr("type", type); - c4m_kw_ptr("address", address); - c4m_kw_ptr("symbol_name", symbol_name); - c4m_kw_int32("static_linking", static_link); - c4m_kw_bool("ffi", ffi); - // c4m_kw_bool("bind_now", bind_now); - - c4m_funcinfo_t *info = NULL; - - if (bound_functions == NULL) { - bound_functions = c4m_new(c4m_tspec_dict(c4m_tspec_ref(), - c4m_tspec_ref())); - c4m_gc_register_root(&bound_functions, 1); - } - - if (address == NULL) { - if (!symbol_name) { - C4M_CRAISE("Not enough information for callback."); - } - - address = dlsym(RTLD_DEFAULT, symbol_name->data); - - if (!address && libraries != NULL) { - for (int i = 0; i < c4m_xlist_len(libraries); i++) { - c4m_utf8_t *s = c4m_to_utf8(c4m_xlist_get(libraries, i, NULL)); - address = dlopen(s->data, RTLD_NOW | RTLD_GLOBAL); - - if (address != NULL) { - break; - } - } - } - } - - if (address != NULL) { - info = hatrack_dict_get(bound_functions, address, NULL); - } - - if (info == NULL) { - info = c4m_gc_alloc(c4m_funcinfo_t); - info->fn = address; - info->name = symbol_name->data; - info->type = type; - - if (ffi) { - info->flags = C4M_CB_FLAG_FFI; - } - - if (static_link) { - info->flags |= C4M_CB_FLAG_STATIC; - } - } - - cb->info = info; + cb->target_symbol_name = c4m_to_utf8(symbol_name); + cb->target_type = type; } const c4m_vtable_t c4m_callback_vtable = { diff --git a/src/con4m/compiler/ast_utils.c b/src/con4m/compiler/ast_utils.c index c89adea5..2c67665b 100644 --- a/src/con4m/compiler/ast_utils.c +++ b/src/con4m/compiler/ast_utils.c @@ -209,11 +209,10 @@ node_to_callback(c4m_file_compile_ctx *ctx, 
c4m_tree_node_t *n) c4m_utf8_t *name = node_text(c4m_tree_get_child(n, 0)); c4m_type_t *type = c4m_node_to_type(ctx, c4m_tree_get_child(n, 1), NULL); - return c4m_new(c4m_tspec_callback(), - c4m_kw("symbol_name", - c4m_ka(name), - "type", - c4m_ka(type))); + c4m_callback_t *result = c4m_new(c4m_tspec_callback(), name, type); + result->decl_loc = n; + + return result; } c4m_type_t * diff --git a/src/con4m/compiler/check_pass.c b/src/con4m/compiler/check_pass.c index aaca9a5f..8028a5e2 100644 --- a/src/con4m/compiler/check_pass.c +++ b/src/con4m/compiler/check_pass.c @@ -1911,6 +1911,7 @@ check_literal(pass2_ctx *ctx) break; case c4m_nt_lit_callback: pnode->value = node_to_callback(ctx->file_ctx, ctx->node); + c4m_xlist_append(ctx->file_ctx->callback_literals, pnode->value); break; case c4m_nt_lit_tspec: do { @@ -2838,6 +2839,93 @@ process_deferred_calls(c4m_compile_ctx *cctx, } } +static void +process_deferred_callbacks(c4m_compile_ctx *cctx) +{ + // Now that we have a 'whole program view', go ahead and + // try to find a match for any callback literals used. + // + // Meaning, there must either be an in-scope con4m function, or + // an extern declaration that matches the callback, as viewed from + // the module in which the symbol was declared. 
+ + int n = c4m_xlist_len(cctx->module_ordering); + c4m_utf8_t *s; + + for (int i = 0; i < n; i++) { + c4m_file_compile_ctx *f = c4m_xlist_get(cctx->module_ordering, i, NULL); + int m = c4m_xlist_len(f->callback_literals); + for (int j = 0; j < m; j++) { + c4m_callback_t *cb = c4m_xlist_get(f->callback_literals, j, NULL); + + c4m_scope_entry_t *sym = c4m_symbol_lookup(NULL, + f->module_scope, + f->global_scope, + NULL, + cb->target_symbol_name); + + if (!sym) { + c4m_add_error(f, c4m_err_callback_no_match, cb->decl_loc); + continue; // Keep checking the remaining callbacks. + } + + switch (sym->kind) { + case sk_func: + cb->binding.ffi = 0; + cb->binding.implementation.local_interface = sym->value; // native: c4m_fn_decl_t * + break; + case sk_extern_func: + cb->binding.ffi = 1; + cb->binding.implementation.ffi_interface = sym->value; // extern: c4m_ffi_decl_t * + break; + default:; + c4m_tree_node_t *l = sym->declaration_node; + if (l == NULL) { + l = c4m_xlist_get(sym->sym_defs, 0, NULL); + } + s = c4m_node_get_loc_str(l); + c4m_add_error(f, c4m_err_callback_bad_target, cb->decl_loc, s); + continue; // Not callable; report it and move to the next callback. + } + + c4m_type_t *sym_type = c4m_global_copy(sym->type); + c4m_type_t *lit_type = cb->target_type; + c4m_type_t *merged = c4m_merge_types(sym_type, lit_type); + + if (c4m_tspec_is_error(merged)) { + s = c4m_node_get_loc_str(sym->declaration_node); + c4m_add_error(f, + c4m_err_callback_type_mismatch, + cb->decl_loc, + lit_type, + sym_type, + s); + } + } + } +} + +static void +order_ffi_decls(c4m_compile_ctx *cctx) +{ + // TODO: when incrementally compiling we need to take into + // account existing FFI decl indexing. 
+ int n = c4m_xlist_len(cctx->module_ordering); + int ix = 0; + + for (int i = 0; i < n; i++) { + c4m_file_compile_ctx *f = c4m_xlist_get(cctx->module_ordering, i, NULL); + int m = c4m_xlist_len(f->extern_decls); + + for (int j = 0; j < m; j++) { + c4m_scope_entry_t *sym = c4m_xlist_get(f->extern_decls, j, NULL); + c4m_ffi_decl_t *decl = (c4m_ffi_decl_t *)sym->value; + + decl->global_ffi_call_ix = ix++; + } + } +} + void c4m_check_pass(c4m_compile_ctx *cctx) { @@ -2874,7 +2962,9 @@ c4m_check_pass(c4m_compile_ctx *cctx) } } + order_ffi_decls(cctx); process_deferred_calls(cctx, all_deferred, num_deferred); + process_deferred_callbacks(cctx); for (int i = 0; i < n; i++) { c4m_file_compile_ctx *f = c4m_xlist_get(cctx->module_ordering, i, NULL); diff --git a/src/con4m/compiler/codegen.c b/src/con4m/compiler/codegen.c index d8ba6eca..222f282d 100644 --- a/src/con4m/compiler/codegen.c +++ b/src/con4m/compiler/codegen.c @@ -535,7 +535,7 @@ static inline void gen_run_callback(gen_ctx *ctx, c4m_callback_t *cb) { uint32_t offset = c4m_layout_const_obj(ctx->cctx, cb); - c4m_type_t *t = cb->info->type; + c4m_type_t *t = cb->target_type; int nargs = c4m_tspec_get_num_params(t) - 1; c4m_type_t *ret_type = c4m_tspec_get_param(t, nargs); bool useret = !(c4m_tspecs_are_compat(ret_type, @@ -597,6 +597,71 @@ gen_unbox_if_needed(gen_ctx *ctx, } } +static void +gen_native_call(gen_ctx *ctx, c4m_scope_entry_t *fsym) +{ + // Needed to calculate the loc of module variables. + // Currently that's done at runtime, tho could be done in + // a proper link pass in the future. + int target_module; + // Index into the object file's cache. + int target_fn_id; + int loc = ctx->instruction_counter; + // When the call is generated, we might not have generated the + // function we're calling. In that case, we will just generate + // a stub and add the actual call instruction to a backpatch + // list that gets processed at the end of compilation. 
+ // + // To test this reliably, we can check the 'offset' field of + // the function info object, as a function never starts at + // offset 0. + c4m_fn_decl_t *decl = fsym->value; + target_module = decl->module_id; + target_fn_id = decl->local_id; + + emit(ctx, + C4M_Z0Call, + c4m_kw("arg", c4m_ka(target_fn_id), "module_id", c4m_ka(target_module))); + + if (target_fn_id == 0) { + call_backpatch_info_t *bp; + + bp = c4m_gc_alloc(call_backpatch_info_t); + bp->decl = decl; + bp->i = c4m_xlist_get(ctx->instructions, loc, NULL); + + c4m_xlist_append(ctx->call_backpatches, bp); + } + + int n = decl->signature_info->num_params; + + if (n != 0) { + emit(ctx, C4M_ZMoveSp, c4m_kw("arg", c4m_ka(-n))); + } + + if (!decl->signature_info->void_return) { + emit(ctx, C4M_ZPushFromR0); + gen_unbox_if_needed(ctx, ctx->cur_node, fsym); + } +} + +static void +gen_extern_call(gen_ctx *ctx, c4m_scope_entry_t *fsym) +{ + c4m_ffi_decl_t *decl = (c4m_ffi_decl_t *)fsym->value; + + emit(ctx, C4M_ZFFICall, c4m_kw("arg", c4m_ka(decl->global_ffi_call_ix))); + + if (decl->num_ext_params != 0) { + emit(ctx, C4M_ZMoveSp, c4m_kw("arg", c4m_ka(-decl->num_ext_params))); + } + + if (!decl->local_params->void_return) { + emit(ctx, C4M_ZPushFromR0); + gen_unbox_if_needed(ctx, ctx->cur_node, fsym); + } +} + static void gen_call(gen_ctx *ctx) { @@ -612,51 +677,10 @@ gen_call(gen_ctx *ctx) } if (fsym->kind != sk_func) { - // emit(ctx, C4M_ZMoveSp, c4m_kw("arg", c4m_ka(-1 * nargs))); + gen_extern_call(ctx, fsym); } else { - // Needed to calculate the loc of module variables. - // Currently that's done at runtime, tho could be done in - // a proper link pass in the future. - int target_module; - // Index into the object file's cache. - int target_fn_id; - int loc = ctx->instruction_counter; - // When the call is generated, we might not have generated the - // function we're calling. 
In that case, we will just generate - // a stub and add the actual call instruction to a backpatch - // list that gets processed at the end of compilation. - // - // To test this reliably, we can check the 'offset' field of - // the function info object, as a function never starts at - // offset 0. - c4m_fn_decl_t *decl = fsym->value; - target_module = decl->module_id; - target_fn_id = decl->local_id; - - emit(ctx, - C4M_Z0Call, - c4m_kw("arg", c4m_ka(target_fn_id), "module_id", target_module)); - - if (target_fn_id == 0) { - call_backpatch_info_t *bp; - - bp = c4m_gc_alloc(call_backpatch_info_t); - bp->decl = decl; - bp->i = c4m_xlist_get(ctx->instructions, loc, NULL); - - c4m_xlist_append(ctx->call_backpatches, bp); - } - - n = decl->signature_info->num_params; - - if (n != 0) { - emit(ctx, C4M_ZMoveSp, c4m_kw("arg", c4m_ka(-n))); - } - if (!decl->signature_info->void_return) { - emit(ctx, C4M_ZPushFromR0); - gen_unbox_if_needed(ctx, ctx->cur_node, fsym); - } + gen_native_call(ctx, fsym); } } @@ -2034,6 +2058,18 @@ gen_module_code(gen_ctx *ctx, c4m_vm_t *vm) gen_function(ctx, sym, module, vm); } + int l = c4m_xlist_len(ctx->fctx->extern_decls); + if (l != 0) { + for (int j = 0; j < l; j++) { + c4m_scope_entry_t *d = c4m_xlist_get(ctx->fctx->extern_decls, + j, + NULL); + c4m_ffi_decl_t *decl = d->value; + + c4m_xlist_append(vm->obj->ffi_info, decl); + } + } + // Version is not used yet. // Init size not done yet. // datasyms not set yet. 
diff --git a/src/con4m/compiler/decl_pass.c b/src/con4m/compiler/decl_pass.c index 46571328..337ed988 100644 --- a/src/con4m/compiler/decl_pass.c +++ b/src/con4m/compiler/decl_pass.c @@ -502,10 +502,29 @@ handle_param_block(pass1_ctx *ctx) switch (prop_name->data[0]) { case 'v': - prop->validator = lit; + prop->validator = node_to_callback(ctx->file_ctx, lit); + if (!prop->validator) { + c4m_add_error(ctx->file_ctx, + c4m_err_spec_callback_required, + prop_node); + } + else { + c4m_xlist_append(ctx->file_ctx->callback_literals, + prop->validator); + } + break; case 'c': - prop->callback = lit; + prop->callback = node_to_callback(ctx->file_ctx, lit); + if (!prop->callback) { + c4m_add_error(ctx->file_ctx, + c4m_err_spec_callback_required, + prop_node); + } + else { + c4m_xlist_append(ctx->file_ctx->callback_literals, + prop->callback); + } break; case 'd': prop->default_value = lit; @@ -603,6 +622,7 @@ one_section_prop(pass1_ctx *ctx, } else { section->validator = callback; + c4m_xlist_append(ctx->file_ctx->callback_literals, callback); } break; case 'r': // require @@ -740,6 +760,7 @@ one_field(pass1_ctx *ctx, } else { f->validator = callback; + c4m_xlist_append(ctx->file_ctx->callback_literals, callback); } break; default: @@ -1098,16 +1119,17 @@ handle_func_decl(pass1_ctx *ctx) static void handle_extern_block(pass1_ctx *ctx) { - c4m_ffi_decl_t *info = c4m_gc_alloc(c4m_ffi_info_t); + c4m_ffi_decl_t *info = c4m_gc_alloc(c4m_ffi_decl_t); c4m_utf8_t *external_name = node_text(get_match(ctx, c4m_first_kid_id)); - c4m_xlist_t *ext_params = apply_pattern(ctx, c4m_extern_params); c4m_tree_node_t *ext_ret = get_match(ctx, c4m_extern_return); - c4m_pnode_t *pnode = get_pnode(cur_node(ctx)); + c4m_tree_node_t *cur = cur_node(ctx); c4m_tree_node_t *ext_pure = get_match(ctx, c4m_find_pure); c4m_tree_node_t *ext_holds = get_match(ctx, c4m_find_holds); c4m_tree_node_t *ext_allocs = get_match(ctx, c4m_find_allocs); + c4m_tree_node_t *csig = cur_node(ctx)->children[1]; 
c4m_tree_node_t *ext_lsig = get_match(ctx, c4m_find_extern_local); + c4m_pnode_t *pnode = get_pnode(cur); if (pnode->short_doc) { info->short_doc = c4m_token_raw_content(pnode->short_doc); @@ -1117,14 +1139,27 @@ handle_extern_block(pass1_ctx *ctx) } } - if (ext_params != NULL) { - int64_t n = c4m_xlist_len(ext_params); - info->num_params = n; - info->external_name = external_name; + for (int i = 2; i < cur->num_kids; i++) { + c4m_pnode_t *kid = get_pnode(cur->children[i]); + + if (kid->kind == c4m_nt_extern_dll) { + if (info->dll_list == NULL) { + info->dll_list = c4m_xlist(c4m_tspec_utf8()); + } + c4m_utf8_t *s = node_text(cur->children[i]->children[0]); + c4m_xlist_append(info->dll_list, s); + } + } + + int64_t n = csig->num_kids - 1; + info->num_ext_params = n; + info->external_name = external_name; + + if (n) { info->external_params = c4m_gc_array_alloc(uint8_t, n); for (int64_t i = 0; i < n; i++) { - c4m_tree_node_t *tnode = c4m_xlist_get(ext_params, i, NULL); + c4m_tree_node_t *tnode = csig->children[i]; c4m_pnode_t *pnode = c4m_tree_get_contents(tnode); uint64_t val = (uint64_t)pnode->extra_info; @@ -1171,11 +1206,13 @@ handle_extern_block(pass1_ctx *ctx) continue; } param->ffi_holds = 1; - uint64_t flag = (uint64_t)(1 << j); - if (bitfield & flag) { - c4m_add_warning(ctx->file_ctx, c4m_warn_dupe_hold, kid); + if (j < 64) { + uint64_t flag = (uint64_t)1 << j; // Widen before shifting; `1 << j` is an int shift. + if (bitfield & flag) { + c4m_add_warning(ctx->file_ctx, c4m_warn_dupe_hold, kid); + } + bitfield |= flag; } - bitfield |= flag; goto next_i; } c4m_add_error(ctx->file_ctx, c4m_err_bad_hold_name, kid); @@ -1183,6 +1220,7 @@ handle_extern_block(pass1_ctx *ctx) next_i: /* nothing. 
*/; } + info->cif.hold_info = bitfield; } if (ext_allocs) { @@ -1210,11 +1248,15 @@ handle_extern_block(pass1_ctx *ctx) continue; } param->ffi_allocs = 1; - uint64_t flag = (uint64_t)(1 << j); - if (bitfield & flag) { - c4m_add_warning(ctx->file_ctx, c4m_warn_dupe_alloc, kid); + if (j < 63) { + uint64_t flag = (uint64_t)(1 << j); + if (bitfield & flag) { + c4m_add_warning(ctx->file_ctx, + c4m_warn_dupe_alloc, + kid); + } + bitfield |= flag; } - bitfield |= flag; goto next_alloc; } c4m_add_error(ctx->file_ctx, c4m_err_bad_alloc_name, kid); @@ -1222,6 +1264,7 @@ handle_extern_block(pass1_ctx *ctx) next_alloc: /* nothing. */; } + info->cif.alloc_info = bitfield; } c4m_scope_entry_t *sym = declare_sym(ctx, @@ -1236,6 +1279,8 @@ handle_extern_block(pass1_ctx *ctx) sym->type = info->local_params->full_type; sym->value = (void *)info; } + + c4m_xlist_append(ctx->file_ctx->extern_decls, sym); } static void @@ -1406,14 +1451,16 @@ c4m_file_decl_pass(c4m_compile_ctx *cctx, c4m_file_compile_ctx *file_ctx) set_current_node(&ctx, file_ctx->parse_tree); - file_ctx->global_scope = c4m_new_scope(NULL, C4M_SCOPE_GLOBAL); - file_ctx->module_scope = c4m_new_scope(file_ctx->global_scope, + file_ctx->global_scope = c4m_new_scope(NULL, C4M_SCOPE_GLOBAL); + file_ctx->module_scope = c4m_new_scope(file_ctx->global_scope, C4M_SCOPE_MODULE); - file_ctx->attribute_scope = c4m_new_scope(NULL, C4M_SCOPE_ATTRIBUTES); - file_ctx->imports = c4m_new_scope(NULL, C4M_SCOPE_IMPORTS); - file_ctx->parameters = c4m_new(c4m_tspec_dict(c4m_tspec_utf8(), + file_ctx->attribute_scope = c4m_new_scope(NULL, C4M_SCOPE_ATTRIBUTES); + file_ctx->imports = c4m_new_scope(NULL, C4M_SCOPE_IMPORTS); + file_ctx->parameters = c4m_new(c4m_tspec_dict(c4m_tspec_utf8(), c4m_tspec_ref())); - file_ctx->fn_def_syms = c4m_new(c4m_tspec_xlist(c4m_tspec_ref())); + file_ctx->fn_def_syms = c4m_new(c4m_tspec_xlist(c4m_tspec_ref())); + file_ctx->callback_literals = c4m_new(c4m_tspec_xlist(c4m_tspec_ref())); + file_ctx->extern_decls = 
c4m_new(c4m_tspec_xlist(c4m_tspec_ref())); ctx.cur->static_scope = file_ctx->module_scope; ctx.static_scope = file_ctx->module_scope; diff --git a/src/con4m/compiler/disasm.c b/src/con4m/compiler/disasm.c index 6a8f0df0..8d76fef0 100644 --- a/src/con4m/compiler/disasm.c +++ b/src/con4m/compiler/disasm.c @@ -306,6 +306,10 @@ const inst_info_t inst_info[256] = { .arg_fmt = fmt_hex, // Should add a fmt here. .show_module = 1, }, + [C4M_ZFFICall] = { + .name = "ZFFICall", + .arg_fmt = fmt_hex, + }, [C4M_ZLockOnWrite] = { .name = "ZLockOnWrite", }, diff --git a/src/con4m/compiler/errors.c b/src/con4m/compiler/errors.c index 7803ba21..24fba00f 100644 --- a/src/con4m/compiler/errors.c +++ b/src/con4m/compiler/errors.c @@ -1108,6 +1108,28 @@ static error_info_t error_info[] = { "return values.", false, }, + [c4m_err_callback_no_match] = { + c4m_err_callback_no_match, + "callback_no_match", + "Callback does not have a matching declaration. It requires either a " + "con4m function, or an [em]extern[/] declaration.", + false, + }, + [c4m_err_callback_bad_target] = { + c4m_err_callback_bad_target, + "callback_bad_target", + "Callback matches a symbol that is defined, but not as a callable " + "function. First definition is here: {}", + true, + }, + [c4m_err_callback_type_mismatch] = { + c4m_err_callback_type_mismatch, + "callback_type_mismatch", + "Declared callback type is not compatible with the implementation " + "callback type ([em]{}[/] vs declared type [em]{}[/]).
" + " Declaration is here: {}", + true, + }, [c4m_err_last] = { c4m_err_last, "last", diff --git a/src/con4m/compiler/lex.c b/src/con4m/compiler/lex.c index af37ceb9..3d7eb313 100644 --- a/src/con4m/compiler/lex.c +++ b/src/con4m/compiler/lex.c @@ -427,14 +427,13 @@ scan_int_or_float_literal(lex_state_t *state) goto finished_int; } } -finished_int: { +finished_int:; uint64_t n = (uint64_t)val; state->pos = state->start + i; LITERAL_TOK(c4m_tt_int_lit); state->last_token->literal_value = (void *)n; return; } -} static inline void scan_hex_literal(lex_state_t *state) @@ -483,6 +482,7 @@ scan_int_float_or_hex_literal(lex_state_t *state) switch (peek(state)) { case 'x': case 'X': + advance(state); scan_hex_literal(state); return; default: diff --git a/src/con4m/compiler/parse.c b/src/con4m/compiler/parse.c index 8ca48799..710c35c3 100644 --- a/src/con4m/compiler/parse.c +++ b/src/con4m/compiler/parse.c @@ -142,65 +142,6 @@ static c4m_tree_node_t *bit_or_expr_rhs(parse_ctx *); static c4m_tree_node_t *ne_expr_rhs(parse_ctx *); static c4m_tree_node_t *and_expr_rhs(parse_ctx *); -typedef struct { - char *ctype_name; - int index; - bool can_take_param; -} ctype_name_info_t; - -static const ctype_name_info_t ctype_info[] = { - // clang-format off - { "void", 0, false, }, - { "cvoid", 0, false, }, - { "u8", 1, false, }, - { "cu8", 1, false, }, - { "i8", 2, false, }, - { "ci8", 2, false, }, - { "u16", 3, false, }, - { "cu16", 3, false, }, - { "i16", 4, false, }, - { "ci16", 4, false, }, - { "u32", 5, false, }, - { "cu32", 5, false, }, - { "i32", 6, false, }, - { "ci32", 6, false, }, - { "u64", 7, false, }, - { "cu64", 7, false, }, - { "i64", 8, false, }, - { "ci64", 8, false, }, - { "cfloat", 9, false, }, - { "cdouble", 10, false, }, - { "double", 10, false, }, - { "cuchar", 11, false, }, - { "cchar", 12, false, }, - { "short", 13, false, }, - { "cshort", 13, false, }, - { "ushort", 14, false, }, - { "cushort", 14, false, }, - { "cint", 15, false, }, - { "cuint", 16, false, }, 
- { "long", 17, false, }, - { "ulong", 18, false, }, - { "bool", 19, false, }, - { "size_t", 20, false, }, - { "size", 20, false, }, - { "size_t", 20, false, }, - { "csize", 20, false, }, - { "csize_t", 20, false, }, - { "ssize", 21, false, }, - { "ssize_t", 21, false, }, - { "cssize", 21, false, }, - { "ssize_t", 21, false, }, - { "cssize_t", 21, false, }, - { "ptr", 22, true, }, - { "pointer", 22, true, }, - { "cstring", 23, true, }, - { "carray", 24, true, }, - { "array", 24, true, }, - { NULL, 0, false, }, - // clang-format on -}; - typedef struct { char *name; unsigned int show_contents : 1; @@ -294,23 +235,6 @@ static const node_type_info_t node_type_info[] = { // clang-format on }; -static int -lookup_ctype_id(char *found) -{ - ctype_name_info_t *info = (ctype_name_info_t *)&ctype_info[0]; - - while (true) { - if (info->ctype_name == NULL) { - return -1; - } - if (!strcmp(info->ctype_name, found)) { - return info->index; - } - - info++; - } -} - #ifdef PARSE_DEBUG static inline c4m_token_t * _tok_cur(parse_ctx *ctx, int line) @@ -1182,7 +1106,7 @@ extern_local(parse_ctx *ctx) static void extern_dll(parse_ctx *ctx) { - start_node(ctx, c4m_nt_extern_local, true); + start_node(ctx, c4m_nt_extern_dll, true); if (!expect(ctx, c4m_tt_colon)) { end_node(ctx); return; @@ -1263,6 +1187,24 @@ extern_allocs(parse_ctx *ctx) return; } +static void +extern_sig_item(parse_ctx *ctx, c4m_node_kind_t kind) +{ + char *txt = identifier_text(tok_cur(ctx))->data; + int64_t ctype_id = (int64_t)c4m_lookup_ctype_id(txt); + if (ctype_id == -1) { + add_parse_error(ctx, c4m_err_parse_bad_ctype_id); + consume(ctx); + } + else { + start_node(ctx, kind, true); + c4m_pnode_t *n = current_parse_node(ctx); + + n->extra_info = (void *)(uint64_t)ctype_id; + end_node(ctx); + } +} + static void extern_signature(parse_ctx *ctx) { @@ -1274,7 +1216,9 @@ extern_signature(parse_ctx *ctx) if (tok_kind(ctx) == c4m_tt_rparen) { consume(ctx); - end_node(ctx); - opt_return_type(ctx); + expect(ctx,
c4m_tt_arrow); + extern_sig_item(ctx, c4m_nt_lit_tspec_return_type); + end_node(ctx); + return; } while (true) { @@ -1283,20 +1227,7 @@ extern_signature(parse_ctx *ctx) consume(ctx); } else { - char *txt = identifier_text(tok_cur(ctx))->data; - int64_t ctype_id = (int64_t)lookup_ctype_id(txt); - - if (ctype_id == -1) { - add_parse_error(ctx, c4m_err_parse_bad_ctype_id); - consume(ctx); - } - else { - start_node(ctx, c4m_nt_extern_param, true); - c4m_pnode_t *n = current_parse_node(ctx); - - n->extra_info = (void *)(uint64_t)ctype_id; - end_node(ctx); - } + extern_sig_item(ctx, c4m_nt_extern_param); } if (tok_kind(ctx) != c4m_tt_comma) { @@ -1306,7 +1237,8 @@ extern_signature(parse_ctx *ctx) } expect(ctx, c4m_tt_rparen); - opt_return_type(ctx); + expect(ctx, c4m_tt_arrow); + extern_sig_item(ctx, c4m_nt_lit_tspec_return_type); end_node(ctx); } diff --git a/src/con4m/compiler/scope.c b/src/con4m/compiler/scope.c index 59420088..54a3f3c5 100644 --- a/src/con4m/compiler/scope.c +++ b/src/con4m/compiler/scope.c @@ -404,8 +404,12 @@ c4m_format_scope(c4m_scope_t *scope) c4m_utf8_t *kind; c4m_scope_entry_t *entry = values[i]; - kind = c4m_type_is_declared(entry) ?
decl_const : inf_const; - row = c4m_new_table_row(); + kind = inf_const; + + if (c4m_type_is_declared(entry) || entry->kind == sk_extern_func) { + kind = decl_const; + } + row = c4m_new_table_row(); c4m_xlist_append(row, entry->name); diff --git a/src/con4m/ffi.c b/src/con4m/ffi.c new file mode 100644 index 00000000..4e42aefc --- /dev/null +++ b/src/con4m/ffi.c @@ -0,0 +1,182 @@ +#include "con4m.h" + +typedef struct { + char *ctype_name; + uint8_t index; + bool can_take_param; +} ctype_name_info_t; + +static const ctype_name_info_t ctype_name_info[] = { + // clang-format off + { "void", 0, false, }, + { "cvoid", 0, false, }, + { "u8", 1, false, }, + { "cu8", 1, false, }, + { "i8", 2, false, }, + { "ci8", 2, false, }, + { "u16", 3, false, }, + { "cu16", 3, false, }, + { "i16", 4, false, }, + { "ci16", 4, false, }, + { "u32", 5, false, }, + { "cu32", 5, false, }, + { "i32", 6, false, }, + { "ci32", 6, false, }, + { "u64", 7, false, }, + { "cu64", 7, false, }, + { "i64", 8, false, }, + { "ci64", 8, false, }, + { "cfloat", 9, false, }, + { "cdouble", 10, false, }, + { "double", 10, false, }, + { "cuchar", 11, false, }, + { "cchar", 12, false, }, + { "short", 13, false, }, + { "cshort", 13, false, }, + { "ushort", 14, false, }, + { "cushort", 14, false, }, + { "cint", 15, false, }, + { "cuint", 16, false, }, + { "ulong", 17, false, }, + { "long", 18, false, }, + { "bool", 19, false, }, + { "size_t", 20, false, }, + { "size", 20, false, }, + { "size_t", 20, false, }, + { "csize", 20, false, }, + { "csize_t", 20, false, }, + { "ssize", 21, false, }, + { "ssize_t", 21, false, }, + { "cssize", 21, false, }, + { "ssize_t", 21, false, }, + { "cssize_t", 21, false, }, + { "ptr", 22, true, }, + { "pointer", 22, true, }, + { "carray", 23, true, }, + { "array", 23, true, }, + { "cstring", C4M_CSTR_CTYPE_CONST, true, }, + { NULL, 0, false, }, + // clang-format on +}; + +static const c4m_ffi_type *ffi_type_map[] = { + &ffi_type_void, + &ffi_type_uint8, + &ffi_type_sint8, + 
&ffi_type_uint16, + &ffi_type_sint16, + &ffi_type_uint32, + &ffi_type_sint32, + &ffi_type_uint64, + &ffi_type_sint64, + &ffi_type_float, + &ffi_type_double, + &ffi_type_uchar, + &ffi_type_schar, + &ffi_type_sshort, + &ffi_type_ushort, + &ffi_type_sint, + &ffi_type_uint, + &ffi_type_ulong, + &ffi_type_slong, + &ffi_type_sint8, // Bool is 1 byte per the C standard. + &ffi_type_ulong, // size_t is as wide as unsigned long on the LP64 targets we ship. + &ffi_type_slong, + &ffi_type_pointer, + &ffi_type_pointer, + &ffi_type_pointer, + NULL, +}; + +void * +c4m_ref_via_ffi_type(c4m_box_t *box, c4m_ffi_type *t) +{ + if (t == &ffi_type_uint8 || t == &ffi_type_sint8) { + return &box->u8; + } + if (t == &ffi_type_uint16 || t == &ffi_type_sint16) { + return &box->u16; + } + if (t == &ffi_type_uint32 || t == &ffi_type_sint32) { + return &box->u32; + } + + return box; +} + +static c4m_dict_t *c4m_symbol_cache = NULL; + +static inline void +ffi_init() +{ + if (c4m_symbol_cache == NULL) { + c4m_symbol_cache = c4m_new(c4m_tspec_dict(c4m_tspec_utf8(), + c4m_tspec_ref())); + c4m_gc_register_root(&c4m_symbol_cache, 1); + } +} + +int64_t +c4m_lookup_ctype_id(char *found) +{ + ctype_name_info_t *info = (ctype_name_info_t *)&ctype_name_info[0]; + + while (true) { + if (info->ctype_name == NULL) { + return -1; + } + if (!strcmp(info->ctype_name, found)) { + return (int64_t)info->index; + } + + info++; + } +} + +c4m_ffi_type * +c4m_ffi_arg_type_map(uint8_t ix) +{ + return (c4m_ffi_type *)ffi_type_map[ix]; +} + +void +c4m_add_static_function(c4m_utf8_t *name, void *symbol) +{ + ffi_init(); + + hatrack_dict_put(c4m_symbol_cache, name, symbol); +} + +void * +c4m_ffi_find_symbol(c4m_utf8_t *name, c4m_xlist_t *opt_libs) +{ + ffi_init(); + + void *ptr = hatrack_dict_get(c4m_symbol_cache, name, NULL); + + if (ptr != NULL) { + return ptr; + } + + ptr = dlsym(RTLD_DEFAULT, name->data); + + if (ptr != NULL) { + return ptr; + } + + if (opt_libs != NULL) { + int n = c4m_xlist_len(opt_libs); + + for (int i = 0; i < n;
i++) { + c4m_utf8_t *s = c4m_xlist_get(opt_libs, i, NULL); + if (dlopen(s->data, RTLD_NOW | RTLD_GLOBAL) != NULL) { + ptr = dlsym(RTLD_DEFAULT, name->data); + if (ptr != NULL) { + return ptr; + } + } + } + } + + return NULL; +} diff --git a/src/con4m/gc.c b/src/con4m/gc.c index 24142719..636df65e 100644 --- a/src/con4m/gc.c +++ b/src/con4m/gc.c @@ -14,6 +14,7 @@ static c4m_dict_t *global_roots; uint64_t c4m_gc_guard = 0; static thread_local c4m_arena_t *current_heap = NULL; +static c4m_set_t *external_holds = NULL; static c4m_system_finalizer_fn system_finalizer = NULL; static uint64_t page_bytes; static uint64_t page_modulus; @@ -86,29 +87,35 @@ c4m_initialize_gc() static bool once = false; if (!once) { - c4m_gc_guard = c4m_rand64(); - global_roots = c4m_rc_alloc(sizeof(c4m_dict_t)); - once = true; - page_bytes = getpagesize(); - page_modulus = page_bytes - 1; // Page size is always a power of 2. - modulus_mask = ~page_modulus; + c4m_gc_guard = c4m_rand64(); + global_roots = c4m_rc_alloc(sizeof(c4m_dict_t)); + external_holds = c4m_rc_alloc(sizeof(c4m_set_t)); + once = true; + page_bytes = getpagesize(); + page_modulus = page_bytes - 1; // Page size is always a power of 2. + modulus_mask = ~page_modulus; c4m_gc_trace("init:set_guard:%llx", c4m_gc_guard); c4m_gc_trace("init:global_root_addr:@%p", global_roots); - // use c4m_gc_malloc_wrapper for hatrack's zalloc function since our - // gc allocator always returns zeroed memory. 
- // hatrack_setmallocfns(NULL, - // NULL, - // NULL, - // NULL, - // c4m_gc_malloc_wrapper, - // NULL); - hatrack_dict_init(global_roots, HATRACK_DICT_KEY_TYPE_PTR); + hatrack_set_init(external_holds, HATRACK_DICT_KEY_TYPE_PTR); + hatrack_dict_put(global_roots, &external_holds, (void *)1); } } +void +c4m_gc_add_hold(c4m_obj_t obj) +{ + hatrack_set_add(external_holds, obj); +} + +void +c4m_gc_remove_hold(c4m_obj_t obj) +{ + hatrack_set_remove(external_holds, obj); +} + // The idea here is once the object unmarshals the object file and // const objects, it can make the heap up till that point read-only. // We definitely won't want to allocate anything that will need diff --git a/src/con4m/init.c b/src/con4m/init.c index 3d0273e4..43a4c66e 100644 --- a/src/con4m/init.c +++ b/src/con4m/init.c @@ -6,6 +6,20 @@ char **c4m_stashed_argv; char **c4m_stashed_envp; +uint64_t +c4m_clz(uint64_t n) +{ + return n ? (uint64_t)__builtin_clzll(n) : 64; // clzll(0) is undefined. +} + +static void +c4m_register_builtins() +{ + c4m_add_static_function(c4m_new_utf8("c4m_clz"), c4m_clz); + c4m_add_static_function(c4m_new_utf8("c4m_gc_remove_hold"), + c4m_gc_remove_hold); +} + __attribute__((constructor)) void c4m_init(int argc, char **argv, char **envp) { @@ -16,6 +30,8 @@ c4m_init(int argc, char **argv, char **envp) c4m_initialize_gc(); c4m_gc_set_finalize_callback((void *)c4m_finalize_allocation); c4m_initialize_global_types(); + c4m_init_std_streams(); + c4m_register_builtins(); } c4m_xlist_t * diff --git a/src/con4m/numbers.c b/src/con4m/numbers.c index 2f04bc29..3a4c626c 100644 --- a/src/con4m/numbers.c +++ b/src/con4m/numbers.c @@ -115,7 +115,7 @@ raw_hex_parse(c4m_utf8_t *u8, c4m_compile_error_t *err) // Here we expect *s to point to the first // character after any leading '0x'.
__uint128_t cur = 0; - char *s = u8->data; + char *s = u8->data + 2; char c; bool even = true; @@ -199,7 +199,7 @@ raw_hex_parse(c4m_utf8_t *u8, c4m_compile_error_t *err) return c4m_box_##magic_type(-1 * val); \ } \ else { \ - if (val > overflow_val) { \ + if (st == ST_Base10 && val > overflow_val) { \ *code = c4m_err_parse_lit_overflow; \ return NULL; \ } \ diff --git a/src/con4m/path.c b/src/con4m/path.c index 9db27f86..67dcc21d 100644 --- a/src/con4m/path.c +++ b/src/con4m/path.c @@ -36,8 +36,13 @@ c4m_get_user_dir(c4m_utf8_t *user) if (user == NULL) { result = c4m_get_env(c4m_new_utf8("HOME")); if (!result) { - pw = getpwent(); - result = c4m_new_utf8(pw->pw_dir); + pw = getpwent(); + if (pw == NULL) { + result = c4m_new_utf8("/"); + } + else { + result = c4m_new_utf8(pw->pw_dir); + } } } else { diff --git a/src/con4m/streams.c b/src/con4m/streams.c index 9fe7ab8f..a574900c 100644 --- a/src/con4m/streams.c +++ b/src/con4m/streams.c @@ -724,8 +724,8 @@ static c4m_stream_t *c4m_stream_stdin = NULL; static c4m_stream_t *c4m_stream_stdout = NULL; static c4m_stream_t *c4m_stream_stderr = NULL; -static inline void -init_std_streams() +void +c4m_init_std_streams() { if (c4m_stream_stdin == NULL) { c4m_stream_stdin = c4m_new(c4m_tspec_stream(), @@ -743,21 +743,18 @@ init_std_streams() c4m_stream_t * c4m_get_stdin() { - init_std_streams(); return c4m_stream_stdin; } c4m_stream_t * c4m_get_stdout() { - init_std_streams(); return c4m_stream_stdout; } c4m_stream_t * c4m_get_stderr() { - init_std_streams(); return c4m_stream_stderr; } diff --git a/src/con4m/vm.c b/src/con4m/vm.c index 4b17aa20..774ca4e0 100644 --- a/src/con4m/vm.c +++ b/src/con4m/vm.c @@ -638,7 +638,58 @@ c4m_vm_call_module(c4m_vmthread_t *tstate, c4m_zinstruction_t *i) static void c4m_vm_ffi_call(c4m_vmthread_t *tstate, c4m_zinstruction_t *i, int64_t ix) { - // TODO ffi_call + c4m_ffi_decl_t *decl = c4m_xlist_get(tstate->vm->obj->ffi_info, + i->arg, + NULL); + + if (decl == NULL || decl->cif.fptr == NULL) { + fprintf(stderr, "Could
not load external function.\n"); + abort(); + } + + c4m_zffi_cif *ffiinfo = &decl->cif; + void **args; + + if (!ffiinfo->cif.nargs) { + args = NULL; + } + else { + args = c4m_gc_array_alloc(void *, ffiinfo->cif.nargs); + int n = ffiinfo->cif.nargs; + + for (unsigned int i = 0; i < ffiinfo->cif.nargs; i++) { + // clang-format off + --n; + + if (ffiinfo->str_convert && + n < 63 && + ((1UL << n) & ffiinfo->str_convert)) { + + c4m_utf8_t *s = (c4m_utf8_t *)tstate->sp[i].rvalue.obj; + s = c4m_to_utf8(s); + args[n] = &s->data; + } + // clang-format on + else { + c4m_box_t value = {.u64 = tstate->sp[i].uint}; + c4m_box_t *box = c4m_new(c4m_tspec_box(c4m_tspec_ref()), + value); + args[n] = c4m_ref_via_ffi_type(box, + ffiinfo->cif.arg_types[n]); + } + + if (n < 63 && ((1UL << n) & ffiinfo->hold_info)) { + c4m_gc_add_hold(tstate->sp[i].rvalue.obj); + } + } + } + + ffi_call(&ffiinfo->cif, ffiinfo->fptr, &tstate->r0, args); + + if (ffiinfo->str_convert & (1UL << 63)) { + char *s = (char *)tstate->r0.obj; + tstate->r0.obj = c4m_new_utf8(s); + } } static void @@ -1469,10 +1520,56 @@ c4m_vm_load_const_data(c4m_vm_t *vm) c4m_internal_lock_then_unstash_heap(); } +static inline void +c4m_vm_setup_ffi(c4m_vm_t *vm) +{ + vm->ffi_info_entries = c4m_xlist_len(vm->obj->ffi_info); + + if (vm->ffi_info_entries == 0) { + return; + } + + for (int i = 0; i < vm->ffi_info_entries; i++) { + c4m_ffi_decl_t *ffi_info = c4m_xlist_get(vm->obj->ffi_info, i, NULL); + c4m_zffi_cif *cif = &ffi_info->cif; + + cif->fptr = c4m_ffi_find_symbol(ffi_info->external_name, + ffi_info->dll_list); + + if (!cif->fptr) { + // TODO: warn. For now, just error if it gets called.
+ continue; + } + + int n = ffi_info->num_ext_params; + c4m_ffi_type **arglist = c4m_gc_array_alloc(c4m_ffi_type *, n); + + for (int j = 0; j < n; j++) { + uint8_t param = ffi_info->external_params[j]; + arglist[j] = c4m_ffi_arg_type_map(param); + + if (param == C4M_CSTR_CTYPE_CONST && j < 63) { + cif->str_convert |= (1UL << j); + } + } + + if (ffi_info->external_return_type == C4M_CSTR_CTYPE_CONST) { + cif->str_convert |= (1UL << 63); + } + + ffi_prep_cif(&cif->cif, + C4M_FFI_DEFAULT_ABI, + n, + c4m_ffi_arg_type_map(ffi_info->external_return_type), + arglist); + } +} + void c4m_vm_setup_runtime(c4m_vm_t *vm) { c4m_vm_load_const_data(vm); + c4m_vm_setup_ffi(vm); } void diff --git a/src/con4m/vmmarshal.c b/src/con4m/vmmarshal.c index a6e9f8f9..bc292ea1 100644 --- a/src/con4m/vmmarshal.c +++ b/src/con4m/vmmarshal.c @@ -84,6 +84,7 @@ unmarshal_instruction(c4m_stream_t *in, c4m_dict_t *memos) return out; } +#if 0 // Removing for now static void marshal_ffi_arg_info(void *ref, c4m_stream_t *out, c4m_dict_t *memos, int64_t *mid) { @@ -99,6 +100,7 @@ marshal_ffi_arg_info(void *ref, c4m_stream_t *out, c4m_dict_t *memos, int64_t *m static void * unmarshal_ffi_arg_info(c4m_stream_t *in, c4m_dict_t *memos) { + c4m_zffi_arg_info_t *out = c4m_gc_alloc(c4m_zffi_arg_info_t); out->held = c4m_unmarshal_bool(in); @@ -109,10 +111,12 @@ unmarshal_ffi_arg_info(c4m_stream_t *in, c4m_dict_t *memos) return out; } +#endif static void marshal_ffi_info(void *ref, c4m_stream_t *out, c4m_dict_t *memos, int64_t *mid) { +#if 0 c4m_zffi_info_t *in = ref; c4m_marshal_i64(in->nameoffset, out); @@ -124,11 +128,13 @@ marshal_ffi_info(void *ref, c4m_stream_t *out, c4m_dict_t *memos, int64_t *mid) marshal_xlist_ref(in->arg_info, out, memos, mid, marshal_ffi_arg_info); c4m_sub_marshal(in->shortdoc, out, memos, mid); c4m_sub_marshal(in->longdoc, out, memos, mid); +#endif } static void * unmarshal_ffi_info(c4m_stream_t *in, c4m_dict_t *memos) { +#if 0 c4m_zffi_info_t *out = c4m_gc_alloc(c4m_zffi_info_t); 
out->nameoffset = c4m_unmarshal_i64(in); @@ -142,6 +148,8 @@ unmarshal_ffi_info(c4m_stream_t *in, c4m_dict_t *memos) out->longdoc = c4m_sub_unmarshal(in, memos); return out; +#endif + return NULL; } static void diff --git a/src/hatrack/hash/set.c b/src/hatrack/hash/set.c index 044ae6d5..d17f00b5 100644 --- a/src/hatrack/hash/set.c +++ b/src/hatrack/hash/set.c @@ -980,5 +980,5 @@ hatrack_set_epoch_sort_cmp(const void *b1, const void *b2) item1 = (hatrack_set_view_t *)b1; item2 = (hatrack_set_view_t *)b2; - return item2->sort_epoch - item1->sort_epoch; + return item1->sort_epoch - item2->sort_epoch; } diff --git a/src/tests/test.c b/src/tests/test.c index 25455c55..03e840c6 100644 --- a/src/tests/test.c +++ b/src/tests/test.c @@ -635,6 +635,8 @@ test_compiler() return; } + c4m_add_static_function(c4m_new_utf8("strndup"), strndup); + for (int64_t i = 0; i < l; i++) { c4m_utf8_t *fname = c4m_xlist_get(files, i, NULL); c4m_utf8_t *path; diff --git a/tests/basic15.c4m b/tests/basic15.c4m new file mode 100644 index 00000000..11d2b9dd --- /dev/null +++ b/tests/basic15.c4m @@ -0,0 +1,6 @@ +extern c4m_clz(i64) -> i64 { + local: clz(x: int) -> int +} + +print clz(0x0fffffffffffffff) + diff --git a/tests/basic16.c4m b/tests/basic16.c4m new file mode 100644 index 00000000..c15d6caa --- /dev/null +++ b/tests/basic16.c4m @@ -0,0 +1,7 @@ +extern strndup(cstring, csize_t) -> cstring { + local: test(s: string, n: int) -> string +} + +x = "Hello, world!" +print x +print(test(x, 4)) \ No newline at end of file