diff options
Diffstat (limited to 'examples/coroutines')
| -rw-r--r-- | examples/coroutines/ACO_LICENSE | 202 | ||||
| -rw-r--r-- | examples/coroutines/README.md | 24 | ||||
| -rw-r--r-- | examples/coroutines/aco.c | 492 | ||||
| -rw-r--r-- | examples/coroutines/aco.h | 214 | ||||
| -rw-r--r-- | examples/coroutines/acoyield.S | 208 | ||||
| -rw-r--r-- | examples/coroutines/coroutines.tm | 67 |
6 files changed, 1207 insertions, 0 deletions
diff --git a/examples/coroutines/ACO_LICENSE b/examples/coroutines/ACO_LICENSE new file mode 100644 index 00000000..ef4f82f0 --- /dev/null +++ b/examples/coroutines/ACO_LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2018] [Sen Han <00hnes@gmail.com>] + Copyright [2024] [Bruce Hill <bruce@bruce-hill.com>] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/examples/coroutines/README.md b/examples/coroutines/README.md new file mode 100644 index 00000000..eef923e0 --- /dev/null +++ b/examples/coroutines/README.md @@ -0,0 +1,24 @@ +# Tomo Coroutine Library + +This is a coroutine library built on top of a modified version of +[libaco](https://libaco.org). 
+ +## Example Usage + +```tomo +use coroutines + +func main() + co := Coroutine(func() + say("I'm in the coroutine!") + yield() + say("I'm back in the coroutine!") + ) + >> co + say("I'm in the main func") + >> co.resume() + say("I'm back in the main func") + >> co.resume() + say("I'm back in the main func again") + >> co.resume() +``` diff --git a/examples/coroutines/aco.c b/examples/coroutines/aco.c new file mode 100644 index 00000000..3226468b --- /dev/null +++ b/examples/coroutines/aco.c @@ -0,0 +1,492 @@ +// Copyright 2018 Sen Han <00hnes@gmail.com> +// Modifications copyright 2025 Bruce Hill <bruce@bruce-hill.com> +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#define _GNU_SOURCE + +#include "aco.h" +#include <stdio.h> +#include <stdint.h> + +#ifndef public +#define public __attribute__ ((visibility ("default"))) +#endif + +#define aco_size_t_safe_add_assert(a,b) aco_assert((a)+(b) >= (a)) + +static void aco_default_protector_last_word(void*); + +void* (*aco_alloc_fn)(size_t) = malloc; +void (*aco_dealloc_fn)(void*) = free; + +#define aco_alloc(size) ({ \ + void *_ptr = aco_alloc_fn(size); \ + if (aco_unlikely((_ptr) == NULL)) { \ + fprintf(stderr, "Aborting: failed to allocate memory: %s:%d:%s\n", \ + __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + abort(); \ + } \ + _ptr; \ +}) + +// aco's Global Thread Local Storage variable `co` +public __thread aco_t* aco_gtls_co; +static __thread aco_cofuncp_t aco_gtls_last_word_fp = aco_default_protector_last_word; + +#ifdef __i386__ + static __thread void* aco_gtls_fpucw_mxcsr[2]; +#elif __x86_64__ + static __thread void* aco_gtls_fpucw_mxcsr[1]; +#else + #error "platform not supporteded yet" +#endif + +public void aco_runtime_test(void) { +#ifdef __i386__ + _Static_assert(sizeof(void*) == 4, "require 'sizeof(void*) == 4'"); +#elif __x86_64__ + _Static_assert(sizeof(void*) == 8, "require 'sizeof(void*) == 8'"); + _Static_assert(sizeof(__uint128_t) == 16, "require 'sizeof(__uint128_t) == 16'"); +#else + #error "platform not supporteded yet" +#endif + _Static_assert(sizeof(int) >= 4, "require 'sizeof(int) >= 4'"); + aco_assert(sizeof(int) >= 4); + _Static_assert(sizeof(int) <= sizeof(size_t), + "require 'sizeof(int) <= sizeof(size_t)'"); + aco_assert(sizeof(int) <= sizeof(size_t)); +} + +#ifdef __x86_64__ +static inline void aco_fast_memcpy(void *dst, const void *src, size_t sz) { + if (((uintptr_t)src & 0x0f) != 0 + || ((uintptr_t)dst & 0x0f) != 0 + || (sz & 0x0f) != 0x08 + || (sz >> 4) > 8) { + memcpy(dst, src, sz); + return; + } + + __uint128_t xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7; + switch (sz >> 4) { + case 0: + break; + case 1: + xmm0 = *((__uint128_t*)src + 0); + 
*((__uint128_t*)dst + 0) = xmm0; + break; + case 2: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + break; + case 3: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + break; + case 4: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + xmm3 = *((__uint128_t*)src + 3); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + *((__uint128_t*)dst + 3) = xmm3; + break; + case 5: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + xmm3 = *((__uint128_t*)src + 3); + xmm4 = *((__uint128_t*)src + 4); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + *((__uint128_t*)dst + 3) = xmm3; + *((__uint128_t*)dst + 4) = xmm4; + break; + case 6: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + xmm3 = *((__uint128_t*)src + 3); + xmm4 = *((__uint128_t*)src + 4); + xmm5 = *((__uint128_t*)src + 5); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + *((__uint128_t*)dst + 3) = xmm3; + *((__uint128_t*)dst + 4) = xmm4; + *((__uint128_t*)dst + 5) = xmm5; + break; + case 7: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + xmm3 = *((__uint128_t*)src + 3); + xmm4 = *((__uint128_t*)src + 4); + xmm5 = *((__uint128_t*)src + 5); + xmm6 = *((__uint128_t*)src + 6); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + *((__uint128_t*)dst + 3) = xmm3; + *((__uint128_t*)dst + 4) = xmm4; + *((__uint128_t*)dst + 5) = 
xmm5; + *((__uint128_t*)dst + 6) = xmm6; + break; + case 8: + xmm0 = *((__uint128_t*)src + 0); + xmm1 = *((__uint128_t*)src + 1); + xmm2 = *((__uint128_t*)src + 2); + xmm3 = *((__uint128_t*)src + 3); + xmm4 = *((__uint128_t*)src + 4); + xmm5 = *((__uint128_t*)src + 5); + xmm6 = *((__uint128_t*)src + 6); + xmm7 = *((__uint128_t*)src + 7); + *((__uint128_t*)dst + 0) = xmm0; + *((__uint128_t*)dst + 1) = xmm1; + *((__uint128_t*)dst + 2) = xmm2; + *((__uint128_t*)dst + 3) = xmm3; + *((__uint128_t*)dst + 4) = xmm4; + *((__uint128_t*)dst + 5) = xmm5; + *((__uint128_t*)dst + 6) = xmm6; + *((__uint128_t*)dst + 7) = xmm7; + break; + } + *((uint64_t*)((uintptr_t)dst + sz - 8)) = *((uint64_t*)((uintptr_t)src + sz - 8)); +} +#endif + +void aco_default_protector_last_word(void*) { + aco_t* co = aco_get_co(); + // do some log about the offending `co` + fprintf(stderr,"error: aco_default_protector_last_word triggered\n"); + fprintf(stderr, "error: co:%p should call `aco_exit()` instead of direct " + "`return` in co_fp:%p to finish its execution\n", co, (void*)co->fp); + aco_assert(0); +} + +public void aco_set_allocator(void* (*alloc)(size_t), void (*dealloc)(void*)) +{ + aco_alloc_fn = alloc; + aco_dealloc_fn = dealloc; +} + +public void aco_thread_init(aco_cofuncp_t last_word_co_fp) { + aco_save_fpucw_mxcsr(aco_gtls_fpucw_mxcsr); + + if ((void*)last_word_co_fp != NULL) + aco_gtls_last_word_fp = last_word_co_fp; +} + +// This function `aco_funcp_protector` should never be +// called. If it's been called, that means the offending +// `co` didn't call aco_exit(co) instead of `return` to +// finish its execution. 
+public void aco_funcp_protector(void) { + if ((void*)(aco_gtls_last_word_fp) != NULL) { + aco_gtls_last_word_fp(NULL); + } else { + aco_default_protector_last_word(NULL); + } + aco_assert(0); +} + +public aco_shared_stack_t* aco_shared_stack_new(size_t sz) { + return aco_shared_stack_new2(sz, 1); +} + +public aco_shared_stack_t* aco_shared_stack_new2(size_t sz, bool guard_page_enabled) { + if (sz == 0) { + sz = 1024 * 1024 * 2; + } + if (sz < 4096) { + sz = 4096; + } + aco_assert(sz > 0); + + size_t u_pgsz = 0; + if (guard_page_enabled) { + // although gcc's Built-in Functions to Perform Arithmetic with + // Overflow Checking is better, but it would require gcc >= 5.0 + long pgsz = sysconf(_SC_PAGESIZE); + // pgsz must be > 0 && a power of two + aco_assert(pgsz > 0 && (((pgsz - 1) & pgsz) == 0)); + u_pgsz = (size_t)((unsigned long)pgsz); + // it should be always true in real life + aco_assert(u_pgsz == (unsigned long)pgsz && ((u_pgsz << 1) >> 1) == u_pgsz); + if (sz <= u_pgsz) { + sz = u_pgsz << 1; + } else { + size_t new_sz; + if ((sz & (u_pgsz - 1)) != 0) { + new_sz = (sz & (~(u_pgsz - 1))); + aco_assert(new_sz >= u_pgsz); + aco_size_t_safe_add_assert(new_sz, (u_pgsz << 1)); + new_sz = new_sz + (u_pgsz << 1); + aco_assert(sz / u_pgsz + 2 == new_sz / u_pgsz); + } else { + aco_size_t_safe_add_assert(sz, u_pgsz); + new_sz = sz + u_pgsz; + aco_assert(sz / u_pgsz + 1 == new_sz / u_pgsz); + } + sz = new_sz; + aco_assert((sz / u_pgsz > 1) && ((sz & (u_pgsz - 1)) == 0)); + } + } + + aco_shared_stack_t* p = aco_alloc(sizeof(aco_shared_stack_t)); + memset(p, 0, sizeof(aco_shared_stack_t)); + + if (guard_page_enabled) { + p->real_ptr = mmap( + NULL, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0 + ); + if (aco_unlikely(p->real_ptr == MAP_FAILED)) { + fprintf(stderr, "Aborting: failed to allocate memory: %s:%d:%s\n", + __FILE__, __LINE__, __PRETTY_FUNCTION__); + abort(); + } + p->guard_page_enabled = true; + aco_assert(0 == mprotect(p->real_ptr, u_pgsz, 
PROT_READ)); + + p->ptr = (void*)(((uintptr_t)p->real_ptr) + u_pgsz); + p->real_sz = sz; + aco_assert(sz >= (u_pgsz << 1)); + p->sz = sz - u_pgsz; + } else { + //p->guard_page_enabled = 0; + p->sz = sz; + p->ptr = aco_alloc(sz); + } + + p->owner = NULL; +#ifdef ACO_USE_VALGRIND + p->valgrind_stk_id = VALGRIND_STACK_REGISTER( + p->ptr, (void*)((uintptr_t)p->ptr + p->sz) + ); +#endif +#if defined(__i386__) || defined(__x86_64__) + uintptr_t u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr); + u_p = (u_p >> 4) << 4; + p->align_highptr = (void*)u_p; + p->align_retptr = (void*)(u_p - sizeof(void*)); + *((void**)(p->align_retptr)) = (void*)(aco_funcp_protector_asm); + aco_assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*))); + p->align_limit = p->sz - 16 - (sizeof(void*) << 1); +#else + #error "platform not supporteded yet" +#endif + return p; +} + +public void aco_shared_stack_destroy(aco_shared_stack_t* sstk) { + aco_assert(sstk != NULL && sstk->ptr != NULL); +#ifdef ACO_USE_VALGRIND + VALGRIND_STACK_DEREGISTER(sstk->valgrind_stk_id); +#endif + if (sstk->guard_page_enabled) { + aco_assert(0 == munmap(sstk->real_ptr, sstk->real_sz)); + sstk->real_ptr = NULL; + sstk->ptr = NULL; + } else { + if (aco_dealloc_fn != NULL) aco_dealloc_fn(sstk->ptr); + sstk->ptr = NULL; + } + if (aco_dealloc_fn != NULL) aco_dealloc_fn(sstk); +} + +public aco_t* aco_create( + aco_t* main_co, aco_shared_stack_t* shared_stack, + size_t saved_stack_sz, aco_cofuncp_t fp, void* arg +) { + aco_t* p = aco_alloc(sizeof(aco_t)); + memset(p, 0, sizeof(aco_t)); + + if (main_co != NULL) { // non-main co + aco_assertptr(shared_stack); + p->shared_stack = shared_stack; +#ifdef __i386__ + // POSIX.1-2008 (IEEE Std 1003.1-2008) - General Information - Data Types - Pointer Types + // http://pubs.opengroup.org/onlinepubs/9699919799.2008edition/functions/V2_chap02.html#tag_15_12_03 + p->reg[ACO_REG_IDX_RETADDR] = (void*)fp; + // push retaddr + p->reg[ACO_REG_IDX_SP] = 
p->shared_stack->align_retptr; + #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV + p->reg[ACO_REG_IDX_FPU] = aco_gtls_fpucw_mxcsr[0]; + p->reg[ACO_REG_IDX_FPU + 1] = aco_gtls_fpucw_mxcsr[1]; + #endif +#elif __x86_64__ + p->reg[ACO_REG_IDX_RETADDR] = (void*)fp; + p->reg[ACO_REG_IDX_SP] = p->shared_stack->align_retptr; + #ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV + p->reg[ACO_REG_IDX_FPU] = aco_gtls_fpucw_mxcsr[0]; + #endif +#else + #error "platform not supporteded yet" +#endif + p->main_co = main_co; + p->arg = arg; + p->fp = fp; + if (saved_stack_sz == 0) { + saved_stack_sz = 64; + } + p->saved_stack.ptr = aco_alloc(saved_stack_sz); + p->saved_stack.sz = saved_stack_sz; +#if defined(__i386__) || defined(__x86_64__) + p->saved_stack.valid_sz = 0; +#else + #error "platform not supporteded yet" +#endif + return p; + } else { // main co + p->main_co = NULL; + p->arg = arg; + p->fp = fp; + p->shared_stack = NULL; + p->saved_stack.ptr = NULL; + return p; + } + aco_assert(0); +} + +public aco_attr_no_asan +void aco_resume(aco_t* resume_co) { + aco_assert(resume_co != NULL && resume_co->main_co != NULL + && !resume_co->is_finished + ); + if (resume_co->shared_stack->owner != resume_co) { + if (resume_co->shared_stack->owner != NULL) { + aco_t* owner_co = resume_co->shared_stack->owner; + aco_assert(owner_co->shared_stack == resume_co->shared_stack); +#if defined(__i386__) || defined(__x86_64__) + aco_assert( + ( + (uintptr_t)(owner_co->shared_stack->align_retptr) + >= + (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]) + ) + && + ( + (uintptr_t)(owner_co->shared_stack->align_highptr) + - + (uintptr_t)(owner_co->shared_stack->align_limit) + <= + (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]) + ) + ); + owner_co->saved_stack.valid_sz = + (uintptr_t)(owner_co->shared_stack->align_retptr) + - + (uintptr_t)(owner_co->reg[ACO_REG_IDX_SP]); + if (owner_co->saved_stack.sz < owner_co->saved_stack.valid_sz) { + if (aco_dealloc_fn != NULL) aco_dealloc_fn(owner_co->saved_stack.ptr); + 
owner_co->saved_stack.ptr = NULL; + while (1) { + owner_co->saved_stack.sz = owner_co->saved_stack.sz << 1; + aco_assert(owner_co->saved_stack.sz > 0); + if (owner_co->saved_stack.sz >= owner_co->saved_stack.valid_sz) { + break; + } + } + owner_co->saved_stack.ptr = aco_alloc(owner_co->saved_stack.sz); + } + // TODO: optimize the performance penalty of memcpy function call + // for very short memory span + if (owner_co->saved_stack.valid_sz > 0) { + #ifdef __x86_64__ + aco_fast_memcpy( + owner_co->saved_stack.ptr, + owner_co->reg[ACO_REG_IDX_SP], + owner_co->saved_stack.valid_sz + ); + #else + memcpy( + owner_co->saved_stack.ptr, + owner_co->reg[ACO_REG_IDX_SP], + owner_co->saved_stack.valid_sz + ); + #endif + owner_co->saved_stack.ct_save++; + } + if (owner_co->saved_stack.valid_sz > owner_co->saved_stack.max_cpsz) { + owner_co->saved_stack.max_cpsz = owner_co->saved_stack.valid_sz; + } + owner_co->shared_stack->owner = NULL; + owner_co->shared_stack->align_validsz = 0; +#else + #error "platform not supporteded yet" +#endif + } + aco_assert(resume_co->shared_stack->owner == NULL); +#if defined(__i386__) || defined(__x86_64__) + aco_assert( + resume_co->saved_stack.valid_sz + <= + resume_co->shared_stack->align_limit - sizeof(void*) + ); + // TODO: optimize the performance penalty of memcpy function call + // for very short memory span + if (resume_co->saved_stack.valid_sz > 0) { + void *dst = (void*)( + (uintptr_t)(resume_co->shared_stack->align_retptr) + - resume_co->saved_stack.valid_sz); + #ifdef __x86_64__ + aco_fast_memcpy(dst, resume_co->saved_stack.ptr, resume_co->saved_stack.valid_sz); + #else + memcpy(dst, resume_co->saved_stack.ptr, resume_co->saved_stack.valid_sz); + #endif + resume_co->saved_stack.ct_restore++; + } + if (resume_co->saved_stack.valid_sz > resume_co->saved_stack.max_cpsz) { + resume_co->saved_stack.max_cpsz = resume_co->saved_stack.valid_sz; + } + resume_co->shared_stack->align_validsz = resume_co->saved_stack.valid_sz + sizeof(void*); + 
resume_co->shared_stack->owner = resume_co; +#else + #error "platform not supporteded yet" +#endif + } + aco_gtls_co = resume_co; + aco_yield_asm(resume_co->main_co, resume_co); + aco_gtls_co = resume_co->main_co; +} + +public void aco_destroy(aco_t* co) { + aco_assertptr(co); + if (aco_is_main_co(co)) { + if (aco_dealloc_fn != NULL) aco_dealloc_fn(co); + } else { + if (co->shared_stack->owner == co) { + co->shared_stack->owner = NULL; + co->shared_stack->align_validsz = 0; + } + if (aco_dealloc_fn != NULL) + aco_dealloc_fn(co->saved_stack.ptr); + co->saved_stack.ptr = NULL; + if (aco_dealloc_fn != NULL) + aco_dealloc_fn(co); + } +} + +public void aco_exit_fn(void*) { + aco_exit(); +} diff --git a/examples/coroutines/aco.h b/examples/coroutines/aco.h new file mode 100644 index 00000000..80d5542b --- /dev/null +++ b/examples/coroutines/aco.h @@ -0,0 +1,214 @@ +// Copyright 2018 Sen Han <00hnes@gmail.com> +// Modifications copyright 2025 Bruce Hill <bruce@bruce-hill.com> +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <time.h> +#include <unistd.h> + +#ifdef ACO_USE_VALGRIND + #include <valgrind/valgrind.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define ACO_VERSION_MAJOR 2 +#define ACO_VERSION_MINOR 0 +#define ACO_VERSION_PATCH 0 + +#ifdef __i386__ + #define ACO_REG_IDX_RETADDR 0 + #define ACO_REG_IDX_SP 1 + #define ACO_REG_IDX_BP 2 + #define ACO_REG_IDX_ARG1 0 + #define ACO_REG_IDX_FPU 6 +#elif __x86_64__ + #define ACO_REG_IDX_RETADDR 4 + #define ACO_REG_IDX_SP 5 + #define ACO_REG_IDX_BP 7 + #define ACO_REG_IDX_EDI 8 + #define ACO_REG_IDX_FPU 8 +#else + #error "platform not supported yet" +#endif + +typedef struct { + void* ptr; + size_t sz; + size_t valid_sz; + // max copy size in bytes + size_t max_cpsz; + // copy from shared stack to this saved stack + size_t ct_save; + // copy from this saved stack to shared stack + size_t ct_restore; +} aco_saved_stack_t; + +struct aco_s; +typedef struct aco_s aco_t; + +typedef struct { + void* ptr; + size_t sz; + void* align_highptr; + void* align_retptr; + size_t align_validsz; + size_t align_limit; + aco_t* owner; + + bool guard_page_enabled; + void* real_ptr; + size_t real_sz; + +#ifdef ACO_USE_VALGRIND + unsigned long valgrind_stk_id; +#endif +} aco_shared_stack_t; + +typedef void (*aco_cofuncp_t)(void*); + +struct aco_s { + // cpu registers' state +#ifdef __i386__ + #ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV + void* reg[6]; + #else + void* reg[8]; + #endif +#elif __x86_64__ + #ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV + void* reg[8]; + #else + void* reg[9]; + #endif +#else + #error "platform not supported yet" +#endif + aco_t* main_co; + void* arg; + bool is_finished; + + aco_cofuncp_t fp; + + aco_saved_stack_t saved_stack; + aco_shared_stack_t* shared_stack; +}; + +#define aco_likely(x) (__builtin_expect(!!(x), 1)) + +#define aco_unlikely(x) 
(__builtin_expect(!!(x), 0)) + +#define aco_assert(EX) ((aco_likely(EX))?((void)0):(abort())) + +#define aco_assertptr(ptr) ((aco_likely((ptr) != NULL))?((void)0):(abort())) + +#if defined(aco_attr_no_asan) + #error "aco_attr_no_asan already defined" +#endif +#if defined(ACO_USE_ASAN) + #if defined(__has_feature) + #if __has_feature(__address_sanitizer__) + #define aco_attr_no_asan \ + __attribute__((__no_sanitize_address__)) + #endif + #endif + #if defined(__SANITIZE_ADDRESS__) && !defined(aco_attr_no_asan) + #define aco_attr_no_asan \ + __attribute__((__no_sanitize_address__)) + #endif +#endif +#ifndef aco_attr_no_asan + #define aco_attr_no_asan +#endif + +void aco_runtime_test(void); + +void aco_set_allocator(void* (*alloc)(size_t), void (*dealloc)(void*)); + +void aco_thread_init(aco_cofuncp_t last_word_co_fp); + +void aco_yield_asm(aco_t* from_co, aco_t* to_co) __asm__("aco_yield_asm"); // asm + +void aco_save_fpucw_mxcsr(void* p) __asm__("aco_save_fpucw_mxcsr"); // asm + +void aco_funcp_protector_asm(void) __asm__("aco_funcp_protector_asm"); // asm + +void aco_funcp_protector(void); + +aco_shared_stack_t* aco_shared_stack_new(size_t sz); + +aco_shared_stack_t* aco_shared_stack_new2(size_t sz, bool guard_page_enabled); + +void aco_shared_stack_destroy(aco_shared_stack_t* sstk); + +aco_t* aco_create( + aco_t* main_co, + aco_shared_stack_t* shared_stack, + size_t saved_stack_sz, + aco_cofuncp_t fp, void* arg +); + +// aco's Global Thread Local Storage variable `co` +#ifdef __TINYC__ + #error "TinyCC doesn't support thread-local storage!" 
+#else +extern __thread aco_t* aco_gtls_co; +#endif + +aco_attr_no_asan +void aco_resume(aco_t* resume_co); + +//void aco_yield1(aco_t* yield_co); +#define aco_yield1(yield_co) do { \ + aco_assertptr((yield_co)); \ + aco_assertptr((yield_co)->main_co); \ + aco_yield_asm((yield_co), (yield_co)->main_co); \ +} while (0) + +#define aco_yield() aco_yield1(aco_gtls_co) + +#define aco_get_arg() (aco_gtls_co->arg) + +#define aco_get_co() ({(void)0; aco_gtls_co;}) + +void aco_destroy(aco_t* co); + +#define aco_is_main_co(co) ({((co)->main_co) == NULL;}) + +#define aco_exit1(co) do { \ + (co)->is_finished = true; \ + aco_assert((co)->shared_stack->owner == (co)); \ + (co)->shared_stack->owner = NULL; \ + (co)->shared_stack->align_validsz = 0; \ + aco_yield1((co)); \ + aco_assert(0); \ +} while (0) + +#define aco_exit() aco_exit1(aco_gtls_co) + +void aco_exit_fn(void*); + +#ifdef __cplusplus +} +#endif diff --git a/examples/coroutines/acoyield.S b/examples/coroutines/acoyield.S new file mode 100644 index 00000000..7bc87ff1 --- /dev/null +++ b/examples/coroutines/acoyield.S @@ -0,0 +1,208 @@ +.text +.globl aco_yield_asm +#if defined(__APPLE__) +#else +.type aco_yield_asm, @function +#endif +.intel_syntax noprefix +aco_yield_asm: +/* + extern void aco_yield_asm(aco_t* from_co, aco_t* to_co); + + struct aco_t { + void* reg[X]; + // ... + } + + reference: + https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI + + pitfall: + http://man7.org/linux/man-pages/man7/signal.7.html + http://man7.org/linux/man-pages/man2/sigaltstack.2.html + + > $ man 7 signal + > ... + > By default, the signal handler is invoked on the normal process + > stack. It is possible to arrange that the signal handler + > uses an alternate stack; see sigaltstack(2) for a discussion of + > how to do this and when it might be useful. + > ... 
+
+    This is a BUG example:
+        https://github.com/Tencent/libco/blob/v1.0/coctx_swap.S#L27
+
+    proof of correctness:
+        https://github.com/hnes/libaco
+
+    mxcsr & fpu:
+        fnstcw * m2byte
+            Store FPU control word to m2byte without checking for
+            pending unmasked floating-point exceptions.
+
+        fldcw m2byte
+            Load FPU control word from m2byte.
+
+        stmxcsr m32
+            Store contents of MXCSR register to m32
+
+        ldmxcsr m32
+            Load MXCSR register from m32.
+*/
+/* i386: byte offsets of the register save slots inside from_co / to_co
+    0x00                  -->               0xff
+    eip esp ebp edi esi ebx fpucw16 mxcsr32
+    0   4   8   c   10  14  18      1c
+*/
+#ifdef __i386__
+    mov     eax,DWORD PTR [esp+0x4]     // from_co
+    mov     edx,DWORD PTR [esp]         // retaddr
+    lea     ecx,[esp+0x4]               // esp as it is once the return address is popped
+    mov     DWORD PTR [eax+0x8],ebp     //<ebp
+    mov     DWORD PTR [eax+0x4],ecx     //<esp
+    mov     DWORD PTR [eax+0x0],edx     //<retaddr
+    mov     DWORD PTR [eax+0xc],edi     //<edi
+    mov     ecx,DWORD PTR [esp+0x8]     // to_co (ecx holds to_co from here on)
+    mov     DWORD PTR [eax+0x10],esi    //<esi
+    mov     DWORD PTR [eax+0x14],ebx    //<ebx
+#ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    fnstcw  WORD  PTR [eax+0x18]        //<fpucw
+    stmxcsr DWORD PTR [eax+0x1c]        //<mxcsr
+#endif
+    mov     edx,DWORD PTR [ecx+0x4]     //>esp (kept in edx until every load from to_co is done)
+    mov     ebp,DWORD PTR [ecx+0x8]     //>ebp
+    mov     eax,DWORD PTR [ecx+0x0]     //>retaddr
+    mov     edi,DWORD PTR [ecx+0xc]     //>edi
+    mov     esi,DWORD PTR [ecx+0x10]    //>esi
+    mov     ebx,DWORD PTR [ecx+0x14]    //>ebx
+#ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    fldcw   WORD  PTR [ecx+0x18]        //>fpucw
+    ldmxcsr DWORD PTR [ecx+0x1c]        //>mxcsr
+#endif
+    mov     esp,edx                     // switch to the stack of to_co
+    mov     edx,eax                     // edx = resume address, eax is reused for the argument
+    // NOTE(review): x86_64 reads arg one pointer past the save area (0x48/0x50); the same rule here gives 0x1c/0x24 - confirm 0x24/0x28 against aco_t.
+    // Pass the user-provided argument as first argument:
+#ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    mov     eax,DWORD PTR [ecx+0x24]
+#else
+    mov     eax,DWORD PTR [ecx+0x28]
+#endif
+    xor     ecx,ecx                     // clear the to_co pointer only after its last use (it was zeroed before the load above)
+    // NOTE(review): i386 cdecl passes arguments on the stack, not in eax - confirm the callee really reads eax.
+    jmp     edx
+#elif __x86_64__
+/* x86_64: byte offsets of the register save slots inside from_co / to_co
+    0x00                  -->                  0xff
+    r12 r13 r14 r15 rip rsp rbx rbp fpucw16 mxcsr32
+    0   8   10  18  20  28  30  38  40      44
+*/
+    // rdi - from_co | rsi - to_co
+    mov     rdx,QWORD PTR [rsp]      // retaddr
+    lea     rcx,[rsp+0x8]            // rsp as it is once the return address is popped
+    mov     QWORD PTR [rdi+0x0], r12
+    mov     QWORD PTR [rdi+0x8], r13
+    mov     QWORD PTR [rdi+0x10],r14
+    mov     QWORD PTR [rdi+0x18],r15
+    mov     QWORD PTR
[rdi+0x20],rdx // retaddr (stored in the rip slot)
+    mov     QWORD PTR [rdi+0x28],rcx // rsp
+    mov     QWORD PTR [rdi+0x30],rbx
+    mov     QWORD PTR [rdi+0x38],rbp
+#ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    fnstcw  WORD PTR [rdi+0x40]      // save the x87 control word of from_co
+    stmxcsr DWORD PTR [rdi+0x44]     // save MXCSR of from_co
+#endif
+    mov     r12,QWORD PTR [rsi+0x0]
+    mov     r13,QWORD PTR [rsi+0x8]
+    mov     r14,QWORD PTR [rsi+0x10]
+    mov     r15,QWORD PTR [rsi+0x18]
+    mov     rax,QWORD PTR [rsi+0x20] // retaddr
+    mov     rcx,QWORD PTR [rsi+0x28] // rsp (kept in rcx until every load from to_co is done)
+    mov     rbx,QWORD PTR [rsi+0x30]
+    mov     rbp,QWORD PTR [rsi+0x38]
+#ifndef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    fldcw   WORD PTR [rsi+0x40]      // restore the x87 control word of to_co
+    ldmxcsr DWORD PTR [rsi+0x44]     // restore MXCSR of to_co
+#endif
+    // rdi (from_co) is no longer needed and is overwritten with the argument.
+    // Pass the user-provided argument as first argument:
+#ifdef ACO_CONFIG_SHARE_FPU_MXCSR_ENV
+    mov     rdi,QWORD PTR [rsi+0x48]
+#else
+    mov     rdi,QWORD PTR [rsi+0x50]
+#endif
+    // Switch to the stack of to_co and continue at its saved rip.
+    mov     rsp,rcx
+    jmp     rax
+#else
+    #error "platform not supported"
+#endif
+/* void aco_save_fpucw_mxcsr(void* p): store the x87 control word at [p] and MXCSR at [p+4]. */
+.globl aco_save_fpucw_mxcsr
+#if defined(__APPLE__)
+#else
+.type  aco_save_fpucw_mxcsr, @function
+#endif
+.intel_syntax noprefix
+aco_save_fpucw_mxcsr:
+#ifdef __i386__
+    mov     eax,DWORD PTR [esp+0x4]     // ptr (first stack argument)
+    fnstcw  WORD  PTR [eax]
+    stmxcsr DWORD PTR [eax+0x4]
+    ret
+#elif __x86_64__
+    fnstcw  WORD PTR [rdi]              // rdi = ptr
+    stmxcsr DWORD PTR [rdi+0x4]
+    ret
+#else
+    #error "platform not supported"
+#endif
+// External C symbols called by aco_funcp_protector_asm (underscore-prefixed on Apple targets).
+#if defined(__APPLE__)
+.globl _abort
+.globl _aco_funcp_protector
+#else
+.globl abort
+.globl aco_funcp_protector
+#endif
+/* void aco_funcp_protector_asm(void): align the stack to 16 bytes, call aco_funcp_protector, then abort. */
+.globl aco_funcp_protector_asm
+#if defined(__APPLE__)
+#else
+.type  aco_funcp_protector_asm, @function
+#endif
+.intel_syntax noprefix
+aco_funcp_protector_asm:
+#ifdef __i386__
+    and     esp,0xfffffff0              // round esp down to a 16-byte boundary
+    #if defined(__APPLE__)
+    call    _aco_funcp_protector
+    call    _abort
+    #else
+    #if defined(__pic__) || defined(__PIC__)
+    call    aco_funcp_protector@PLT     // position-independent build: call through the PLT
+    call    abort@PLT
+    #else
+    call    aco_funcp_protector
+    call    abort
+    #endif
+    #endif
+    ret                                 // only reached if abort returns
+#elif __x86_64__
+    and     rsp,0xfffffffffffffff0      // round rsp down to a 16-byte boundary
+    #if defined(__APPLE__)
+    call    _aco_funcp_protector
+    call    _abort
+    #else
+    #if defined(__pic__) || defined(__PIC__)
+    call
aco_funcp_protector@PLT     // position-independent build: call through the PLT
+    call    abort@PLT
+    #else
+    call    aco_funcp_protector
+    call    abort
+    #endif
+    #endif
+    ret                                 // only reached if abort returns
+#else
+    #error "platform not supported"
+#endif
diff --git a/examples/coroutines/coroutines.tm b/examples/coroutines/coroutines.tm
new file mode 100644
index 00000000..b530a685
--- /dev/null
+++ b/examples/coroutines/coroutines.tm
@@ -0,0 +1,67 @@
+# This is a coroutine library that uses libaco (https://libaco.org)
+#
+# Lua programmers will recognize this as similar to Lua's stackful coroutines.
+#
+# Async/Await programmers will weep at its beauty and gnash their teeth and
+# rend their garments in despair at what they could have had.
+
+use ./aco.h
+use ./aco.c
+use ./acoyield.S
+# Demo: build a coroutine from a closure, print it, then call resume() three times.
+func main()
+    say("Example usage")
+    co := Coroutine(func()
+        say("I'm in the coroutine!")
+        yield()
+        say("I'm back in the coroutine!")
+    )
+    >> co
+    say("I'm in the main func")
+    >> co.resume()
+    say("I'm back in the main func")
+    >> co.resume()
+    say("I'm back in the main func again")
+    >> co.resume()
+# Opaque handles for the C types declared in aco.h.
+struct aco_t(; extern, opaque)
+struct aco_shared_stack_t(; extern, opaque)
+# Both stay `none` until _init() runs on the first Coroutine conversion.
+_main_co : @aco_t? = none
+_shared_stack : @aco_shared_stack_t? = none
+# Wraps the aco_t pointer returned by aco_create() for a Tomo closure.
+struct Coroutine(co:@aco_t)
+    convert(fn:func() -> Coroutine)
+        if not _main_co
+            _init()
+
+        main_co := _main_co
+        shared_stack := _shared_stack
+        aco_ptr := C_code:@aco_t(
+            aco_create(@main_co, @shared_stack, 0, (void*)@fn.fn, @fn.userdata)
+        )
+        return Coroutine(aco_ptr)
+    # Reads the is_finished flag of the underlying aco_t.
+    func is_finished(co:Coroutine->Bool; inline)
+        return C_code:Bool(((aco_t*)@co.co)->is_finished)
+    # Returns `no` without resuming when the coroutine has finished; otherwise resumes it and returns `yes`.
+    func resume(co:Coroutine->Bool)
+        if co.is_finished()
+            return no
+        C_code { aco_resume(@co.co); }
+        return yes
+# Setup used by the conversion above: installs GC_malloc, inits the thread, creates the main co and the shared stack.
+func _init()
+    C_code {
+        aco_set_allocator(GC_malloc, NULL);
+        aco_thread_init(aco_exit_fn);
+    }
+    _main_co = C_code:@aco_t(aco_create(NULL, NULL, 0, NULL, NULL))
+
+    _shared_stack = C_code:@aco_shared_stack_t(aco_shared_stack_new(0))
+# Thin wrapper around the aco_yield() macro from aco.h.
+func yield(; inline)
+    C_code {
+        aco_yield();
+    }
+
|
