Bladeren bron

Add more gc control to improve performance (#510)

* add more gc control

* fix

* Update gc.c

* Update ceval.c

* [no ci] Update 711_gc.py
BLUELOVETH 1 week geleden
bovenliggende
commit
c624833cfb

+ 7 - 7
include/pocketpy/objects/object.h

@@ -6,8 +6,8 @@
 typedef struct PyObject {
     py_Type type;  // we have a duplicated type here for convenience
     uint8_t size_8b;
-    bool gc_marked;
-    int slots;  // number of slots in the object
+    uint8_t gc_marked;  // bit 0: self is marked; bit 1: do not mark recursively (untracked)
+    int slots;          // number of slots in the object
     char flex[];
 } PyObject;
 
@@ -25,11 +25,11 @@ void* PyObject__userdata(PyObject* self);
 
 void PyObject__dtor(PyObject* self);
 
-
 #define pk__mark_value(val)                                                                        \
-    if((val)->is_ptr && !(val)->_obj->gc_marked) {                                                 \
+    if((val)->is_ptr) {                                                                            \
         PyObject* obj = (val)->_obj;                                                               \
-        obj->gc_marked = true;                                                                     \
-        c11_vector__push(PyObject*, p_stack, obj);                                                 \
+        if(!(obj->gc_marked & 0b01)) {                                                             \
+            obj->gc_marked |= 0b01;                                                                \
+            if(!(obj->gc_marked & 0b10)) { c11_vector__push(PyObject*, p_stack, obj); }            \
+        }                                                                                          \
     }
-

+ 13 - 0
include/typings/gc.pyi

@@ -25,3 +25,16 @@ def collect_hint() -> int:
 
 def setup_debug_callback(cb: Callable[[Literal['start', 'stop'], str], None] | None) -> None:
     """Setup a callback that will be triggered at the end of each collection."""
+
+def is_tracked(obj: object) -> bool:
+    """Return True if `obj` is a heap object whose references the GC marks recursively."""
+
+def track(obj: object) -> None:
+    """Start tracking this object recursively."""
+
+def untrack(obj: object) -> None:
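+    """Stop tracking this object recursively.
+
+    This improves performance for container objects with value types like `list[int]`. Do not untrack an object that references heap objects: the GC will not mark them through it, so they may be collected.
+    """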
+

+ 5 - 1
src/interpreter/ceval.c

@@ -539,11 +539,15 @@ __NEXT_STEP:
             DISPATCH();
         }
         case OP_BUILD_TUPLE: {
+            bool need_track = false;
             py_TValue tmp;
             py_Ref p = py_newtuple(&tmp, byte.arg);
             py_TValue* begin = SP() - byte.arg;
-            for(int i = 0; i < byte.arg; i++)
+            for(int i = 0; i < byte.arg; i++) {
                 p[i] = begin[i];
+                if(p[i].is_ptr) need_track = true;
+            }
+            if(!need_track) tmp._obj->gc_marked |= 0b10;
             SP() = begin;
             PUSH(&tmp);
             DISPATCH();

+ 3 - 3
src/interpreter/heap.c

@@ -202,8 +202,8 @@ int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
     int large_living_count = 0;
     for(int i = 0; i < self->large_objects.length; i++) {
         PyObject* obj = c11__getitem(PyObject*, &self->large_objects, i);
-        if(obj->gc_marked) {
-            obj->gc_marked = false;
+        if(obj->gc_marked & 0b01) {
+            obj->gc_marked &= 0b10;
             c11__setitem(PyObject*, &self->large_objects, large_living_count, obj);
             large_living_count++;
         } else {
@@ -238,7 +238,7 @@ PyObject* ManagedHeap__gcnew(ManagedHeap* self, py_Type type, int slots, int uds
     }
     obj->type = type;
     obj->size_8b = size_8b;
-    obj->gc_marked = false;
+    obj->gc_marked = 0;
     obj->slots = slots;
 
     // initialize slots or dict

+ 4 - 4
src/interpreter/objectpool.c

@@ -38,16 +38,16 @@ static int PoolArena__sweep_dealloc(PoolArena* self, int* out_types) {
             self->unused[self->unused_length] = i;
             self->unused_length++;
         } else {
-            if(!obj->gc_marked) {
+            if(obj->gc_marked & 0b01) {
+                // marked, clear mark
+                obj->gc_marked &= 0b10;
+            } else {
                 // not marked, need to free
                 if(out_types) out_types[obj->type]++;
                 PyObject__dtor(obj);
                 obj->type = 0;
                 self->unused[self->unused_length] = i;
                 self->unused_length++;
-            } else {
-                // marked, clear mark
-                obj->gc_marked = false;
             }
         }
     }

+ 5 - 5
src/interpreter/vm.c

@@ -265,10 +265,10 @@ void VM__ctor(VM* self) {
     pk__add_module_unicodedata();
 
     pk__add_module_conio();
-    pk__add_module_lz4();       // optional
-    pk__add_module_cute_png();  // optional
-    pk__add_module_msgpack();   // optional
-    py__add_module_periphery(); // optional
+    pk__add_module_lz4();        // optional
+    pk__add_module_cute_png();   // optional
+    pk__add_module_msgpack();    // optional
+    py__add_module_periphery();  // optional
     pk__add_module_pkpy();
     pk__add_module_picoterm();
 
@@ -697,7 +697,7 @@ void ManagedHeap__mark(ManagedHeap* self) {
         PyObject* obj = c11_vector__back(PyObject*, p_stack);
         c11_vector__pop(p_stack);
 
-        assert(obj->gc_marked);
+        assert(obj->gc_marked & 0b01);
 
         if(obj->slots > 0) {
             py_TValue* p = PyObject__slots(obj);

+ 31 - 0
src/modules/gc.c

@@ -48,6 +48,33 @@ static bool gc_setup_debug_callback(int argc, py_Ref argv) {
     return true;
 }
 
+static bool gc_is_tracked(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    if(!argv->is_ptr) {
+        py_newbool(py_retval(), false);
+        return true;
+    }
+    bool res = !(argv->_obj->gc_marked & 0b10);
+    py_newbool(py_retval(), res);
+    return true;
+}
+
+static bool gc_track(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    if(!argv->is_ptr) return TypeError("gc.track() only accepts objects");
+    argv->_obj->gc_marked &= 0b01;
+    py_newnone(py_retval());
+    return true;
+}
+
+static bool gc_untrack(int argc, py_Ref argv) {
+    PY_CHECK_ARGC(1);
+    if(!argv->is_ptr) return TypeError("gc.untrack() only accepts objects");
+    argv->_obj->gc_marked |= 0b10;
+    py_newnone(py_retval());
+    return true;
+}
+
 void pk__add_module_gc() {
     py_Ref mod = py_newmodule("gc");
 
@@ -58,4 +85,8 @@ void pk__add_module_gc() {
     py_bindfunc(mod, "collect", gc_collect);
     py_bindfunc(mod, "collect_hint", gc_collect_hint);
     py_bindfunc(mod, "setup_debug_callback", gc_setup_debug_callback);
+
+    py_bindfunc(mod, "is_tracked", gc_is_tracked);
+    py_bindfunc(mod, "track", gc_track);
+    py_bindfunc(mod, "untrack", gc_untrack);
 }

+ 7 - 1
tests/711_gc.py

@@ -12,4 +12,10 @@ gc.collect()
 
 create_garbage()
 create_garbage()
-create_garbage()
+c = create_garbage()
+
+assert gc.is_tracked(c) == True
+gc.untrack(c)
+assert gc.is_tracked(c) == False
+gc.track(c)
+assert gc.is_tracked(c) == True