소스 검색

Removed busy loop from SDL_GPUFence on macOS

Previously, MetalFence in the Metal backend was implemented as a simple busy
loop on an atomic int, meaning the CPU would busy-wait for the GPU to finish,
taking power away from the GPU and decreasing battery life. This was the only
kind of CPU-GPU synchronization available (apart from requesting a swapchain).
Alex Tselousov 3 달 전
부모
커밋
514b26e4c4
1개의 변경된 파일, 37개의 추가 그리고 24개의 삭제
  1. 37 24
      src/gpu/metal/SDL_gpu_metal.m

+ 37 - 24
src/gpu/metal/SDL_gpu_metal.m

@@ -430,6 +430,7 @@ static MTLDepthClipMode SDLToMetal_DepthClipMode(
 // Structs
 
 typedef struct MetalRenderer MetalRenderer;
+typedef struct MetalCommandBuffer MetalCommandBuffer;
 
 typedef struct MetalTexture
 {
@@ -453,7 +454,8 @@ typedef struct MetalTextureContainer
 
 typedef struct MetalFence
 {
-    SDL_AtomicInt complete;
+    // can be NULL if the command buffer was recycled
+    MetalCommandBuffer *commandBuffer;
     SDL_AtomicInt referenceCount;
 } MetalFence;
 
@@ -2093,7 +2095,6 @@ static Uint8 METAL_INTERNAL_CreateFence(
     MetalFence *fence;
 
     fence = SDL_calloc(1, sizeof(MetalFence));
-    SDL_SetAtomicInt(&fence->complete, 0);
     SDL_SetAtomicInt(&fence->referenceCount, 0);
 
     // Add it to the available pool
@@ -2136,7 +2137,7 @@ static bool METAL_INTERNAL_AcquireFence(
 
     // Associate the fence with the command buffer
     commandBuffer->fence = fence;
-    SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right?
+    fence->commandBuffer = commandBuffer;
     (void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount);
 
     return true;
@@ -3517,6 +3518,8 @@ static void METAL_INTERNAL_CleanCommandBuffer(
         METAL_ReleaseFence(
             (SDL_GPURenderer *)renderer,
             (SDL_GPUFence *)commandBuffer->fence);
+    } else {
+        commandBuffer->fence->commandBuffer = NULL;
     }
 
     // Return command buffer to pool
@@ -3587,6 +3590,16 @@ static void METAL_INTERNAL_PerformPendingDestroys(
 }
 
 // Fences
+static bool METAL_INTERNAL_IsFenceBusy(
+        MetalFence *fence
+) {
+    if (!fence->commandBuffer) {
+        return false; // command buffer was recycled
+    }
+
+    MTLCommandBufferStatus status = fence->commandBuffer->handle.status;
+    return status == MTLCommandBufferStatusCommitted || status == MTLCommandBufferStatusScheduled;
+}
 
 static bool METAL_WaitForFences(
     SDL_GPURenderer *driverData,
@@ -3596,24 +3609,29 @@ static bool METAL_WaitForFences(
 {
     @autoreleasepool {
         MetalRenderer *renderer = (MetalRenderer *)driverData;
-        bool waiting;
 
         if (waitAll) {
             for (Uint32 i = 0; i < numFences; i += 1) {
-                while (!SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete)) {
-                    // Spin!
+                MetalFence *fence = (MetalFence *)fences[i];
+                if (METAL_INTERNAL_IsFenceBusy(fence)) {
+                    [fence->commandBuffer->handle waitUntilCompleted];
                 }
             }
         } else {
-            waiting = 1;
-            while (waiting) {
-                for (Uint32 i = 0; i < numFences; i += 1) {
-                    if (SDL_GetAtomicInt(&((MetalFence *)fences[i])->complete) > 0) {
-                        waiting = 0;
-                        break;
-                    }
-                }
+            dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
+            for (Uint32 i = 0; i < numFences; i += 1) {
+                MetalFence *fence = (MetalFence *)fences[i];
+                // command buffer has completed and been recycled
+                if(!fence->commandBuffer)
+                    return true;
+
+                // even if it's completed, the handle will call back straight away
+                [fence->commandBuffer->handle addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
+                    dispatch_semaphore_signal(semaphore);
+                }];
             }
+
+            dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
         }
 
         METAL_INTERNAL_PerformPendingDestroys(renderer);
@@ -3627,7 +3645,7 @@ static bool METAL_QueryFence(
     SDL_GPUFence *fence)
 {
     MetalFence *metalFence = (MetalFence *)fence;
-    return SDL_GetAtomicInt(&metalFence->complete) == 1;
+    return METAL_INTERNAL_IsFenceBusy(metalFence);
 }
 
 // Window and Swapchain Management
@@ -4090,11 +4108,6 @@ static bool METAL_Submit(
             windowData->frameCounter = (windowData->frameCounter + 1) % renderer->allowedFramesInFlight;
         }
 
-        // Notify the fence when the command buffer has completed
-        [metalCommandBuffer->handle addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
-          SDL_AtomicIncRef(&metalCommandBuffer->fence->complete);
-        }];
-
         // Submit the command buffer
         [metalCommandBuffer->handle commit];
         metalCommandBuffer->handle = nil;
@@ -4112,7 +4125,8 @@ static bool METAL_Submit(
 
         // Check if we can perform any cleanups
         for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) {
-            if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) {
+
+            if (!METAL_INTERNAL_IsFenceBusy(renderer->submittedCommandBuffers[i]->fence)) {
                 METAL_INTERNAL_CleanCommandBuffer(
                     renderer,
                     renderer->submittedCommandBuffers[i],
@@ -4165,9 +4179,8 @@ static bool METAL_Wait(
          * Sort of equivalent to vkDeviceWaitIdle.
          */
         for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) {
-            while (!SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) {
-                // Spin!
-            }
+            SDL_GPUFence *opaqueFence = (SDL_GPUFence *)renderer->submittedCommandBuffers[i]->fence;
+            METAL_WaitForFences(driverData, true, &opaqueFence, 1);
         }
 
         SDL_LockMutex(renderer->submitLock);