From 5f019340be58f34c74b2c771b483f5115ce78873 Mon Sep 17 00:00:00 2001
From: Synray <31429825+Synray@users.noreply.github.com>
Date: Tue, 15 Aug 2023 13:01:40 -0700
Subject: [PATCH] Small backprop optimization, fix gym.h

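Instead of dividing every parameter's gradient by the sample count `n`
at the end, scale the output-layer gradient by `1/n` at the start,
reducing the number of divisions. This is mathematically equivalent
(up to floating-point rounding) because each parameter gradient is
linear in the output gradient, so the `1/n` term propagates backwards
to all the gradients.

Also fix the pixel normalization in gym_nn_image_grayscale() in gym.h:
the clamped activation is now mapped with `(a - low)/(high - low)`
instead of `(a + low)/(high - low)`.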
---
 gym.h |  2 +-
 nn.h  | 15 ++-------------
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/gym.h b/gym.h
index 50a9438..c71271e 100644
--- a/gym.h
+++ b/gym.h
@@ -272,7 +272,7 @@ void gym_nn_image_grayscale(NN nn, void *pixels, size_t width, size_t height, si
             float a = ROW_AT(NN_OUTPUT(nn), 0);
             if (a < low) a = low;
             if (a > high) a = high;
-            uint32_t pixel = (a + low)/(high - low)*255.f;
+            uint32_t pixel = (a - low)/(high - low)*255.f;
             pixels_u32[y*stride + x] = (0xFF<<(8*3))|(pixel<<(8*2))|(pixel<<(8*1))|(pixel<<(8*0));
         }
     }
diff --git a/nn.h b/nn.h
index 469c060..ff18792 100644
--- a/nn.h
+++ b/nn.h
@@ -400,9 +400,9 @@ NN nn_backprop(Region *r, NN nn, Mat t)
 
         for (size_t j = 0; j < out.cols; ++j) {
 #ifdef NN_BACKPROP_TRADITIONAL
-            ROW_AT(NN_OUTPUT(g), j) = 2*(ROW_AT(NN_OUTPUT(nn), j) - ROW_AT(out, j));
+            ROW_AT(NN_OUTPUT(g), j) = 2.0f/n*(ROW_AT(NN_OUTPUT(nn), j) - ROW_AT(out, j));
 #else
-            ROW_AT(NN_OUTPUT(g), j) = ROW_AT(NN_OUTPUT(nn), j) - ROW_AT(out, j);
+            ROW_AT(NN_OUTPUT(g), j) = 1.0f/n*(ROW_AT(NN_OUTPUT(nn), j) - ROW_AT(out, j));
 #endif // NN_BACKPROP_TRADITIONAL
         }
 
@@ -430,17 +430,6 @@ NN nn_backprop(Region *r, NN nn, Mat t)
         }
     }
 
-    for (size_t i = 0; i < g.arch_count-1; ++i) {
-        for (size_t j = 0; j < g.ws[i].rows; ++j) {
-            for (size_t k = 0; k < g.ws[i].cols; ++k) {
-                MAT_AT(g.ws[i], j, k) /= n;
-            }
-        }
-        for (size_t k = 0; k < g.bs[i].cols; ++k) {
-            ROW_AT(g.bs[i], k) /= n;
-        }
-    }
-
     return g;
 }