ggml-org
diff --git a/ggml/src/ggml-cuda/CMakeLists.txt b/ggml/src/ggml-cuda/CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
Lines changed: 13 additions & 0 deletions
diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml/src/ggml-cuda/convert.cuh
Lines changed: 1 addition & 0 deletions
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
Lines changed: 340 additions & 4 deletions
diff --git a/ggml/src/ggml-cuda/mmvf.cu b/ggml/src/ggml-cuda/mmvf.cu
Lines changed: 33 additions & 329 deletions
@@ -50,6 +50,10 @@ if (CUDAToolkit_FOUND)
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
     file(GLOB   SRCS "template-instances/mmq*.cu")  # SRCS is overwritten with the files matching mmq*.cu ...
     list(APPEND GGML_SOURCES_CUDA ${SRCS})  # ... and those files are appended to the running CUDA source list
+    file(GLOB   SRCS "template-instances/mmvq*.cu")  # added by this change: same glob-then-append pair for mmvq*.cu
+    list(APPEND GGML_SOURCES_CUDA ${SRCS})
+    file(GLOB   SRCS "template-instances/mmvf*.cu")  # added by this change: same glob-then-append pair for mmvf*.cu
+    list(APPEND GGML_SOURCES_CUDA ${SRCS})
     file(GLOB   SRCS "template-instances/mmf*.cu")  # NOTE(review): none of these globs use CONFIGURE_DEPENDS; newly added files presumably need a CMake re-run to be picked up
     list(APPEND GGML_SOURCES_CUDA ${SRCS})
 
 
@@ -1005,3 +1005,16 @@ struct ggml_backend_cuda_context {
         return pool(device);
     }
 };
+
+struct ggml_cuda_mm_fusion_args_host { // bundle of three optional tensor pointers plus a GLU op; judging by the name, the host-side fusion arguments — TODO confirm
+    const ggml_tensor * x_bias = nullptr; // defaults to nullptr; presumably nullptr means "nothing to fuse" — verify against callers
+    const ggml_tensor * gate = nullptr; // defaults to nullptr
+    const ggml_tensor * gate_bias = nullptr; // defaults to nullptr
+    ggml_glu_op glu_op; // NOTE(review): no default initializer, unlike the pointers above; must be assigned before it is read
+};
+struct ggml_cuda_mm_fusion_args_device { // same four fields as ggml_cuda_mm_fusion_args_host, but the pointers are untyped (const void *)
+    const void * x_bias = nullptr; // defaults to nullptr; presumably a raw data pointer usable from device code — TODO confirm
+    const void * gate = nullptr; // defaults to nullptr
+    const void * gate_bias = nullptr; // defaults to nullptr
+    ggml_glu_op glu_op; // NOTE(review): no default initializer, unlike the pointers above; must be assigned before it is read
+};
@@ -1,3 +1,4 @@
+#pragma once
 #include "common.cuh"
 
 #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+#pragma once`
`1`	`2`	`#include "common.cuh"`
`2`	`3`
`3`	`4`	`#define CUDA_DEQUANTIZE_BLOCK_SIZE 256`