Skip to content

Commit 22ee634

Browse files
committed
CUDA: fuse ffn gate for mmvf
1 parent 03792ad commit 22ee634

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+1877
-745
lines changed

ggml/src/ggml-cuda/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,10 @@ if (CUDAToolkit_FOUND)
5050
list(APPEND GGML_SOURCES_CUDA ${SRCS})
5151
file(GLOB SRCS "template-instances/mmq*.cu")
5252
list(APPEND GGML_SOURCES_CUDA ${SRCS})
53+
file(GLOB SRCS "template-instances/mmvq*.cu")
54+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
55+
file(GLOB SRCS "template-instances/mmvf*.cu")
56+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
5357
file(GLOB SRCS "template-instances/mmf*.cu")
5458
list(APPEND GGML_SOURCES_CUDA ${SRCS})
5559

ggml/src/ggml-cuda/common.cuh

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1005,3 +1005,16 @@ struct ggml_backend_cuda_context {
10051005
return pool(device);
10061006
}
10071007
};
1008+
1009+
struct ggml_cuda_mm_fusion_args_host {
1010+
const ggml_tensor * x_bias = nullptr;
1011+
const ggml_tensor * gate = nullptr;
1012+
const ggml_tensor * gate_bias = nullptr;
1013+
ggml_glu_op glu_op;
1014+
};
1015+
struct ggml_cuda_mm_fusion_args_device {
1016+
const void * x_bias = nullptr;
1017+
const void * gate = nullptr;
1018+
const void * gate_bias = nullptr;
1019+
ggml_glu_op glu_op;
1020+
};

ggml/src/ggml-cuda/convert.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
#pragma once
12
#include "common.cuh"
23

34
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 340 additions & 4 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/mmvf.cu

Lines changed: 33 additions & 329 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)