feat: implement gemma3n text model in MLXLLM (#346)

xlab · web-flow · commit 505c86f60716 · 2025-07-22T09:49:18.000-07:00
* feat: implement gemma3n text model in MLXLLM

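* added gemma3n to LLMModelFactory (model type registration and LLMRegistry configurations)
* added gemma3n E2B/E4B 4-bit models to MLXService
* added gemma3n to MLXChatExample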
diff --git a/.gitignore b/.gitignore
@@ -92,4 +92,6 @@ iOSInjectionProject/
 # OS
 .DS_Store
 
-.idea
+.idea
+.vscode
+
diff --git a/Applications/MLXChatExample/Services/MLXService.swift b/Applications/MLXChatExample/Services/MLXService.swift
@@ -30,6 +30,8 @@ class MLXService {
         LMModel(name: "qwen2VL:2b", configuration: VLMRegistry.qwen2VL2BInstruct4Bit, type: .vlm),
         LMModel(name: "smolVLM", configuration: VLMRegistry.smolvlminstruct4bit, type: .vlm),
         LMModel(name: "acereason:7B", configuration: LLMRegistry.acereason_7b_4bit, type: .llm),
+        LMModel(name: "gemma3n:E2B", configuration: LLMRegistry.gemma3n_E2B_it_lm_4bit, type: .llm),
+        LMModel(name: "gemma3n:E4B", configuration: LLMRegistry.gemma3n_E4B_it_lm_4bit, type: .llm),
     ]
 
     /// Cache to store loaded model containers to avoid reloading.
diff --git a/Libraries/MLXLLM/LLMModelFactory.swift b/Libraries/MLXLLM/LLMModelFactory.swift
@@ -35,15 +35,16 @@ public class LLMTypeRegistry: ModelTypeRegistry, @unchecked Sendable {
             "phimoe": create(PhiMoEConfiguration.self, PhiMoEModel.init),
             "gemma": create(GemmaConfiguration.self, GemmaModel.init),
             "gemma2": create(Gemma2Configuration.self, Gemma2Model.init),
+            "gemma3": create(Gemma3TextConfiguration.self, Gemma3TextModel.init),
+            "gemma3_text": create(Gemma3TextConfiguration.self, Gemma3TextModel.init),
+            "gemma3n": create(Gemma3nTextConfiguration.self, Gemma3nTextModel.init),
             "qwen2": create(Qwen2Configuration.self, Qwen2Model.init),
             "qwen3": create(Qwen3Configuration.self, Qwen3Model.init),
             "qwen3_moe": create(Qwen3MoEConfiguration.self, Qwen3MoEModel.init),
             "starcoder2": create(Starcoder2Configuration.self, Starcoder2Model.init),
             "cohere": create(CohereConfiguration.self, CohereModel.init),
             "openelm": create(OpenElmConfiguration.self, OpenELMModel.init),
             "internlm2": create(InternLM2Configuration.self, InternLM2Model.init),
-            "gemma3_text": create(Gemma3TextConfiguration.self, Gemma3TextModel.init),
-            "gemma3": create(Gemma3TextConfiguration.self, Gemma3TextModel.init),
             "granite": create(GraniteConfiguration.self, GraniteModel.init),
             "mimo": create(MiMoConfiguration.self, MiMoModel.init),
             "glm4": create(GLM4Configuration.self, GLM4Model.init),
@@ -56,7 +57,6 @@ public class LLMTypeRegistry: ModelTypeRegistry, @unchecked Sendable {
             "exaone4": create(Exaone4Configuration.self, Exaone4Model.init),
         ]
     }
-
 }
 
 /// Registry of models and any overrides that go with them, e.g. prompt augmentation.
@@ -138,6 +138,40 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
         defaultPrompt: "What is the difference between lettuce and cabbage?"
     )
 
+    static public let gemma3_1B_qat_4bit = ModelConfiguration(
+        id: "mlx-community/gemma-3-1b-it-qat-4bit",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        extraEOSTokens: ["<end_of_turn>"]
+    )
+
+    static public let gemma3n_E4B_it_lm_bf16 = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E4B-it-lm-bf16",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        // https://ai.google.dev/gemma/docs/core/prompt-structure
+        extraEOSTokens: ["<end_of_turn>"]
+    )
+
+    static public let gemma3n_E2B_it_lm_bf16 = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E2B-it-lm-bf16",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        // https://ai.google.dev/gemma/docs/core/prompt-structure
+        extraEOSTokens: ["<end_of_turn>"]
+    )
+
+    static public let gemma3n_E4B_it_lm_4bit = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E4B-it-lm-4bit",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        // https://ai.google.dev/gemma/docs/core/prompt-structure
+        extraEOSTokens: ["<end_of_turn>"]
+    )
+
+    static public let gemma3n_E2B_it_lm_4bit = ModelConfiguration(
+        id: "mlx-community/gemma-3n-E2B-it-lm-4bit",
+        defaultPrompt: "What is the difference between a fruit and a vegetable?",
+        // https://ai.google.dev/gemma/docs/core/prompt-structure
+        extraEOSTokens: ["<end_of_turn>"]
+    )
+
     static public let qwen205b4bit = ModelConfiguration(
         id: "mlx-community/Qwen1.5-0.5B-Chat-4bit",
         overrideTokenizer: "PreTrainedTokenizer",
@@ -205,12 +239,6 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
         defaultPrompt: "What is the difference between a fruit and a vegetable?"
     )
 
-    static public let gemma3_1B_qat_4bit = ModelConfiguration(
-        id: "mlx-community/gemma-3-1b-it-qat-4bit",
-        defaultPrompt: "What is the difference between a fruit and a vegetable?",
-        extraEOSTokens: ["<end_of_turn>"]
-    )
-
     static public let granite3_3_2b_4bit = ModelConfiguration(
         id: "mlx-community/granite-3.3-2b-instruct-4bit",
         defaultPrompt: ""
@@ -268,6 +296,11 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
             gemma2bQuantized,
             gemma_2_2b_it_4bit,
             gemma_2_9b_it_4bit,
+            gemma3_1B_qat_4bit,
+            gemma3n_E4B_it_lm_bf16,
+            gemma3n_E2B_it_lm_bf16,
+            gemma3n_E4B_it_lm_4bit,
+            gemma3n_E2B_it_lm_4bit,
             granite3_3_2b_4bit,
             llama3_1_8B_4bit,
             llama3_2_1B_4bit,
@@ -288,7 +321,6 @@ public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
             qwen3_8b_4bit,
             qwen3MoE_30b_a3b_4bit,
             smolLM_135M_4bit,
-            gemma3_1B_qat_4bit,
             mimo_7b_sft_4bit,
             glm4_9b_4bit,
             acereason_7b_4bit,
diff --git a/Libraries/MLXLLM/Models/Gemma3nText.swift b/Libraries/MLXLLM/Models/Gemma3nText.swift

-Original file line number
+Diff line change
 # OS
 .DS_Store
 -.idea
 +.idea
 +.vscode
++
Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,8 @@ class MLXService {`
`30`	`30`	`LMModel(name: "qwen2VL:2b", configuration: VLMRegistry.qwen2VL2BInstruct4Bit, type: .vlm),`
`31`	`31`	`LMModel(name: "smolVLM", configuration: VLMRegistry.smolvlminstruct4bit, type: .vlm),`
`32`	`32`	`LMModel(name: "acereason:7B", configuration: LLMRegistry.acereason_7b_4bit, type: .llm),`
	`33`	`+ LMModel(name: "gemma3n:E2B", configuration: LLMRegistry.gemma3n_E2B_it_lm_4bit, type: .llm),`
	`34`	`+ LMModel(name: "gemma3n:E4B", configuration: LLMRegistry.gemma3n_E4B_it_lm_4bit, type: .llm),`
`33`	`35`	`]`
`34`	`36`
`35`	`37`	`/// Cache to store loaded model containers to avoid reloading.`