@@ -100,6 +100,7 @@ def _test_gap_patch_rebalance(allocator: BaseLayerAllocator):
100100 per_node_mem = {
101101 nid : (node .per_decoder_layer_kv_cache_memory or 0 )
102102 for nid , node in allocator .node_id_to_node .items ()
103+ if nid in allocator .node_allocation
103104 }
104105 min_mem = min (per_node_mem .values ()) if per_node_mem else 0
105106
@@ -279,3 +280,38 @@ def test_pipeline_required_with_midrange_only(strategy: Literal["greedy", "dp"])
279280 assert len (assigned ) >= 2
280281 total = sum (e - s for _ , s , e in assigned )
281282 assert total == model .num_layers
283+
284+
285+ @pytest .mark .parametrize ("strategy" , ["greedy" , "dp" ])
286+ def test_allocator_does_not_duplicate_leftover_nodes (strategy : Literal ["greedy" , "dp" ]):
287+ """Both allocators should not duplicate self.nodes when left over after allocation.
288+
289+ Greedy: builds multiple pipelines, leaves nodes that can't form another pipeline
290+ DP: optimizes for best pipeline configuration, may leave suboptimal nodes unallocated
291+ """
292+ model = build_model_info (12 )
293+
294+ if strategy == "greedy" :
295+ # Two strong nodes form two complete pipelines (greedy maximizes pipelines)
296+ # One weak node left unallocated
297+ a100_1 = _build_node ("a100-80g" , model , id_suffix = "-a1" )
298+ a100_2 = _build_node ("a100-80g" , model , id_suffix = "-a2" )
299+ r1 = _build_node ("rtx4090" , model , id_suffix = "-1" )
300+ nodes = [a100_1 , a100_2 , r1 ]
301+ expected_node_count = 3
302+ else :
303+ # One strong node handles all layers (DP finds this optimal)
304+ # One weak node left unallocated
305+ a100 = _build_node ("a100-80g" , model , id_suffix = "-a" )
306+ r1 = _build_node ("rtx4090" , model , id_suffix = "-1" )
307+ nodes = [a100 , r1 ]
308+ expected_node_count = 2
309+
310+ alloc = (
311+ GreedyLayerAllocator (model , nodes )
312+ if strategy == "greedy"
313+ else DynamicProgrammingLayerAllocator (model , nodes )
314+ )
315+ ok = alloc .global_allocation ()
316+ assert ok is True
317+ assert len (alloc .nodes ) == expected_node_count , "Should not duplicate nodes during allocation"
0 commit comments