1 file changed: +11 -2 lines changed
QEfficient/finetune/utils
Original file line number | Diff line number | Diff line change
@@ -123,11 +123,19 @@ def train(
123123 break
124124
125125 if train_config .use_peft and train_config .from_peft_checkpoint :
126- intermediate_epoch = int (train_config .from_peft_checkpoint .split ("/" )[- 2 ].split ("_" )[- 1 ]) - 1
127- intermediate_step = int (train_config .from_peft_checkpoint .split ("/" )[- 1 ].split ("_" )[- 1 ])
126+ try :
127+ intermediate_epoch = int (train_config .from_peft_checkpoint .split ("/" )[- 2 ].split ("_" )[- 1 ]) - 1
128+ intermediate_step = int (train_config .from_peft_checkpoint .split ("/" )[- 1 ].split ("_" )[- 1 ])
129+ except (IndexError , ValueError ):
130+ intermediate_epoch = int (train_config .from_peft_checkpoint .split ("/" )[- 1 ].split ("_" )[- 1 ]) - 1
131+ intermediate_step = 0
132+
128133 if epoch < intermediate_epoch :
129134 logger .log_rank_zero (f"Skipping epoch { epoch + 1 } since fine tuning has already completed for it." )
130135 continue
136+ if intermediate_step == 0 and epoch == intermediate_epoch :
137+ logger .log_rank_zero (f"Skipping epoch { epoch + 1 } , since fine tuning has already completed for it." )
138+ continue
131139
132140 logger .log_rank_zero (f"Starting epoch { epoch + 1 } /{ train_config .num_epochs } " )
133141 if max_steps_reached :
@@ -154,6 +162,7 @@ def train(
154162 # resume training from a particular checkpoint, assuming the dataset is not shuffled
155163 if train_config .use_peft and train_config .from_peft_checkpoint :
156164 # bring the train_step count back in sync with where the previous run left off
165+
157166 if epoch == intermediate_epoch and step == 0 :
158167 logger .log_rank_zero (
159168 f"Skipping first { intermediate_step } steps for epoch { epoch + 1 } , since fine tuning has already completed for it."
You can’t perform that action at this time.
0 commit comments