We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 12e4fff, commit 43a657a (Copy full SHA for 43a657a)
1 file changed
fastdeploy/model_executor/models/glm4_mtp.py
@@ -369,3 +369,7 @@ def forward(
369
)
370
371
return hidden_states
372
+
373
+ def clear_grpah_opt_backend(self):
374
+ """Clear graph optimization backend, the captured cuda graph will be cleaned"""
375
+ self.model.clear_grpah_opt_backend(fd_config=self.fd_config)
0 commit comments