diff --git a/sample/sagemaker-runtime/2017-05-13/service-2.json b/sample/sagemaker-runtime/2017-05-13/service-2.json index 0af24e9..6d1ed99 100644 --- a/sample/sagemaker-runtime/2017-05-13/service-2.json +++ b/sample/sagemaker-runtime/2017-05-13/service-2.json @@ -91,6 +91,12 @@ "pattern":"^[a-zA-Z0-9](-*[a-zA-Z0-9])*" }, "ErrorCode":{"type":"string"}, + "FilenameHeader":{ + "type":"string", + "max":32, + "min":1, + "pattern":"^(?!.*\\..*\\.)[a-zA-Z0-9][a-zA-Z0-9-_\\.]*$" + }, "Header":{ "type":"string", "max":1024, @@ -193,6 +199,18 @@ "location":"header", "locationName":"X-Amzn-SageMaker-InputLocation" }, + "S3OutputPathExtension":{ + "shape":"S3OutputPathExtensionHeader", + "documentation":"
The path extension that is appended to the Amazon S3 output path where the inference response payload is stored.
", + "location":"header", + "locationName":"X-Amzn-SageMaker-S3OutputPathExtension" + }, + "Filename":{ + "shape":"FilenameHeader", + "documentation":"The filename for the inference response payload stored in Amazon S3. If not specified, Amazon SageMaker AI generates a filename based on the inference ID.
", + "location":"header", + "locationName":"X-Amzn-SageMaker-Filename" + }, "RequestTTLSeconds":{ "shape":"RequestTTLSecondsHeader", "documentation":"Maximum age in seconds a request can be in the queue before it is marked as expired. The default is 6 hours, or 21,600 seconds.
", @@ -537,6 +555,12 @@ "documentation":"A stream of payload parts. Each part contains a portion of the response for a streaming inference request.
", "eventstream":true }, + "S3OutputPathExtensionHeader":{ + "type":"string", + "max":512, + "min":1, + "pattern":"^(?!s3:|https:)[a-zA-Z0-9!_.*'()/-]+$" + }, "ServiceUnavailable":{ "type":"structure", "members":{ diff --git a/sample/sagemaker/2017-07-24/service-2.json b/sample/sagemaker/2017-07-24/service-2.json index 192acc3..ec617f0 100644 --- a/sample/sagemaker/2017-07-24/service-2.json +++ b/sample/sagemaker/2017-07-24/service-2.json @@ -2548,6 +2548,19 @@ ], "documentation":"Retrieves detailed information about a specific training plan.
" }, + "DescribeTrainingPlanExtensionHistory":{ + "name":"DescribeTrainingPlanExtensionHistory", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"DescribeTrainingPlanExtensionHistoryRequest"}, + "output":{"shape":"DescribeTrainingPlanExtensionHistoryResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"Retrieves the extension history for a specified training plan. The response includes details about each extension, such as the offering ID, start and end dates, status, payment status, and cost information.
" + }, "DescribeTransformJob":{ "name":"DescribeTransformJob", "http":{ @@ -2667,6 +2680,19 @@ "output":{"shape":"EnableSagemakerServicecatalogPortfolioOutput"}, "documentation":"Enables using Service Catalog in SageMaker. Service Catalog is used to create SageMaker projects.
" }, + "ExtendTrainingPlan":{ + "name":"ExtendTrainingPlan", + "http":{ + "method":"POST", + "requestUri":"/" + }, + "input":{"shape":"ExtendTrainingPlanRequest"}, + "output":{"shape":"ExtendTrainingPlanResponse"}, + "errors":[ + {"shape":"ResourceNotFound"} + ], + "documentation":"Extends an existing training plan by purchasing an extension offering. This allows you to add additional compute capacity time to your training plan without creating a new plan or reconfiguring your workloads.
To find available extension offerings, use the SearchTrainingPlanOfferings API with the TrainingPlanArn parameter.
To view the history of extensions for a training plan, use the DescribeTrainingPlanExtensionHistory API.
Tags consisting of key-value pairs used to manage metadata for the tracking server.
" + }, + "S3BucketOwnerAccountId":{ + "shape":"AccountId", + "documentation":"Expected Amazon Web Services account ID that owns the Amazon S3 bucket for artifact storage. Defaults to caller's account ID if not provided.
" + }, + "S3BucketOwnerVerification":{ + "shape":"Boolean", + "documentation":"Enable Amazon S3 Ownership checks when interacting with Amazon S3 buckets from a SageMaker Managed MLflow Tracking Server. Defaults to True if not provided.
The timestamp of when the described MLflow Tracking Server was last modified.
" }, - "LastModifiedBy":{"shape":"UserContext"} + "LastModifiedBy":{"shape":"UserContext"}, + "S3BucketOwnerAccountId":{ + "shape":"AccountId", + "documentation":"Expected Amazon Web Services account ID that owns the Amazon S3 bucket for artifact storage.
" + }, + "S3BucketOwnerVerification":{ + "shape":"Boolean", + "documentation":"Whether Amazon S3 Bucket Ownership checks are enabled whenever the tracking server interacts with Amazon Amazon S3.
", + "box":true + } } }, "DescribeModelBiasJobDefinitionRequest":{ @@ -20645,6 +20703,38 @@ } } }, + "DescribeTrainingPlanExtensionHistoryRequest":{ + "type":"structure", + "required":["TrainingPlanArn"], + "members":{ + "TrainingPlanArn":{ + "shape":"TrainingPlanArn", + "documentation":"The Amazon Resource Name (ARN); of the training plan to retrieve extension history for.
" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"A token to continue pagination if more results are available.
" + }, + "MaxResults":{ + "shape":"MaxResults", + "documentation":"The maximum number of extensions to return in the response.
" + } + } + }, + "DescribeTrainingPlanExtensionHistoryResponse":{ + "type":"structure", + "required":["TrainingPlanExtensions"], + "members":{ + "TrainingPlanExtensions":{ + "shape":"TrainingPlanExtensions", + "documentation":"A list of extensions for the specified training plan.
" + }, + "NextToken":{ + "shape":"NextToken", + "documentation":"A token to continue pagination if more results are available.
" + } + } + }, "DescribeTrainingPlanRequest":{ "type":"structure", "required":["TrainingPlanName"], @@ -23136,6 +23226,26 @@ }, "documentation":"A parameter to activate explainers.
" }, + "ExtendTrainingPlanRequest":{ + "type":"structure", + "required":["TrainingPlanExtensionOfferingId"], + "members":{ + "TrainingPlanExtensionOfferingId":{ + "shape":"TrainingPlanExtensionOfferingId", + "documentation":"The unique identifier of the extension offering to purchase. You can retrieve this ID from the TrainingPlanExtensionOfferings in the response of the SearchTrainingPlanOfferings API.
The list of extensions for the training plan, including the newly created extension.
" + } + } + }, "FSxLustreConfig":{ "type":"structure", "required":[ @@ -24290,7 +24400,7 @@ }, "HubContentDocument":{ "type":"string", - "max":170391, + "max":327680, "min":0, "pattern":".*" }, @@ -25860,6 +25970,21 @@ "max":2048, "min":20 }, + "InferenceComponentAvailabilityZoneBalance":{ + "type":"structure", + "required":["EnforcementMode"], + "members":{ + "EnforcementMode":{ + "shape":"AvailabilityZoneBalanceEnforcementMode", + "documentation":"Determines how strictly the Availability Zone balance constraint is enforced.
The endpoint attempts to balance copies across Availability Zones but proceeds with scheduling even if balance can't be achieved due to available capacity or instance distribution across Availability Zones.
The maximum allowed difference in the number of inference component copies between any two Availability Zones. This parameter applies only when the endpoint has instances across two or more Availability Zones. A copy placement is allowed if it reduces imbalance or the resulting imbalance is within this value.
Default value: 0.
Configuration for balancing inference component copies across Availability Zones.
" + }, "InferenceComponentCapacitySize":{ "type":"structure", "required":[ @@ -26004,6 +26129,13 @@ "min":0, "pattern":"[a-zA-Z0-9-]+" }, + "InferenceComponentPlacementStrategy":{ + "type":"string", + "enum":[ + "SPREAD", + "BINPACK" + ] + }, "InferenceComponentRollingUpdatePolicy":{ "type":"structure", "required":[ @@ -26055,6 +26187,21 @@ }, "documentation":"Details about the runtime settings for the model that is deployed with the inference component.
" }, + "InferenceComponentSchedulingConfig":{ + "type":"structure", + "required":["PlacementStrategy"], + "members":{ + "PlacementStrategy":{ + "shape":"InferenceComponentPlacementStrategy", + "documentation":"The strategy for placing inference component copies across available instances. If you also set AvailabilityZoneBalance, this strategy applies to placement within each Availability Zone.
Distributes copies evenly across available instances for better resilience.
Packs copies onto fewer instances to optimize resource utilization.
Configuration for balancing inference component copies across Availability Zones.
" + } + }, + "documentation":"The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed.
" + }, "InferenceComponentSortKey":{ "type":"string", "enum":[ @@ -26089,6 +26236,10 @@ "DataCacheConfig":{ "shape":"InferenceComponentDataCacheConfig", "documentation":"Settings that affect how the inference component caches data.
" + }, + "SchedulingConfig":{ + "shape":"InferenceComponentSchedulingConfig", + "documentation":"The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed.
" } }, "documentation":"Details about the resources to deploy with this inference component, including the model, container, and compute resources.
" @@ -26119,6 +26270,10 @@ "DataCacheConfig":{ "shape":"InferenceComponentDataCacheConfigSummary", "documentation":"Settings that affect how the inference component caches data.
" + }, + "SchedulingConfig":{ + "shape":"InferenceComponentSchedulingConfig", + "documentation":"The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed.
" } }, "documentation":"Details about the resources that are deployed with this inference component.
" @@ -32219,16 +32374,35 @@ "min":0, "pattern":"\\d+\\.\\d+" }, + "ManagedInstanceScalingCooldownInMinutes":{ + "type":"integer", + "box":true, + "max":1440, + "min":5 + }, "ManagedInstanceScalingMaxInstanceCount":{ "type":"integer", "box":true, "min":1 }, + "ManagedInstanceScalingMaximumStepSize":{ + "type":"integer", + "box":true, + "max":100, + "min":1 + }, "ManagedInstanceScalingMinInstanceCount":{ "type":"integer", "box":true, "min":0 }, + "ManagedInstanceScalingScaleInStrategy":{ + "type":"string", + "enum":[ + "IDLE_RELEASE", + "CONSOLIDATION" + ] + }, "ManagedInstanceScalingStatus":{ "type":"string", "enum":[ @@ -38041,7 +38215,7 @@ }, "InferenceAmiVersion":{ "shape":"ProductionVariantInferenceAmiVersion", - "documentation":"Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.
The AMI version names, and their configurations, are the following:
Accelerator: GPU
NVIDIA driver version: 535
CUDA version: 12.2
Accelerator: GPU
NVIDIA driver version: 535
CUDA version: 12.2
NVIDIA Container Toolkit with disabled CUDA-compat mounting
Accelerator: GPU
NVIDIA driver version: 550
CUDA version: 12.4
NVIDIA Container Toolkit with disabled CUDA-compat mounting
Accelerator: Inferentia2 and Trainium
Neuron driver version: 2.19
Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions.
The AMI version names, and their configurations, are the following:
Accelerator: GPU
NVIDIA driver version: 535
CUDA version: 12.2
Accelerator: GPU
NVIDIA driver version: 535
CUDA version: 12.2
NVIDIA Container Toolkit with disabled CUDA-compat mounting
Accelerator: GPU
NVIDIA driver version: 550
CUDA version: 12.4
NVIDIA Container Toolkit with disabled CUDA-compat mounting
Accelerator: GPU
NVIDIA driver version: 580
CUDA version: 13.0
NVIDIA Container Toolkit with disabled CUDA-compat mounting
Accelerator: Inferentia2 and Trainium
Neuron driver version: 2.19
The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
" + }, + "ScaleInPolicy":{ + "shape":"ProductionVariantManagedInstanceScalingScaleInPolicy", + "documentation":"Configures the scale-in behavior for managed instance scaling.
" } }, "documentation":"Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
" }, + "ProductionVariantManagedInstanceScalingScaleInPolicy":{ + "type":"structure", + "required":["Strategy"], + "members":{ + "Strategy":{ + "shape":"ManagedInstanceScalingScaleInStrategy", + "documentation":"The strategy for scaling in instances.
Releases instances that have no hosted inference component copies.
Consolidates inference component copies onto fewer instances to release more instances. Consolidation honors the scheduling configuration of each inference component. For example, if an inference component specifies Availability Zone balance, consolidation only proceeds when the resulting distribution does not increase the imbalance.
The maximum number of instances that the endpoint can terminate at a time during a consolidation scale-in operation.
Default value: 1.
The cooldown period, in minutes, after the last endpoint operation before the endpoint evaluates consolidation scale-in opportunities.
Default value: 20.
Configures the scale-in behavior for managed instance scaling.
" + }, "ProductionVariantModelDataDownloadTimeoutInSeconds":{ "type":"integer", "box":true, @@ -39998,6 +40203,14 @@ "EndTime":{ "shape":"Timestamp", "documentation":"The end time of the reserved capacity offering.
" + }, + "ExtensionStartTime":{ + "shape":"Timestamp", + "documentation":"The start time of the extension for the reserved capacity offering.
" + }, + "ExtensionEndTime":{ + "shape":"Timestamp", + "documentation":"The end time of the extension for the reserved capacity offering.
" } }, "documentation":"Details about a reserved capacity offering for a training plan offering.
For more information about how to reserve GPU capacity for your SageMaker HyperPod clusters using Amazon SageMaker Training Plan, see CreateTrainingPlan .
The target resources (e.g., SageMaker Training Jobs, SageMaker HyperPod, SageMaker Endpoints) to search for in the offerings.
Training plans are specific to their target resource.
A training plan designed for SageMaker training jobs can only be used to schedule and run training jobs.
A training plan for HyperPod clusters can be used exclusively to provide compute resources to a cluster's instance group.
A training plan for SageMaker endpoints can be used exclusively to provide compute resources to SageMaker endpoints for model deployment.
The Amazon Resource Name (ARN) of an existing training plan to search for extension offerings. When specified, the API returns extension offerings that can be used to extend the specified training plan.
" } } }, @@ -41111,6 +41328,10 @@ "TrainingPlanOfferings":{ "shape":"TrainingPlanOfferings", "documentation":"A list of training plan offerings that match the search criteria.
" + }, + "TrainingPlanExtensionOfferings":{ + "shape":"TrainingPlanExtensionOfferings", + "documentation":"A list of extension offerings available for the specified training plan. These offerings can be used with the ExtendTrainingPlan API to extend an existing training plan.
The unique identifier of the extension offering that was used to create this extension.
" + }, + "ExtendedAt":{ + "shape":"Timestamp", + "documentation":"The timestamp when the extension was created.
" + }, + "StartDate":{ + "shape":"Timestamp", + "documentation":"The start date of the extension period.
" + }, + "EndDate":{ + "shape":"Timestamp", + "documentation":"The end date of the extension period.
" + }, + "Status":{ + "shape":"String256", + "documentation":"The current status of the extension (e.g., Pending, Active, Scheduled, Failed, Expired).
" + }, + "PaymentStatus":{ + "shape":"String256", + "documentation":"The payment processing status of the extension.
" + }, + "AvailabilityZone":{ + "shape":"String256", + "documentation":"The Availability Zone of the extension.
" + }, + "AvailabilityZoneId":{ + "shape":"AvailabilityZoneId", + "documentation":"The Availability Zone ID of the extension.
" + }, + "DurationHours":{ + "shape":"TrainingPlanExtensionDurationHours", + "documentation":"The duration of the extension in hours.
" + }, + "UpfrontFee":{ + "shape":"String256", + "documentation":"The upfront fee for the extension.
" + }, + "CurrencyCode":{ + "shape":"CurrencyCode", + "documentation":"The currency code for the upfront fee (e.g., USD).
" + } + }, + "documentation":"Details about an extension to a training plan, including the offering ID, dates, status, and cost information.
" + }, + "TrainingPlanExtensionDurationHours":{ + "type":"integer", + "box":true, + "max":4368, + "min":0 + }, + "TrainingPlanExtensionOffering":{ + "type":"structure", + "required":["TrainingPlanExtensionOfferingId"], + "members":{ + "TrainingPlanExtensionOfferingId":{ + "shape":"TrainingPlanExtensionOfferingId", + "documentation":"The unique identifier for this extension offering.
" + }, + "AvailabilityZone":{ + "shape":"String256", + "documentation":"The Availability Zone for this extension offering.
" + }, + "StartDate":{ + "shape":"Timestamp", + "documentation":"The start date of this extension offering.
" + }, + "EndDate":{ + "shape":"Timestamp", + "documentation":"The end date of this extension offering.
" + }, + "DurationHours":{ + "shape":"TrainingPlanExtensionDurationHours", + "documentation":"The duration of this extension offering in hours.
" + }, + "UpfrontFee":{ + "shape":"String256", + "documentation":"The upfront fee for this extension offering.
" + }, + "CurrencyCode":{ + "shape":"CurrencyCode", + "documentation":"The currency code for the upfront fee (e.g., USD).
" + } + }, + "documentation":"Details about an available extension offering for a training plan. Use the offering ID with the ExtendTrainingPlan API to extend a training plan.
The new weekly maintenance window start day and time to update. The maintenance window day and time should be in Coordinated Universal Time (UTC) 24-hour standard time. For example: TUE:03:30.
" + }, + "S3BucketOwnerAccountId":{ + "shape":"AccountId", + "documentation":"The new expected Amazon Web Services account ID that owns the Amazon S3 bucket for artifact storage.
" + }, + "S3BucketOwnerVerification":{ + "shape":"Boolean", + "documentation":"Whether to enable or disable Amazon S3 Bucket Owenrship Verifaction whenever the MLflow Tracking Server interacts with Amazon Amazon S3.
", + "box":true } } }, diff --git a/src/sagemaker_core/main/code_injection/shape_dag.py b/src/sagemaker_core/main/code_injection/shape_dag.py index 5a7273b..96dbfa8 100644 --- a/src/sagemaker_core/main/code_injection/shape_dag.py +++ b/src/sagemaker_core/main/code_injection/shape_dag.py @@ -3210,6 +3210,8 @@ "type": "string", }, {"name": "Tags", "shape": "TagList", "type": "list"}, + {"name": "S3BucketOwnerAccountId", "shape": "AccountId", "type": "string"}, + {"name": "S3BucketOwnerVerification", "shape": "Boolean", "type": "boolean"}, ], "type": "structure", }, @@ -6001,6 +6003,8 @@ {"name": "CreatedBy", "shape": "UserContext", "type": "structure"}, {"name": "LastModifiedTime", "shape": "Timestamp", "type": "timestamp"}, {"name": "LastModifiedBy", "shape": "UserContext", "type": "structure"}, + {"name": "S3BucketOwnerAccountId", "shape": "AccountId", "type": "string"}, + {"name": "S3BucketOwnerVerification", "shape": "Boolean", "type": "boolean"}, ], "type": "structure", }, @@ -6830,6 +6834,21 @@ ], "type": "structure", }, + "DescribeTrainingPlanExtensionHistoryRequest": { + "members": [ + {"name": "TrainingPlanArn", "shape": "TrainingPlanArn", "type": "string"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + {"name": "MaxResults", "shape": "MaxResults", "type": "integer"}, + ], + "type": "structure", + }, + "DescribeTrainingPlanExtensionHistoryResponse": { + "members": [ + {"name": "TrainingPlanExtensions", "shape": "TrainingPlanExtensions", "type": "list"}, + {"name": "NextToken", "shape": "NextToken", "type": "string"}, + ], + "type": "structure", + }, "DescribeTrainingPlanRequest": { "members": [{"name": "TrainingPlanName", "shape": "TrainingPlanName", "type": "string"}], "type": "structure", @@ -7741,6 +7760,22 @@ ], "type": "structure", }, + "ExtendTrainingPlanRequest": { + "members": [ + { + "name": "TrainingPlanExtensionOfferingId", + "shape": "TrainingPlanExtensionOfferingId", + "type": "string", + } + ], + "type": "structure", + }, + 
"ExtendTrainingPlanResponse": { + "members": [ + {"name": "TrainingPlanExtensions", "shape": "TrainingPlanExtensions", "type": "list"} + ], + "type": "structure", + }, "FSxLustreConfig": { "members": [ {"name": "SizeInGiB", "shape": "FSxLustreSizeInGiB", "type": "integer"}, @@ -8812,6 +8847,21 @@ ], "type": "structure", }, + "InferenceComponentAvailabilityZoneBalance": { + "members": [ + { + "name": "EnforcementMode", + "shape": "AvailabilityZoneBalanceEnforcementMode", + "type": "string", + }, + { + "name": "MaxImbalance", + "shape": "AvailabilityZoneBalanceMaxImbalance", + "type": "integer", + }, + ], + "type": "structure", + }, "InferenceComponentCapacitySize": { "members": [ {"name": "Type", "shape": "InferenceComponentCapacitySizeType", "type": "string"}, @@ -8909,6 +8959,21 @@ ], "type": "structure", }, + "InferenceComponentSchedulingConfig": { + "members": [ + { + "name": "PlacementStrategy", + "shape": "InferenceComponentPlacementStrategy", + "type": "string", + }, + { + "name": "AvailabilityZoneBalance", + "shape": "InferenceComponentAvailabilityZoneBalance", + "type": "structure", + }, + ], + "type": "structure", + }, "InferenceComponentSpecification": { "members": [ {"name": "ModelName", "shape": "ModelName", "type": "string"}, @@ -8937,6 +9002,11 @@ "shape": "InferenceComponentDataCacheConfig", "type": "structure", }, + { + "name": "SchedulingConfig", + "shape": "InferenceComponentSchedulingConfig", + "type": "structure", + }, ], "type": "structure", }, @@ -8968,6 +9038,11 @@ "shape": "InferenceComponentDataCacheConfigSummary", "type": "structure", }, + { + "name": "SchedulingConfig", + "shape": "InferenceComponentSchedulingConfig", + "type": "structure", + }, ], "type": "structure", }, @@ -9255,6 +9330,12 @@ {"name": "CustomAttributes", "shape": "CustomAttributesHeader", "type": "string"}, {"name": "InferenceId", "shape": "InferenceId", "type": "string"}, {"name": "InputLocation", "shape": "InputLocationHeader", "type": "string"}, + { + "name": 
"S3OutputPathExtension", + "shape": "S3OutputPathExtensionHeader", + "type": "string", + }, + {"name": "Filename", "shape": "FilenameHeader", "type": "string"}, {"name": "RequestTTLSeconds", "shape": "RequestTTLSecondsHeader", "type": "integer"}, { "name": "InvocationTimeoutSeconds", @@ -13921,6 +14002,31 @@ "shape": "ManagedInstanceScalingMaxInstanceCount", "type": "integer", }, + { + "name": "ScaleInPolicy", + "shape": "ProductionVariantManagedInstanceScalingScaleInPolicy", + "type": "structure", + }, + ], + "type": "structure", + }, + "ProductionVariantManagedInstanceScalingScaleInPolicy": { + "members": [ + { + "name": "Strategy", + "shape": "ManagedInstanceScalingScaleInStrategy", + "type": "string", + }, + { + "name": "MaximumStepSize", + "shape": "ManagedInstanceScalingMaximumStepSize", + "type": "integer", + }, + { + "name": "CooldownInMinutes", + "shape": "ManagedInstanceScalingCooldownInMinutes", + "type": "integer", + }, ], "type": "structure", }, @@ -14574,6 +14680,8 @@ {"name": "DurationMinutes", "shape": "ReservedCapacityDurationMinutes", "type": "long"}, {"name": "StartTime", "shape": "Timestamp", "type": "timestamp"}, {"name": "EndTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "ExtensionStartTime", "shape": "Timestamp", "type": "timestamp"}, + {"name": "ExtensionEndTime", "shape": "Timestamp", "type": "timestamp"}, ], "type": "structure", }, @@ -14994,12 +15102,18 @@ {"name": "EndTimeBefore", "shape": "Timestamp", "type": "timestamp"}, {"name": "DurationHours", "shape": "TrainingPlanDurationHoursInput", "type": "long"}, {"name": "TargetResources", "shape": "SageMakerResourceNames", "type": "list"}, + {"name": "TrainingPlanArn", "shape": "String", "type": "string"}, ], "type": "structure", }, "SearchTrainingPlanOfferingsResponse": { "members": [ - {"name": "TrainingPlanOfferings", "shape": "TrainingPlanOfferings", "type": "list"} + {"name": "TrainingPlanOfferings", "shape": "TrainingPlanOfferings", "type": "list"}, + { + "name": 
"TrainingPlanExtensionOfferings", + "shape": "TrainingPlanExtensionOfferings", + "type": "list", + }, ], "type": "structure", }, @@ -15978,6 +16092,60 @@ "member_type": "string", "type": "list", }, + "TrainingPlanExtension": { + "members": [ + { + "name": "TrainingPlanExtensionOfferingId", + "shape": "TrainingPlanExtensionOfferingId", + "type": "string", + }, + {"name": "ExtendedAt", "shape": "Timestamp", "type": "timestamp"}, + {"name": "StartDate", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndDate", "shape": "Timestamp", "type": "timestamp"}, + {"name": "Status", "shape": "String256", "type": "string"}, + {"name": "PaymentStatus", "shape": "String256", "type": "string"}, + {"name": "AvailabilityZone", "shape": "String256", "type": "string"}, + {"name": "AvailabilityZoneId", "shape": "AvailabilityZoneId", "type": "string"}, + { + "name": "DurationHours", + "shape": "TrainingPlanExtensionDurationHours", + "type": "integer", + }, + {"name": "UpfrontFee", "shape": "String256", "type": "string"}, + {"name": "CurrencyCode", "shape": "CurrencyCode", "type": "string"}, + ], + "type": "structure", + }, + "TrainingPlanExtensionOffering": { + "members": [ + { + "name": "TrainingPlanExtensionOfferingId", + "shape": "TrainingPlanExtensionOfferingId", + "type": "string", + }, + {"name": "AvailabilityZone", "shape": "String256", "type": "string"}, + {"name": "StartDate", "shape": "Timestamp", "type": "timestamp"}, + {"name": "EndDate", "shape": "Timestamp", "type": "timestamp"}, + { + "name": "DurationHours", + "shape": "TrainingPlanExtensionDurationHours", + "type": "integer", + }, + {"name": "UpfrontFee", "shape": "String256", "type": "string"}, + {"name": "CurrencyCode", "shape": "CurrencyCode", "type": "string"}, + ], + "type": "structure", + }, + "TrainingPlanExtensionOfferings": { + "member_shape": "TrainingPlanExtensionOffering", + "member_type": "structure", + "type": "list", + }, + "TrainingPlanExtensions": { + "member_shape": "TrainingPlanExtension", + 
"member_type": "structure", + "type": "list", + }, "TrainingPlanFilter": { "members": [ {"name": "Name", "shape": "TrainingPlanFilterName", "type": "string"}, @@ -17024,6 +17192,8 @@ "shape": "WeeklyMaintenanceWindowStart", "type": "string", }, + {"name": "S3BucketOwnerAccountId", "shape": "AccountId", "type": "string"}, + {"name": "S3BucketOwnerVerification", "shape": "Boolean", "type": "boolean"}, ], "type": "structure", }, diff --git a/src/sagemaker_core/main/config_schema.py b/src/sagemaker_core/main/config_schema.py index d22001a..47fa38e 100644 --- a/src/sagemaker_core/main/config_schema.py +++ b/src/sagemaker_core/main/config_schema.py @@ -467,7 +467,11 @@ }, "MlflowTrackingServer": { "type": "object", - "properties": {"role_arn": {"type": "string"}}, + "properties": { + "role_arn": {"type": "string"}, + "s3_bucket_owner_account_id": {"type": "string"}, + "s3_bucket_owner_verification": {"type": "boolean"}, + }, }, "Model": { "type": "object", diff --git a/src/sagemaker_core/main/resources.py b/src/sagemaker_core/main/resources.py index e4384bf..4315562 100644 --- a/src/sagemaker_core/main/resources.py +++ b/src/sagemaker_core/main/resources.py @@ -9533,6 +9533,8 @@ def invoke_async( accept: Optional[str] = Unassigned(), custom_attributes: Optional[str] = Unassigned(), inference_id: Optional[str] = Unassigned(), + s3_output_path_extension: Optional[str] = Unassigned(), + filename: Optional[str] = Unassigned(), request_ttl_seconds: Optional[int] = Unassigned(), invocation_timeout_seconds: Optional[int] = Unassigned(), session: Optional[Session] = None, @@ -9547,6 +9549,8 @@ def invoke_async( accept: The desired MIME type of the inference response from the model container. custom_attributes: Provides additional information about a request for an inference submitted to a model hosted at an Amazon SageMaker AI endpoint. The information is an opaque value that is forwarded verbatim. 
You could use this value, for example, to provide an ID that you can use to track a request or to provide other metadata that a service endpoint was programmed to process. The value must consist of no more than 1024 visible US-ASCII characters as specified in Section 3.3.6. Field Value Components of the Hypertext Transfer Protocol (HTTP/1.1). The code in your model is responsible for setting or updating any custom attributes in the response. If your code does not set this value in the response, an empty value is returned. For example, if a custom attribute represents the trace ID, your model can prepend the custom attribute with Trace ID: in your post-processing function. This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker AI Python SDK. inference_id: The identifier for the inference request. Amazon SageMaker AI will generate an identifier for you if none is specified. + s3_output_path_extension: The path extension that is appended to the Amazon S3 output path where the inference response payload is stored. + filename: The filename for the inference response payload stored in Amazon S3. If not specified, Amazon SageMaker AI generates a filename based on the inference ID. request_ttl_seconds: Maximum age in seconds a request can be in the queue before it is marked as expired. The default is 6 hours, or 21,600 seconds. invocation_timeout_seconds: Maximum amount of time in seconds a request can be processed before it is marked as expired. The default is 15 minutes, or 900 seconds. session: Boto3 session. 
@@ -9577,6 +9581,8 @@ def invoke_async( "CustomAttributes": custom_attributes, "InferenceId": inference_id, "InputLocation": input_location, + "S3OutputPathExtension": s3_output_path_extension, + "Filename": filename, "RequestTTLSeconds": request_ttl_seconds, "InvocationTimeoutSeconds": invocation_timeout_seconds, } @@ -15381,8 +15387,8 @@ def create( cls, inference_component_name: str, endpoint_name: Union[str, object], - specification: shapes.InferenceComponentSpecification, variant_name: Optional[str] = Unassigned(), + specification: Optional[shapes.InferenceComponentSpecification] = Unassigned(), runtime_config: Optional[shapes.InferenceComponentRuntimeConfig] = Unassigned(), tags: Optional[List[shapes.Tag]] = Unassigned(), session: Optional[Session] = None, @@ -15394,8 +15400,8 @@ def create( Parameters: inference_component_name: A unique name to assign to the inference component. endpoint_name: The name of an existing endpoint where you host the inference component. - specification: Details about the resources to deploy with this inference component, including the model, container, and compute resources. variant_name: The name of an existing production variant where you host the inference component. + specification: Details about the resources to deploy with this inference component, including the model, container, and compute resources. runtime_config: Runtime settings for a model that is deployed with an inference component. tags: A list of key-value pairs associated with the model. For more information, see Tagging Amazon Web Services resources in the Amazon Web Services General Reference. session: Boto3 session. @@ -18160,6 +18166,8 @@ class MlflowTrackingServer(Base): created_by: last_modified_time: The timestamp of when the described MLflow Tracking Server was last modified. last_modified_by: + s3_bucket_owner_account_id: Expected Amazon Web Services account ID that owns the Amazon S3 bucket for artifact storage. 
+        s3_bucket_owner_verification: Whether Amazon S3 Bucket Ownership checks are enabled whenever the tracking server interacts with Amazon S3.
        """ @@ -18179,6 +18187,8 @@ class MlflowTrackingServer(Base): created_by: Optional[shapes.UserContext] = Unassigned() last_modified_time: Optional[datetime.datetime] = Unassigned() last_modified_by: Optional[shapes.UserContext] = Unassigned() + s3_bucket_owner_account_id: Optional[str] = Unassigned() + s3_bucket_owner_verification: Optional[bool] = Unassigned() def get_name(self) -> str: attributes = vars(self) @@ -18222,6 +18232,8 @@ def create( automatic_model_registration: Optional[bool] = Unassigned(), weekly_maintenance_window_start: Optional[str] = Unassigned(), tags: Optional[List[shapes.Tag]] = Unassigned(), + s3_bucket_owner_account_id: Optional[str] = Unassigned(), + s3_bucket_owner_verification: Optional[bool] = Unassigned(), session: Optional[Session] = None, region: Optional[str] = None, ) -> Optional["MlflowTrackingServer"]: @@ -18237,6 +18249,8 @@ def create( automatic_model_registration: Whether to enable or disable automatic registration of new MLflow models to the SageMaker Model Registry. To enable automatic model registration, set this value to True. To disable automatic model registration, set this value to False. If not specified, AutomaticModelRegistration defaults to False. weekly_maintenance_window_start: The day and time of the week in Coordinated Universal Time (UTC) 24-hour standard time that weekly maintenance updates are scheduled. For example: TUE:03:30. tags: Tags consisting of key-value pairs used to manage metadata for the tracking server. + s3_bucket_owner_account_id: Expected Amazon Web Services account ID that owns the Amazon S3 bucket for artifact storage. Defaults to caller's account ID if not provided. + s3_bucket_owner_verification: Enable Amazon S3 Ownership checks when interacting with Amazon S3 buckets from a SageMaker Managed MLflow Tracking Server.
Defaults to True if not provided. session: Boto3 session. region: Region name. @@ -18273,6 +18287,8 @@ def create( "AutomaticModelRegistration": automatic_model_registration, "WeeklyMaintenanceWindowStart": weekly_maintenance_window_start, "Tags": tags, + "S3BucketOwnerAccountId": s3_bucket_owner_account_id, + "S3BucketOwnerVerification": s3_bucket_owner_verification, } operation_input_args = Base.populate_chained_attributes( @@ -18386,6 +18402,8 @@ def update( tracking_server_size: Optional[str] = Unassigned(), automatic_model_registration: Optional[bool] = Unassigned(), weekly_maintenance_window_start: Optional[str] = Unassigned(), + s3_bucket_owner_account_id: Optional[str] = Unassigned(), + s3_bucket_owner_verification: Optional[bool] = Unassigned(), ) -> Optional["MlflowTrackingServer"]: """ Update a MlflowTrackingServer resource @@ -18417,6 +18435,8 @@ def update( "TrackingServerSize": tracking_server_size, "AutomaticModelRegistration": automatic_model_registration, "WeeklyMaintenanceWindowStart": weekly_maintenance_window_start, + "S3BucketOwnerAccountId": s3_bucket_owner_account_id, + "S3BucketOwnerVerification": s3_bucket_owner_verification, } logger.debug(f"Input request: {operation_input_args}") # serialize the input request diff --git a/src/sagemaker_core/main/shapes.py b/src/sagemaker_core/main/shapes.py index e29ae51..863a94e 100644 --- a/src/sagemaker_core/main/shapes.py +++ b/src/sagemaker_core/main/shapes.py @@ -6044,6 +6044,23 @@ class ProductionVariantServerlessConfig(Base): provisioned_concurrency: Optional[int] = Unassigned() +class ProductionVariantManagedInstanceScalingScaleInPolicy(Base): + """ + ProductionVariantManagedInstanceScalingScaleInPolicy + Configures the scale-in behavior for managed instance scaling. + + Attributes + ---------------------- + strategy: The strategy for scaling in instances. IDLE_RELEASE Releases instances that have no hosted inference component copies. 
CONSOLIDATION Consolidates inference component copies onto fewer instances to release more instances. Consolidation honors the scheduling configuration of each inference component. For example, if an inference component specifies Availability Zone balance, consolidation only proceeds when the resulting distribution does not increase the imbalance. + maximum_step_size: The maximum number of instances that the endpoint can terminate at a time during a consolidation scale-in operation. Default value: 1. + cooldown_in_minutes: The cooldown period, in minutes, after the last endpoint operation before the endpoint evaluates consolidation scale-in opportunities. Default value: 20. + """ + + strategy: str + maximum_step_size: Optional[int] = Unassigned() + cooldown_in_minutes: Optional[int] = Unassigned() + + class ProductionVariantManagedInstanceScaling(Base): """ ProductionVariantManagedInstanceScaling @@ -6054,11 +6071,13 @@ class ProductionVariantManagedInstanceScaling(Base): status: Indicates whether managed instance scaling is enabled. min_instance_count: The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic. max_instance_count: The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic. + scale_in_policy: Configures the scale-in behavior for managed instance scaling. """ status: Optional[str] = Unassigned() min_instance_count: Optional[int] = Unassigned() max_instance_count: Optional[int] = Unassigned() + scale_in_policy: Optional[ProductionVariantManagedInstanceScalingScaleInPolicy] = Unassigned() class ProductionVariantRoutingConfig(Base): @@ -6110,7 +6129,7 @@ class ProductionVariant(Base): enable_ssm_access: You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint. 
You can turn on or turn off SSM access for a production variant behind an existing endpoint by creating a new endpoint configuration and calling UpdateEndpoint. managed_instance_scaling: Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic. routing_config: Settings that control how the endpoint routes incoming traffic to the instances that the endpoint hosts. - inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads. By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions. The AMI version names, and their configurations, are the following: al2-ami-sagemaker-inference-gpu-2 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 al2-ami-sagemaker-inference-gpu-2-1 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-gpu-3-1 Accelerator: GPU NVIDIA driver version: 550 CUDA version: 12.4 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-neuron-2 Accelerator: Inferentia2 and Trainium Neuron driver version: 2.19 + inference_ami_version: Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads. 
By selecting an AMI version, you can ensure that your inference environment is compatible with specific software requirements, such as CUDA driver versions, Linux kernel versions, or Amazon Web Services Neuron driver versions. The AMI version names, and their configurations, are the following: al2-ami-sagemaker-inference-gpu-2 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 al2-ami-sagemaker-inference-gpu-2-1 Accelerator: GPU NVIDIA driver version: 535 CUDA version: 12.2 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-gpu-3-1 Accelerator: GPU NVIDIA driver version: 550 CUDA version: 12.4 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2023-ami-sagemaker-inference-gpu-4-1 Accelerator: GPU NVIDIA driver version: 580 CUDA version: 13.0 NVIDIA Container Toolkit with disabled CUDA-compat mounting al2-ami-sagemaker-inference-neuron-2 Accelerator: Inferentia2 and Trainium Neuron driver version: 2.19 capacity_reservation_config: Settings for the capacity reservation for the compute instances that SageMaker AI reserves for an endpoint. """ @@ -6834,6 +6853,36 @@ class InferenceComponentDataCacheConfig(Base): enable_caching: bool +class InferenceComponentAvailabilityZoneBalance(Base): + """ + InferenceComponentAvailabilityZoneBalance + Configuration for balancing inference component copies across Availability Zones. + + Attributes + ---------------------- + enforcement_mode: Determines how strictly the Availability Zone balance constraint is enforced. PERMISSIVE The endpoint attempts to balance copies across Availability Zones but proceeds with scheduling even if balance can't be achieved due to available capacity or instance distribution across Availability Zones. + max_imbalance: The maximum allowed difference in the number of inference component copies between any two Availability Zones. This parameter applies only when the endpoint has instances across two or more Availability Zones. 
A copy placement is allowed if it reduces imbalance or the resulting imbalance is within this value. Default value: 0. + """ + + enforcement_mode: str + max_imbalance: Optional[int] = Unassigned() + + +class InferenceComponentSchedulingConfig(Base): + """ + InferenceComponentSchedulingConfig + The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed. + + Attributes + ---------------------- + placement_strategy: The strategy for placing inference component copies across available instances. If you also set AvailabilityZoneBalance, this strategy applies to placement within each Availability Zone. SPREAD Distributes copies evenly across available instances for better resilience. BINPACK Packs copies onto fewer instances to optimize resource utilization. + availability_zone_balance: Configuration for balancing inference component copies across Availability Zones. + """ + + placement_strategy: str + availability_zone_balance: Optional[InferenceComponentAvailabilityZoneBalance] = Unassigned() + + class InferenceComponentSpecification(Base): """ InferenceComponentSpecification @@ -6847,6 +6896,7 @@ class InferenceComponentSpecification(Base): compute_resource_requirements: The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component. Omit this parameter if your request is meant to create an adapter inference component. An adapter inference component is loaded by a base inference component, and it uses the compute resources of the base inference component. base_inference_component_name: The name of an existing inference component that is to contain the inference component that you're creating with your request. Specify this parameter only if your request is meant to create an adapter inference component. An adapter inference component contains the path to an adapter model. 
The purpose of the adapter model is to tailor the inference output of a base foundation model, which is hosted by the base inference component. The adapter inference component uses the compute resources that you assigned to the base inference component. When you create an adapter inference component, use the Container parameter to specify the location of the adapter artifacts. In the parameter value, use the ArtifactUrl parameter of the InferenceComponentContainerSpecification data type. Before you can create an adapter inference component, you must have an existing inference component that contains the foundation model that you want to adapt. data_cache_config: Settings that affect how the inference component caches data. + scheduling_config: The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed. """ model_name: Optional[Union[str, object]] = Unassigned() @@ -6857,6 +6907,7 @@ class InferenceComponentSpecification(Base): ) base_inference_component_name: Optional[str] = Unassigned() data_cache_config: Optional[InferenceComponentDataCacheConfig] = Unassigned() + scheduling_config: Optional[InferenceComponentSchedulingConfig] = Unassigned() class InferenceComponentRuntimeConfig(Base): @@ -9894,6 +9945,7 @@ class InferenceComponentSpecificationSummary(Base): compute_resource_requirements: The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component. base_inference_component_name: The name of the base inference component that contains this inference component. data_cache_config: Settings that affect how the inference component caches data. + scheduling_config: The scheduling configuration that determines how inference component copies are placed across available instances when copies are added or removed. 
""" model_name: Optional[Union[str, object]] = Unassigned() @@ -9904,6 +9956,7 @@ class InferenceComponentSpecificationSummary(Base): ) base_inference_component_name: Optional[str] = Unassigned() data_cache_config: Optional[InferenceComponentDataCacheConfigSummary] = Unassigned() + scheduling_config: Optional[InferenceComponentSchedulingConfig] = Unassigned() class InferenceComponentRuntimeConfigSummary(Base): @@ -10531,6 +10584,39 @@ class TrainingProgressInfo(Base): max_epoch: Optional[int] = Unassigned() +class TrainingPlanExtension(Base): + """ + TrainingPlanExtension + Details about an extension to a training plan, including the offering ID, dates, status, and cost information. + + Attributes + ---------------------- + training_plan_extension_offering_id: The unique identifier of the extension offering that was used to create this extension. + extended_at: The timestamp when the extension was created. + start_date: The start date of the extension period. + end_date: The end date of the extension period. + status: The current status of the extension (e.g., Pending, Active, Scheduled, Failed, Expired). + payment_status: The payment processing status of the extension. + availability_zone: The Availability Zone of the extension. + availability_zone_id: The Availability Zone ID of the extension. + duration_hours: The duration of the extension in hours. + upfront_fee: The upfront fee for the extension. + currency_code: The currency code for the upfront fee (e.g., USD). 
+ """ + + training_plan_extension_offering_id: str + extended_at: Optional[datetime.datetime] = Unassigned() + start_date: Optional[datetime.datetime] = Unassigned() + end_date: Optional[datetime.datetime] = Unassigned() + status: Optional[str] = Unassigned() + payment_status: Optional[str] = Unassigned() + availability_zone: Optional[str] = Unassigned() + availability_zone_id: Optional[str] = Unassigned() + duration_hours: Optional[int] = Unassigned() + upfront_fee: Optional[str] = Unassigned() + currency_code: Optional[str] = Unassigned() + + class ReservedCapacitySummary(Base): """ ReservedCapacitySummary @@ -13958,6 +14044,8 @@ class ReservedCapacityOffering(Base): duration_minutes: The additional minutes beyond whole hours in the total duration for this reserved capacity offering. start_time: The start time of the reserved capacity offering. end_time: The end time of the reserved capacity offering. + extension_start_time: The start time of the extension for the reserved capacity offering. + extension_end_time: The end time of the extension for the reserved capacity offering. """ instance_type: str @@ -13970,6 +14058,8 @@ class ReservedCapacityOffering(Base): duration_minutes: Optional[int] = Unassigned() start_time: Optional[datetime.datetime] = Unassigned() end_time: Optional[datetime.datetime] = Unassigned() + extension_start_time: Optional[datetime.datetime] = Unassigned() + extension_end_time: Optional[datetime.datetime] = Unassigned() class ResourceConfigForUpdate(Base): @@ -14367,6 +14457,31 @@ class TrainingPlanOffering(Base): reserved_capacity_offerings: Optional[List[ReservedCapacityOffering]] = Unassigned() +class TrainingPlanExtensionOffering(Base): + """ + TrainingPlanExtensionOffering + Details about an available extension offering for a training plan. Use the offering ID with the ExtendTrainingPlan API to extend a training plan. 
+ + Attributes + ---------------------- + training_plan_extension_offering_id: The unique identifier for this extension offering. + availability_zone: The Availability Zone for this extension offering. + start_date: The start date of this extension offering. + end_date: The end date of this extension offering. + duration_hours: The duration of this extension offering in hours. + upfront_fee: The upfront fee for this extension offering. + currency_code: The currency code for the upfront fee (e.g., USD). + """ + + training_plan_extension_offering_id: str + availability_zone: Optional[str] = Unassigned() + start_date: Optional[datetime.datetime] = Unassigned() + end_date: Optional[datetime.datetime] = Unassigned() + duration_hours: Optional[int] = Unassigned() + upfront_fee: Optional[str] = Unassigned() + currency_code: Optional[str] = Unassigned() + + class ServiceCatalogProvisioningUpdateDetails(Base): """ ServiceCatalogProvisioningUpdateDetails diff --git a/src/sagemaker_core/tools/api_coverage.json b/src/sagemaker_core/tools/api_coverage.json index 1f2a774..b2b6a32 100644 --- a/src/sagemaker_core/tools/api_coverage.json +++ b/src/sagemaker_core/tools/api_coverage.json @@ -1 +1 @@ -{"SupportedAPIs": 372, "UnsupportedAPIs": 16} \ No newline at end of file +{"SupportedAPIs": 373, "UnsupportedAPIs": 17} \ No newline at end of file