From 030e3f750907b5ba4816f7b852172f41de69cc2c Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 08:25:30 -0700 Subject: [PATCH 1/7] fix(deploy): sanitize service names in deploy progress output Strip ANSI escape sequences and non-printable characters from service names before rendering in the deploy progress table to prevent terminal output manipulation from malicious azure.yaml configurations. Closes #7972 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/internal/cmd/deploy_progress.go | 21 +++++++++++-- cli/azd/internal/cmd/deploy_progress_test.go | 31 ++++++++++++++++---- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/cli/azd/internal/cmd/deploy_progress.go b/cli/azd/internal/cmd/deploy_progress.go index 14c87be6cf9..5273893b75b 100644 --- a/cli/azd/internal/cmd/deploy_progress.go +++ b/cli/azd/internal/cmd/deploy_progress.go @@ -7,9 +7,11 @@ import ( "context" "fmt" "io" + "regexp" "strings" "sync" "time" + "unicode" ) // Column widths for the progress table. @@ -69,6 +71,21 @@ type deployProgressTracker struct { lastLines int // how many lines we rendered last time (for ANSI overwrite) } +// ansiEscapeRe matches ANSI escape sequences (CSI and OSC). +var ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*\x07`) + +// sanitizeServiceName strips ANSI escape sequences and non-printable characters +// from a service name to prevent terminal output manipulation. +func sanitizeServiceName(name string) string { + name = ansiEscapeRe.ReplaceAllString(name, "") + return strings.Map(func(r rune) rune { + if r == '\t' || (unicode.IsPrint(r) && !unicode.IsControl(r)) { + return r + } + return -1 + }, name) +} + func newDeployProgressTracker( writer io.Writer, interactive bool, serviceNames []string, ) *deployProgressTracker { @@ -76,7 +93,7 @@ func newDeployProgressTracker( idx := make(map[string]int, len(serviceNames)) for i, name := range serviceNames { services[i] = &serviceStatus{ - name: name, + name: sanitizeServiceName(name), phase: phaseWaiting, } idx[name] = i @@ -115,7 +132,7 @@ func (t *deployProgressTracker) Update( if !t.interactive { // Non-interactive (CI): one line per event elapsed := svc.elapsed() - line := fmt.Sprintf(" %s: %s", serviceName, phase) + line := fmt.Sprintf(" %s: %s", svc.name, phase) if detail != "" { line += fmt.Sprintf(" (%s)", detail) } diff --git a/cli/azd/internal/cmd/deploy_progress_test.go b/cli/azd/internal/cmd/deploy_progress_test.go index 910cf5fafb7..0a491ce011e 100644 --- a/cli/azd/internal/cmd/deploy_progress_test.go +++ b/cli/azd/internal/cmd/deploy_progress_test.go @@ -168,17 +168,38 @@ func TestDeployProgressTracker_DetailTruncation(t *testing.T) { func TestDeployProgressTracker_ServiceNameWithANSI(t *testing.T) { // Security: service names come from azure.yaml and could contain // ANSI escape codes that hijack terminal output. The progress - // tracker should not crash or produce malformed output with such - // names. This is a defense-in-depth test—injection requires a - // malicious azure.yaml the user already controls. + // tracker should strip them to prevent terminal manipulation. ansiName := "\033[31mevil\033[0m" var buf bytes.Buffer tracker := newDeployProgressTracker(&buf, false, []string{ansiName}) - // Should not panic tracker.Update(ansiName, phasePackaging, "building") output := buf.String() - assert.Contains(t, output, "Packaging") + assert.Contains(t, output, "evil: Packaging") + assert.NotContains(t, output, "\033[") +} + +func TestSanitizeServiceName(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"plain", "web-api", "web-api"}, + {"ansi_color", "\033[31mred\033[0m", "red"}, + {"ansi_csi_bold", "\033[1mbold\033[22m", "bold"}, + {"osc_title", "\033]0;title\x07rest", "rest"}, + {"null_bytes", "svc\x00name", "svcname"}, + {"control_chars", "hello\x01\x02world", "helloworld"}, + {"tab_preserved", "svc\tname", "svc\tname"}, + {"unicode_printable", "café-svc", "café-svc"}, + {"empty", "", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, sanitizeServiceName(tt.in)) + }) + } } func TestDeployProgressTracker_EmptyServiceList(t *testing.T) { From a064b7c0d70766f6a393d57c3681bf6d1972d1ba Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 08:25:32 -0700 Subject: [PATCH 2/7] docs(env): document lock acquisition order for saveMu and flock Add explicit lock-ordering documentation to pkg/environment/manager.go and docs/concurrency-model.md explaining the saveMu -> flock -> env.mu chain and how subprocess hooks interact with the locking hierarchy. Closes #7969 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/docs/concurrency-model.md | 58 ++++++++++++++++++++++++++++++ cli/azd/pkg/environment/manager.go | 17 +++++++++ 2 files changed, 75 insertions(+) diff --git a/cli/azd/docs/concurrency-model.md b/cli/azd/docs/concurrency-model.md index 6fe33ce7ebb..5714b2189d0 100644 --- a/cli/azd/docs/concurrency-model.md +++ b/cli/azd/docs/concurrency-model.md @@ -169,6 +169,64 @@ both attempt initialization; the lock ensures only one succeeds. --- +## Lock Acquisition Order + +Consistent lock ordering prevents deadlocks. The environment persistence path +uses a three-level hierarchy: + +```text +1. manager.saveMu (in-process sync.Mutex — serializes goroutines) +2. local flock (cross-process OS file lock — serializes processes) +3. env.mu (per-Environment sync.RWMutex — protects in-memory map) +``` + +Every code path that persists or reloads environment state acquires these locks +**in the order above**. Never acquire an outer lock while holding an inner one. + +### Why subprocess hooks cannot deadlock + +Parallel service hooks (pre/post-deploy, pre/post-package) may spawn `azd env set` +subprocesses concurrently. Each subprocess is a separate OS process with its **own** +`manager.saveMu` instance — in-process mutexes are not shared across processes. + +Cross-process serialization is handled entirely by **flock** (the OS-level file +lock on the `.env.lock` file). Because each process independently acquires only +flock → env.mu (steps 2–3) and never holds another process's saveMu, circular +wait is impossible. + +```text +┌─────────────────────────────────────────────┐ +│ Parent azd process │ +│ │ +│ goroutine A: saveMu → flock → env.mu │ +│ goroutine B: saveMu → flock → env.mu │ +│ (saveMu serializes A and B) │ +└─────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────┐ +│ Hook subprocess 1 (`azd env set FOO=bar`) │ +│ main goroutine: saveMu → flock → env.mu │ +└─────────────────────────────────────────────┘ + +┌─────────────────────────────────────────────┐ +│ Hook subprocess 2 (`azd env set BAZ=qux`) │ +│ main goroutine: saveMu → flock → env.mu │ +└─────────────────────────────────────────────┘ + +Across processes, only flock provides mutual exclusion. +Within a process, saveMu prevents goroutine interleaving. +``` + +### What is held during subprocess invocations + +When the parent process launches hook subprocesses, it does **not** hold saveMu +or flock during the subprocess lifetime. The hook runner starts the subprocess +and waits for it to exit — locks are only acquired when the subprocess (or the +parent) calls `Save`/`Reload`. This means hook subprocesses never contend with +locks held by the parent's hook-launch path. + +--- + ## Adding new concurrent state When you introduce a new field on one of the types above (or a new type that diff --git a/cli/azd/pkg/environment/manager.go b/cli/azd/pkg/environment/manager.go index 832f0311c28..f07fe044aff 100644 --- a/cli/azd/pkg/environment/manager.go +++ b/cli/azd/pkg/environment/manager.go @@ -107,6 +107,23 @@ type manager struct { // parallel services running publish/deploy/hooks against the same // *Environment cannot corrupt the on-disk .env file or interleave a // partial Reload with another goroutine's writes. + // + // Lock acquisition order (MUST be consistent across all call sites): + // + // 1. manager.saveMu (in-process sync.Mutex, serializes goroutines) + // 2. local flock (cross-process OS file lock via gofrs/flock) + // 3. env.mu (per-Environment sync.RWMutex, protects in-memory map) + // + // Subprocess hooks (`azd env set`) spawn a separate azd process that + // acquires flock independently (no saveMu — that's per-process). The + // in-process saveMu prevents concurrent Save/Reload within the SAME + // process; the cross-process flock prevents concurrent file I/O across + // DIFFERENT processes (e.g., parallel hook subprocesses). No deadlock + // is possible because saveMu is never held cross-process: each OS + // process has its own saveMu instance, and flock handles inter-process + // serialization. + // + // See also: docs/concurrency-model.md "Lock Acquisition Order" section. saveMu sync.Mutex // State cache manager for managing cached Azure resource information From 74709d64c70c147b5338bc6e49db6c864873a97d Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 08:26:34 -0700 Subject: [PATCH 3/7] feat(up): add per-phase timing breakdown to azd up SUCCESS message Closes #7983 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/internal/cmd/up_graph.go | 49 +++++++++++++++++++ cli/azd/internal/cmd/up_graph_test.go | 70 +++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 cli/azd/internal/cmd/up_graph_test.go diff --git a/cli/azd/internal/cmd/up_graph.go b/cli/azd/internal/cmd/up_graph.go index 740baf51467..be5a236bead 100644 --- a/cli/azd/internal/cmd/up_graph.go +++ b/cli/azd/internal/cmd/up_graph.go @@ -603,10 +603,59 @@ func (u *UpGraphAction) Run( "Your application was provisioned and deployed to Azure in %s.", ux.DurationAsText(since(startTime)), ), + FollowUp: phaseTimingBreakdown(result.Steps), }, }, nil } +// phaseTimingBreakdown computes wall-clock durations for provisioning and deploying phases +// by finding the earliest start and latest end among matching steps. +func phaseTimingBreakdown(steps []exegraph.StepTiming) string { + var provStart, deployStart time.Time + var provEnd, deployEnd time.Time + + for _, st := range steps { + if st.Status == exegraph.StepSkipped { + continue + } + switch { + case strings.HasPrefix(st.Name, "provision-") || + strings.HasPrefix(st.Name, "cmdhook-preprovision") || + strings.HasPrefix(st.Name, "cmdhook-postprovision"): + if provStart.IsZero() || st.Start.Before(provStart) { + provStart = st.Start + } + if st.End.After(provEnd) { + provEnd = st.End + } + case strings.HasPrefix(st.Name, "package-") || + strings.HasPrefix(st.Name, "publish-") || + strings.HasPrefix(st.Name, "deploy-") || + strings.HasPrefix(st.Name, "cmdhook-predeploy") || + strings.HasPrefix(st.Name, "cmdhook-postdeploy"): + if deployStart.IsZero() || st.Start.Before(deployStart) { + deployStart = st.Start + } + if st.End.After(deployEnd) { + deployEnd = st.End + } + } + } + + var lines []string + if !provStart.IsZero() { + lines = append(lines, fmt.Sprintf(" Provisioning: %s", ux.DurationAsText(provEnd.Sub(provStart)))) + } + if !deployStart.IsZero() { + lines = append(lines, fmt.Sprintf(" Deploying: %s", ux.DurationAsText(deployEnd.Sub(deployStart)))) + } + + if len(lines) == 0 { + return "" + } + return strings.Join(lines, "\n") +} + // changedFlagNames returns the names of flags that were explicitly set on // the FlagSet. Returns nil for a nil FlagSet so callers can pass through // without a guard. Mirrors the changedFlags collection in diff --git a/cli/azd/internal/cmd/up_graph_test.go b/cli/azd/internal/cmd/up_graph_test.go new file mode 100644 index 00000000000..b31629d5966 --- /dev/null +++ b/cli/azd/internal/cmd/up_graph_test.go @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package cmd + +import ( + "testing" + "time" + + "github.com/azure/azure-dev/cli/azd/pkg/exegraph" +) + +func TestPhaseTimingBreakdown(t *testing.T) { + t.Parallel() + base := time.Now() + + tests := []struct { + name string + steps []exegraph.StepTiming + want string + }{ + { + name: "empty steps", + steps: nil, + want: "", + }, + { + name: "provision only", + steps: []exegraph.StepTiming{ + {Name: "provision-infra", Status: exegraph.StepDone, Start: base, End: base.Add(5 * time.Minute)}, + }, + want: " Provisioning: 5 minutes", + }, + { + name: "deploy only", + steps: []exegraph.StepTiming{ + {Name: "package-web", Status: exegraph.StepDone, Start: base, End: base.Add(30 * time.Second)}, + {Name: "deploy-web", Status: exegraph.StepDone, Start: base.Add(30 * time.Second), End: base.Add(90 * time.Second)}, + }, + want: " Deploying: 1 minute 30 seconds", + }, + { + name: "both phases", + steps: []exegraph.StepTiming{ + {Name: "provision-infra", Status: exegraph.StepDone, Start: base, End: base.Add(9 * time.Minute)}, + {Name: "package-web", Status: exegraph.StepDone, Start: base.Add(9 * time.Minute), End: base.Add(10 * time.Minute)}, + {Name: "deploy-web", Status: exegraph.StepDone, Start: base.Add(10 * time.Minute), End: base.Add(11 * time.Minute)}, + }, + want: " Provisioning: 9 minutes\n Deploying: 2 minutes", + }, + { + name: "skipped steps excluded", + steps: []exegraph.StepTiming{ + {Name: "provision-infra", Status: exegraph.StepSkipped, Start: time.Time{}, End: time.Time{}}, + {Name: "deploy-web", Status: exegraph.StepDone, Start: base, End: base.Add(45 * time.Second)}, + }, + want: " Deploying: 45 seconds", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := phaseTimingBreakdown(tt.steps) + if got != tt.want { + t.Errorf("phaseTimingBreakdown() =\n%q\nwant:\n%q", got, tt.want) + } + }) + } +} From be0a5cc259312ab1b1a6e7510063c607291258bf Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 08:40:06 -0700 Subject: [PATCH 4/7] fix: resolve preflight lint and gofmt findings Break long test lines to satisfy 125-char lll limit. Apply gofmt to magefile.go (pre-existing formatting drift). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/internal/cmd/up_graph_test.go | 25 ++++++++++++++++++++----- cli/azd/magefile.go | 16 ++++++++-------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/cli/azd/internal/cmd/up_graph_test.go b/cli/azd/internal/cmd/up_graph_test.go index b31629d5966..68d027f3892 100644 --- a/cli/azd/internal/cmd/up_graph_test.go +++ b/cli/azd/internal/cmd/up_graph_test.go @@ -34,17 +34,32 @@ func TestPhaseTimingBreakdown(t *testing.T) { { name: "deploy only", steps: []exegraph.StepTiming{ - {Name: "package-web", Status: exegraph.StepDone, Start: base, End: base.Add(30 * time.Second)}, - {Name: "deploy-web", Status: exegraph.StepDone, Start: base.Add(30 * time.Second), End: base.Add(90 * time.Second)}, + { + Name: "package-web", Status: exegraph.StepDone, + Start: base, End: base.Add(30 * time.Second), + }, + { + Name: "deploy-web", Status: exegraph.StepDone, + Start: base.Add(30 * time.Second), End: base.Add(90 * time.Second), + }, }, want: " Deploying: 1 minute 30 seconds", }, { name: "both phases", steps: []exegraph.StepTiming{ - {Name: "provision-infra", Status: exegraph.StepDone, Start: base, End: base.Add(9 * time.Minute)}, - {Name: "package-web", Status: exegraph.StepDone, Start: base.Add(9 * time.Minute), End: base.Add(10 * time.Minute)}, - {Name: "deploy-web", Status: exegraph.StepDone, Start: base.Add(10 * time.Minute), End: base.Add(11 * time.Minute)}, + { + Name: "provision-infra", Status: exegraph.StepDone, + Start: base, End: base.Add(9 * time.Minute), + }, + { + Name: "package-web", Status: exegraph.StepDone, + Start: base.Add(9 * time.Minute), End: base.Add(10 * time.Minute), + }, + { + Name: "deploy-web", Status: exegraph.StepDone, + Start: base.Add(10 * time.Minute), End: base.Add(11 * time.Minute), + }, }, want: " Provisioning: 9 minutes\n Deploying: 2 minutes", }, diff --git a/cli/azd/magefile.go b/cli/azd/magefile.go index 2ff84a0b31e..f56d0f5048a 100644 --- a/cli/azd/magefile.go +++ b/cli/azd/magefile.go @@ -566,15 +566,15 @@ var excludedPlaybackTests = map[string]string{ // Recordings affected by feat/exegraph: the graph-driven up/provision path // introduces legitimate new HTTP interactions (layer hash probes, resource-group // existence checks). Must be re-recorded with live Azure credentials before merge. - "Test_DeploymentStacks": "needs re-record for feat/exegraph graph-driven provision", - "Test_CLI_ProvisionState": "needs re-record for feat/exegraph graph-driven provision", - "Test_CLI_InfraCreateAndDeleteUpperCase": "needs re-record for feat/exegraph graph-driven provision", - "Test_CLI_PreflightQuota_Sub_DefaultCapacity": "stale recording; missing extension registry + resource group interactions", - "Test_CLI_PreflightQuota_Sub_InvalidModelName": "stale recording; missing extension registry + resource group interactions", + "Test_DeploymentStacks": "needs re-record for feat/exegraph graph-driven provision", + "Test_CLI_ProvisionState": "needs re-record for feat/exegraph graph-driven provision", + "Test_CLI_InfraCreateAndDeleteUpperCase": "needs re-record for feat/exegraph graph-driven provision", + "Test_CLI_PreflightQuota_Sub_DefaultCapacity": "stale recording; missing extension registry + resource group interactions", + "Test_CLI_PreflightQuota_Sub_InvalidModelName": "stale recording; missing extension registry + resource group interactions", "Test_CLI_PreflightQuota_Sub_DifferentLocation": "stale recording; missing extension registry + resource group interactions", - "Test_CLI_PreflightQuota_RG_DefaultCapacity": "stale recording; missing extension registry + resource group interactions", - "Test_CLI_PreflightQuota_RG_InvalidVersion": "stale recording; missing extension registry + resource group interactions", - "Test_CLI_PreflightQuota_RG_InvalidModelName": "stale recording; missing extension registry + resource group interactions", + "Test_CLI_PreflightQuota_RG_DefaultCapacity": "stale recording; missing extension registry + resource group interactions", + "Test_CLI_PreflightQuota_RG_InvalidVersion": "stale recording; missing extension registry + resource group interactions", + "Test_CLI_PreflightQuota_RG_InvalidModelName": "stale recording; missing extension registry + resource group interactions", } // discoverPlaybackTests scans the recordings directory for .yaml files and From dd83b7ea232f8c79712d49a5e2d59edbb119fab1 Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 10:08:09 -0700 Subject: [PATCH 5/7] Add per-phase duration telemetry to azd up Emit perf.provision_duration_ms, perf.deploy_duration_ms, and perf.total_duration_ms as usage attributes on the cmd.up span. These enable backend tracking of provisioning vs deploying wall-clock durations independently. Also fix the phase timing breakdown to exclude package/publish steps from the deploying window (they run concurrently with provisioning), which caused the displayed deploy duration to equal the total. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/internal/cmd/up_graph.go | 47 +++++++++++++++++------ cli/azd/internal/cmd/up_graph_test.go | 4 +- cli/azd/internal/tracing/fields/fields.go | 27 +++++++++++++ 3 files changed, 65 insertions(+), 13 deletions(-) diff --git a/cli/azd/internal/cmd/up_graph.go b/cli/azd/internal/cmd/up_graph.go index be5a236bead..c1812023211 100644 --- a/cli/azd/internal/cmd/up_graph.go +++ b/cli/azd/internal/cmd/up_graph.go @@ -597,6 +597,17 @@ func (u *UpGraphAction) Run( log.Printf("warning: failed to invalidate state cache: %v", cacheErr) } + // Emit per-phase duration telemetry for backend performance tracking. + totalMs := since(startTime).Milliseconds() + tracing.SetUsageAttributes(fields.PerfTotalDurationMs.Int64(totalMs)) + provDur, deployDur := phaseDurations(result.Steps) + if provDur > 0 { + tracing.SetUsageAttributes(fields.PerfProvisionDurationMs.Int64(provDur.Milliseconds())) + } + if deployDur > 0 { + tracing.SetUsageAttributes(fields.PerfDeployDurationMs.Int64(deployDur.Milliseconds())) + } + return &actions.ActionResult{ Message: &actions.ResultMessage{ Header: fmt.Sprintf( @@ -610,7 +621,28 @@ func (u *UpGraphAction) Run( // phaseTimingBreakdown computes wall-clock durations for provisioning and deploying phases // by finding the earliest start and latest end among matching steps. +// Package/publish steps run concurrently with provisioning, so they are excluded from the +// deploy window to avoid showing a deploy duration that exceeds the total. func phaseTimingBreakdown(steps []exegraph.StepTiming) string { + provDur, deployDur := phaseDurations(steps) + + var lines []string + if provDur > 0 { + lines = append(lines, fmt.Sprintf(" Provisioning: %s", ux.DurationAsText(provDur))) + } + if deployDur > 0 { + lines = append(lines, fmt.Sprintf(" Deploying: %s", ux.DurationAsText(deployDur))) + } + + if len(lines) == 0 { + return "" + } + return strings.Join(lines, "\n") +} + +// phaseDurations computes the wall-clock duration for provisioning and deploying phases. +// Returns zero durations for phases that were not executed. +func phaseDurations(steps []exegraph.StepTiming) (provision, deploy time.Duration) { var provStart, deployStart time.Time var provEnd, deployEnd time.Time @@ -628,9 +660,7 @@ func phaseTimingBreakdown(steps []exegraph.StepTiming) string { if st.End.After(provEnd) { provEnd = st.End } - case strings.HasPrefix(st.Name, "package-") || - strings.HasPrefix(st.Name, "publish-") || - strings.HasPrefix(st.Name, "deploy-") || + case strings.HasPrefix(st.Name, "deploy-") || strings.HasPrefix(st.Name, "cmdhook-predeploy") || strings.HasPrefix(st.Name, "cmdhook-postdeploy"): if deployStart.IsZero() || st.Start.Before(deployStart) { @@ -642,18 +672,13 @@ func phaseTimingBreakdown(steps []exegraph.StepTiming) string { } } - var lines []string if !provStart.IsZero() { - lines = append(lines, fmt.Sprintf(" Provisioning: %s", ux.DurationAsText(provEnd.Sub(provStart)))) + provision = provEnd.Sub(provStart) } if !deployStart.IsZero() { - lines = append(lines, fmt.Sprintf(" Deploying: %s", ux.DurationAsText(deployEnd.Sub(deployStart)))) + deploy = deployEnd.Sub(deployStart) } - - if len(lines) == 0 { - return "" - } - return strings.Join(lines, "\n") + return provision, deploy } // changedFlagNames returns the names of flags that were explicitly set on diff --git a/cli/azd/internal/cmd/up_graph_test.go b/cli/azd/internal/cmd/up_graph_test.go index 68d027f3892..e39cea392fa 100644 --- a/cli/azd/internal/cmd/up_graph_test.go +++ b/cli/azd/internal/cmd/up_graph_test.go @@ -43,7 +43,7 @@ func TestPhaseTimingBreakdown(t *testing.T) { Start: base.Add(30 * time.Second), End: base.Add(90 * time.Second), }, }, - want: " Deploying: 1 minute 30 seconds", + want: " Deploying: 1 minute", }, { name: "both phases", @@ -61,7 +61,7 @@ func TestPhaseTimingBreakdown(t *testing.T) { Start: base.Add(10 * time.Minute), End: base.Add(11 * time.Minute), }, }, - want: " Provisioning: 9 minutes\n Deploying: 2 minutes", + want: " Provisioning: 9 minutes\n Deploying: 1 minute", }, { name: "skipped steps excluded", diff --git a/cli/azd/internal/tracing/fields/fields.go b/cli/azd/internal/tracing/fields/fields.go index b5b63065c65..76aa80d4a48 100644 --- a/cli/azd/internal/tracing/fields/fields.go +++ b/cli/azd/internal/tracing/fields/fields.go @@ -703,6 +703,33 @@ var ( Purpose: PerformanceAndHealth, IsMeasurement: true, } + + // PerfProvisionDurationMs is the wall-clock provisioning phase duration in milliseconds. + // Measured from the earliest provision step start to the latest provision step end. + PerfProvisionDurationMs = AttributeKey{ + Key: attribute.Key("perf.provision_duration_ms"), + Classification: SystemMetadata, + Purpose: PerformanceAndHealth, + IsMeasurement: true, + } + + // PerfDeployDurationMs is the wall-clock deploying phase duration in milliseconds. + // Measured from the earliest deploy step start to the latest deploy step end. + // Package and publish steps are excluded (they run concurrently with provisioning). + PerfDeployDurationMs = AttributeKey{ + Key: attribute.Key("perf.deploy_duration_ms"), + Classification: SystemMetadata, + Purpose: PerformanceAndHealth, + IsMeasurement: true, + } + + // PerfTotalDurationMs is the total wall-clock duration for the entire up-graph execution. + PerfTotalDurationMs = AttributeKey{ + Key: attribute.Key("perf.total_duration_ms"), + Classification: SystemMetadata, + Purpose: PerformanceAndHealth, + IsMeasurement: true, + } ) // Pack related fields From bceff4f1a136c92eafaf587b349c0e7476c169d9 Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 15:46:07 -0700 Subject: [PATCH 6/7] Address Copilot review: clarify subprocess locking docs, broaden ANSI regex - Reword manager.go and concurrency-model.md to clarify that subprocesses have their own saveMu instance (same acquisition order applies per-process). - Broaden ansiEscapeRe to cover CSI final byte '~' and OSC terminated by ST (ESC \) in addition to BEL. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/docs/concurrency-model.md | 6 +++--- cli/azd/internal/cmd/deploy_progress.go | 2 +- cli/azd/pkg/environment/manager.go | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cli/azd/docs/concurrency-model.md b/cli/azd/docs/concurrency-model.md index 5714b2189d0..af6e57dfe6f 100644 --- a/cli/azd/docs/concurrency-model.md +++ b/cli/azd/docs/concurrency-model.md @@ -190,9 +190,9 @@ subprocesses concurrently. Each subprocess is a separate OS process with its **o `manager.saveMu` instance — in-process mutexes are not shared across processes. Cross-process serialization is handled entirely by **flock** (the OS-level file -lock on the `.env.lock` file). Because each process independently acquires only -flock → env.mu (steps 2–3) and never holds another process's saveMu, circular -wait is impossible. +lock on the `.env.lock` file). Within each subprocess the same acquisition order +applies (saveMu → flock → env.mu), but since saveMu is per-process and never +shared across process boundaries, circular wait is impossible. ```text ┌─────────────────────────────────────────────┐ diff --git a/cli/azd/internal/cmd/deploy_progress.go b/cli/azd/internal/cmd/deploy_progress.go index 5273893b75b..711ad497a0d 100644 --- a/cli/azd/internal/cmd/deploy_progress.go +++ b/cli/azd/internal/cmd/deploy_progress.go @@ -72,7 +72,7 @@ type deployProgressTracker struct { } // ansiEscapeRe matches ANSI escape sequences (CSI and OSC). -var ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]|\x1b\][^\x07]*\x07`) +var ansiEscapeRe = regexp.MustCompile(`\x1b\[[0-9;]*[A-Za-z~]|\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)`) // sanitizeServiceName strips ANSI escape sequences and non-printable characters // from a service name to prevent terminal output manipulation. diff --git a/cli/azd/pkg/environment/manager.go b/cli/azd/pkg/environment/manager.go index f07fe044aff..7132f208754 100644 --- a/cli/azd/pkg/environment/manager.go +++ b/cli/azd/pkg/environment/manager.go @@ -114,14 +114,15 @@ type manager struct { // 2. local flock (cross-process OS file lock via gofrs/flock) // 3. env.mu (per-Environment sync.RWMutex, protects in-memory map) // - // Subprocess hooks (`azd env set`) spawn a separate azd process that - // acquires flock independently (no saveMu — that's per-process). The - // in-process saveMu prevents concurrent Save/Reload within the SAME - // process; the cross-process flock prevents concurrent file I/O across - // DIFFERENT processes (e.g., parallel hook subprocesses). No deadlock - // is possible because saveMu is never held cross-process: each OS - // process has its own saveMu instance, and flock handles inter-process - // serialization. + // Subprocess hooks (`azd env set`) spawn a separate azd process with + // its own saveMu instance (in-process mutexes are not shared across + // process boundaries). Within the subprocess, the same acquisition + // order applies: saveMu → flock → env.mu. The in-process saveMu + // prevents concurrent Save/Reload within the SAME process; the + // cross-process flock prevents concurrent file I/O across DIFFERENT + // processes (e.g., parallel hook subprocesses). No deadlock is + // possible because saveMu is never shared cross-process and flock + // handles inter-process serialization. // // See also: docs/concurrency-model.md "Lock Acquisition Order" section. saveMu sync.Mutex From 8af9e926f30a59632bf9455484c4693e2aeaef6e Mon Sep 17 00:00:00 2001 From: Jon Gallant <2163001+jongio@users.noreply.github.com> Date: Tue, 5 May 2026 16:05:09 -0700 Subject: [PATCH 7/7] Add test cases for CSI tilde and OSC ST regex coverage Adds csi_tilde_final and osc_st_terminator test cases to TestSanitizeServiceName exercising the broadened ANSI regex patterns. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cli/azd/internal/cmd/deploy_progress_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/azd/internal/cmd/deploy_progress_test.go b/cli/azd/internal/cmd/deploy_progress_test.go index 0a491ce011e..0f337402be5 100644 --- a/cli/azd/internal/cmd/deploy_progress_test.go +++ b/cli/azd/internal/cmd/deploy_progress_test.go @@ -188,7 +188,9 @@ func TestSanitizeServiceName(t *testing.T) { {"plain", "web-api", "web-api"}, {"ansi_color", "\033[31mred\033[0m", "red"}, {"ansi_csi_bold", "\033[1mbold\033[22m", "bold"}, + {"csi_tilde_final", "\033[15~keypress", "keypress"}, {"osc_title", "\033]0;title\x07rest", "rest"}, + {"osc_st_terminator", "\033]0;title\033\\rest", "rest"}, {"null_bytes", "svc\x00name", "svcname"}, {"control_chars", "hello\x01\x02world", "helloworld"}, {"tab_preserved", "svc\tname", "svc\tname"},