From 593e4ae5b8a3604b0b2cca55e62756b7966a051d Mon Sep 17 00:00:00 2001 From: Yusuf Mohsinally <463376+yusufm@users.noreply.github.com> Date: Tue, 17 Mar 2026 23:11:59 -0700 Subject: [PATCH] Fix atomic entity writes and scoped git identity --- smallfactory/core/v1/entities.py | 28 +++++++++-- smallfactory/core/v1/gitutils.py | 69 +++++++++++++++++----------- tests/test_entities_core_behavior.py | 17 +++++++ tests/test_git_workflow.py | 30 +++++++++++- web/app.py | 19 ++------ 5 files changed, 118 insertions(+), 45 deletions(-) diff --git a/smallfactory/core/v1/entities.py b/smallfactory/core/v1/entities.py index c8e2c2d..2d779fe 100644 --- a/smallfactory/core/v1/entities.py +++ b/smallfactory/core/v1/entities.py @@ -61,9 +61,29 @@ def _read_yaml(p: Path) -> dict: return yaml.safe_load(f) or {} +def _atomic_write_text_file(p: Path, writer) -> None: + p.parent.mkdir(parents=True, exist_ok=True) + tmp_path = None + try: + with tempfile.NamedTemporaryFile("w", dir=p.parent, delete=False, encoding="utf-8") as tf: + tmp_path = Path(tf.name) + writer(tf) + tf.flush() + os.fsync(tf.fileno()) + if tmp_path is None: + raise RuntimeError(f"Failed to create temporary file for '{p.name}'") + tmp_path.replace(p) + except Exception: + if tmp_path is not None and tmp_path.exists(): + try: + tmp_path.unlink() + except Exception: + pass + raise + + def _write_yaml(p: Path, data: dict) -> None: - with open(p, "w") as f: - yaml.safe_dump(data, f, sort_keys=False) + _atomic_write_text_file(p, lambda f: yaml.safe_dump(data, f, sort_keys=False)) def _read_events_jsonl(p: Path) -> List[dict]: @@ -86,11 +106,11 @@ def _read_events_jsonl(p: Path) -> List[dict]: def _write_events_jsonl(p: Path, events: List[dict]) -> None: - p.parent.mkdir(parents=True, exist_ok=True) - with open(p, "w", encoding="utf-8") as f: + def _write(f) -> None: for ev in events: f.write(json.dumps(ev, separators=(",", ":"), ensure_ascii=True)) f.write("\n") + _atomic_write_text_file(p, _write) def _assert_mutations_allowed(datarepo_path: Path) -> None: diff --git a/smallfactory/core/v1/gitutils.py b/smallfactory/core/v1/gitutils.py index 34cbff8..ed7268d 100644 --- a/smallfactory/core/v1/gitutils.py +++ b/smallfactory/core/v1/gitutils.py @@ -1,4 +1,7 @@ +import os import subprocess +from contextlib import contextmanager +from contextvars import ContextVar from pathlib import Path from .locks import assert_no_upgrade_in_progress @@ -19,6 +22,40 @@ class GitCommitError(GitError): class GitPushError(GitError): pass + +_GIT_ENV_OVERRIDES: ContextVar[dict[str, str] | None] = ContextVar("sf_git_env_overrides", default=None) + + +@contextmanager +def git_identity_env(name: str, email: str): + current = dict(_GIT_ENV_OVERRIDES.get() or {}) + current.update({ + "GIT_AUTHOR_NAME": name, + "GIT_AUTHOR_EMAIL": email, + "GIT_COMMITTER_NAME": name, + "GIT_COMMITTER_EMAIL": email, + }) + token = _GIT_ENV_OVERRIDES.set(current) + try: + yield + finally: + _GIT_ENV_OVERRIDES.reset(token) + + +def _git_run(args: list[str], *, cwd: Path, capture_output: bool = True, text: bool = True): + env_overrides = _GIT_ENV_OVERRIDES.get() + env = None + if env_overrides: + env = os.environ.copy() + env.update(env_overrides) + return subprocess.run( + args, + cwd=cwd, + capture_output=capture_output, + text=text, + env=env, + ) + # Note: git_commit_and_push was removed. Use git_commit_paths (commit-only) # and orchestrate push via higher-level transaction guard in web/CLI. def git_commit_paths(repo_path: Path, paths: list[Path], message: str, delete: bool = False) -> None: @@ -35,24 +72,14 @@ def git_commit_paths(repo_path: Path, paths: list[Path], message: str, delete: b try: # Some unit tests use plain directories without git initialization. # In that mode, file mutations should still work and commit becomes a no-op. - ck = subprocess.run( - ["git", "rev-parse", "--is-inside-work-tree"], - cwd=repo_path, - capture_output=True, - text=True, - ) + ck = _git_run(["git", "rev-parse", "--is-inside-work-tree"], cwd=repo_path) if ck.returncode != 0: return for p in paths: if delete: # Stage removals and remove from working tree; ignore if path is untracked. - r = subprocess.run( - ["git", "rm", "-fr", "--ignore-unmatch", "--", str(p)], - cwd=repo_path, - capture_output=True, - text=True, - ) + r = _git_run(["git", "rm", "-fr", "--ignore-unmatch", "--", str(p)], cwd=repo_path) if r.returncode != 0: raise GitCommitError( "git rm failed", @@ -63,12 +90,7 @@ def git_commit_paths(repo_path: Path, paths: list[Path], message: str, delete: b ) else: # Use -A so deletions under a directory are staged as well. - r = subprocess.run( - ["git", "add", "-A", "--", str(p)], - cwd=repo_path, - capture_output=True, - text=True, - ) + r = _git_run(["git", "add", "-A", "--", str(p)], cwd=repo_path) if r.returncode != 0: raise GitCommitError( "git add failed", @@ -78,12 +100,7 @@ def git_commit_paths(repo_path: Path, paths: list[Path], message: str, delete: b stderr=r.stderr, ) - cm = subprocess.run( - ["git", "commit", "-m", message], - cwd=repo_path, - capture_output=True, - text=True, - ) + cm = _git_run(["git", "commit", "-m", message], cwd=repo_path) if cm.returncode != 0: out = (cm.stdout or "") + "\n" + (cm.stderr or "") low = out.lower() @@ -113,7 +130,7 @@ def git_push(repo_path: Path, remote: str = "origin", ref: str = "HEAD") -> bool Returns True if a push was attempted (and succeeded), False if remote missing. Prints a warning on failure and returns False. """ - remotes = subprocess.run(["git", "remote"], cwd=repo_path, capture_output=True, text=True) + remotes = _git_run(["git", "remote"], cwd=repo_path) if remotes.returncode != 0: raise GitPushError( "git remote failed", @@ -125,7 +142,7 @@ def git_push(repo_path: Path, remote: str = "origin", ref: str = "HEAD") -> bool if remote not in (remotes.stdout or "").split(): return False - r = subprocess.run(["git", "push", remote, ref], cwd=repo_path, capture_output=True, text=True) + r = _git_run(["git", "push", remote, ref], cwd=repo_path) if r.returncode != 0: raise GitPushError( "git push failed", diff --git a/tests/test_entities_core_behavior.py b/tests/test_entities_core_behavior.py index d10d574..86fc52c 100644 --- a/tests/test_entities_core_behavior.py +++ b/tests/test_entities_core_behavior.py @@ -168,6 +168,23 @@ def test_build_events_read_rejects_missing_id(tmp_path: Path): _ = get_entity(repo, "b_widget_008") +def test_write_yaml_preserves_existing_file_when_dump_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + target = tmp_path / "entity.yml" + target.write_text("name: original\n", encoding="utf-8") + + def _boom(data, stream, sort_keys=False): + stream.write("name: partial") + raise RuntimeError("boom") + + monkeypatch.setattr(entities_mod.yaml, "safe_dump", _boom) + + with pytest.raises(RuntimeError, match="boom"): + entities_mod._write_yaml(target, {"name": "updated"}) + + assert target.read_text(encoding="utf-8") == "name: original\n" + assert [p.name for p in target.parent.iterdir()] == ["entity.yml"] + + def test_build_events_read_skips_malformed_json_lines(tmp_path: Path): repo = tmp_path / "repo" repo.mkdir(parents=True) diff --git a/tests/test_git_workflow.py b/tests/test_git_workflow.py index 15b7112..8e97117 100644 --- a/tests/test_git_workflow.py +++ b/tests/test_git_workflow.py @@ -10,7 +10,7 @@ import pytest from conftest import init_git_repo -from smallfactory.core.v1.gitutils import git_commit_paths +from smallfactory.core.v1.gitutils import git_commit_paths, git_identity_env def _git_has_commit(root: Path) -> bool: @@ -63,6 +63,34 @@ def test_git_commit_paths_noop_when_nothing_to_commit(tmp_path: Path): assert second == first +def test_git_commit_paths_uses_scoped_identity_without_mutating_process_env(tmp_path: Path): + repo = tmp_path / "repo" + repo.mkdir(parents=True) + init_git_repo(repo) + + entities_dir = repo / "entities" / "p_widget" + entities_dir.mkdir(parents=True) + f = entities_dir / "entity.yml" + f.write_text("name: Widget\n") + + prior_author = os.environ.get("GIT_AUTHOR_NAME") + prior_email = os.environ.get("GIT_AUTHOR_EMAIL") + + with git_identity_env("Jane Doe", "jane.doe@example.com"): + assert os.environ.get("GIT_AUTHOR_NAME") == prior_author + assert os.environ.get("GIT_AUTHOR_EMAIL") == prior_email + git_commit_paths(repo, [entities_dir], "[test] scoped identity") + + author = subprocess.run( + ["git", "log", "-n", "1", "--pretty=%an <%ae>"], + cwd=repo, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + assert author == "Jane Doe " + + @pytest.mark.skipif(not importlib.util.find_spec("flask"), reason="Flask not installed; web txn tests skipped") def test_run_repo_txn_autocommit_on_off(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): # Dynamically import web/app.py to access _run_repo_txn without starting the server diff --git a/web/app.py b/web/app.py index f7326ff..471aa4f 100644 --- a/web/app.py +++ b/web/app.py @@ -74,6 +74,7 @@ extract_invoice_part as vlm_extract_invoice_part, ) from smallfactory.core.v1.gitutils import git_push +from smallfactory.core.v1.gitutils import git_identity_env from smallfactory.core.v1.validate import validate_repo app = Flask(__name__) @@ -623,21 +624,9 @@ def _derive_name_from_email(em: str) -> str: @contextmanager def _with_git_identity(name: str, email: str): - """Temporarily set GIT_AUTHOR_* and GIT_COMMITTER_* for subprocess git commands.""" - keys = ['GIT_AUTHOR_NAME', 'GIT_AUTHOR_EMAIL', 'GIT_COMMITTER_NAME', 'GIT_COMMITTER_EMAIL'] - prev = {k: os.environ.get(k) for k in keys} - try: - os.environ['GIT_AUTHOR_NAME'] = name - os.environ['GIT_COMMITTER_NAME'] = name - os.environ['GIT_AUTHOR_EMAIL'] = email - os.environ['GIT_COMMITTER_EMAIL'] = email + """Scope git identity to gitutils subprocesses without mutating process-global env.""" + with git_identity_env(name, email): yield - finally: - for k, v in prev.items(): - if v is None: - os.environ.pop(k, None) - else: - os.environ[k] = v _GIT_REMOTE_CACHE: dict[str, tuple[float, bool]] = {} @@ -3957,6 +3946,7 @@ def stickers_batch(): error='ReportLab is not installed. Install web deps: pip install -r web/requirements.txt', size_text=size_text, dpi_text=dpi_text, + text_size_text=text_size_text, fields_text=fields_raw, sfids_text=sfids_text, ) @@ -4006,6 +3996,7 @@ def stickers_batch(): error=f'Failed to build PDF: {e}', size_text=size_text, dpi_text=dpi_text, + text_size_text=text_size_text, fields_text=fields_raw, sfids_text=sfids_text, )