Skip to content

Commit 0434d9c

Browse files
committed
fix(sync): stabilize windows checksum writes
Signed-off-by: phernandez <paul@basicmachines.co>
1 parent 794dcc6 commit 0434d9c

3 files changed

Lines changed: 49 additions & 8 deletions

File tree

src/basic_memory/file_utils.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -114,8 +114,12 @@ async def write_file_atomic(path: FilePath, content: str) -> None:
114114
temp_path = path_obj.with_suffix(".tmp")
115115

116116
try:
117-
# Use aiofiles for non-blocking write
118-
async with aiofiles.open(temp_path, mode="w", encoding="utf-8") as f:
117+
# Trigger: Basic Memory writes markdown and metadata from normalized Python strings.
118+
# Why: Windows text mode would translate "\n" into "\r\n", which makes the
119+
# persisted bytes diverge from the in-memory content we index and hash.
120+
# Outcome: force LF on every platform so file bytes, checksums, and move detection
121+
# stay deterministic across local and CI environments.
122+
async with aiofiles.open(temp_path, mode="w", encoding="utf-8", newline="\n") as f:
119123
await f.write(content)
120124

121125
# Atomic rename (this is fast, doesn't need async)
@@ -168,7 +172,7 @@ async def format_markdown_builtin(path: Path) -> Optional[str]:
168172

169173
# Only write if content changed
170174
if formatted_content != content:
171-
async with aiofiles.open(path, mode="w", encoding="utf-8") as f:
175+
async with aiofiles.open(path, mode="w", encoding="utf-8", newline="\n") as f:
172176
await f.write(formatted_content)
173177

174178
logger.debug(

src/basic_memory/services/file_service.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -208,15 +208,20 @@ async def write_file(self, path: FilePath, content: str) -> str:
208208

209209
await file_utils.write_file_atomic(full_path, content)
210210

211-
final_content = content
212211
if self.app_config:
213212
formatted_content = await file_utils.format_file(
214213
full_path, self.app_config, is_markdown=self.is_markdown(path)
215214
)
216215
if formatted_content is not None:
217-
final_content = formatted_content # pragma: no cover
218-
219-
checksum = await file_utils.compute_checksum(final_content)
216+
pass # pragma: no cover
217+
218+
# Trigger: formatters and platform-specific text writers can change the
219+
# persisted bytes even when the logical content string is the same.
220+
# Why: sync and move detection compare against on-disk checksums, not
221+
# the pre-write Python string.
222+
# Outcome: return the checksum of the actual stored file so callers do
223+
# not record a hash that immediately disagrees with the file.
224+
checksum = await self.compute_checksum(full_path)
220225
logger.debug(f"File write completed path={full_path}, {checksum=}")
221226
return checksum
222227

@@ -478,8 +483,12 @@ async def update_frontmatter_with_result(
478483
if formatted_content is not None:
479484
content_for_checksum = formatted_content # pragma: no cover
480485

486+
# Trigger: frontmatter normalization may persist bytes that differ from the
487+
# in-memory string because of formatter output or platform newline handling.
488+
# Why: follow-up scans and checksum-based move detection read raw bytes from disk.
489+
# Outcome: the returned checksum always matches the file that was just written.
481490
return FrontmatterUpdateResult(
482-
checksum=await file_utils.compute_checksum(content_for_checksum),
491+
checksum=await self.compute_checksum(full_path),
483492
content=content_for_checksum,
484493
)
485494

tests/services/test_file_service.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import pytest
66

7+
from basic_memory import file_utils
78
from basic_memory.services.exceptions import FileOperationError
89
from basic_memory.services.file_service import FileService
910

@@ -167,6 +168,33 @@ async def test_write_unicode_content(tmp_path: Path, file_service: FileService):
167168
assert content == test_content
168169

169170

171+
@pytest.mark.asyncio
172+
async def test_update_frontmatter_checksum_matches_persisted_bytes(
173+
tmp_path: Path, file_service: FileService, monkeypatch
174+
):
175+
"""Frontmatter writes should hash the stored file, not the pre-write string."""
176+
test_path = tmp_path / "note.md"
177+
test_path.write_text("# Note\nBody\n", encoding="utf-8")
178+
179+
async def fake_write_file_atomic(path: Path, content: str) -> None:
180+
# Trigger: simulate a writer that persists CRLF bytes like Windows text mode.
181+
# Why: the regression happened when the stored bytes diverged from the LF string
182+
# used to build the checksum.
183+
# Outcome: this test proves FileService returns the checksum for the stored file.
184+
persisted = content.replace("\n", "\r\n").encode("utf-8")
185+
path.parent.mkdir(parents=True, exist_ok=True)
186+
path.write_bytes(persisted)
187+
188+
monkeypatch.setattr(file_utils, "write_file_atomic", fake_write_file_atomic)
189+
190+
result = await file_service.update_frontmatter_with_result(
191+
test_path,
192+
{"title": "Note", "type": "note"},
193+
)
194+
195+
assert result.checksum == await file_service.compute_checksum(test_path)
196+
197+
170198
@pytest.mark.asyncio
171199
async def test_read_file_content(tmp_path: Path, file_service: FileService):
172200
"""Test read_file_content returns just the content without checksum."""

0 commit comments

Comments (0)