#!/usr/bin/env python3
# Unit tests for scripts/measure-duplicate-publishers.py.
#
# The script's file name contains hyphens, so it cannot be pulled in with a
# plain ``import`` statement; it is loaded from its path instead.
from __future__ import annotations

import importlib.util
import json
import sys
import unittest
from pathlib import Path

REPO = Path(__file__).resolve().parents[1]
SCRIPT = REPO / "scripts" / "measure-duplicate-publishers.py"


def load_module():
    """Execute SCRIPT as module ``measure_duplicate_publishers`` and return it.

    The module object is registered in ``sys.modules`` under the spec name
    before its body is executed.

    Raises:
        RuntimeError: if no import spec (or no loader) can be built for SCRIPT.
    """
    spec = importlib.util.spec_from_file_location("measure_duplicate_publishers", SCRIPT)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise RuntimeError(f"unable to load {SCRIPT}")
    loaded = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = loaded
    loader.exec_module(loaded)
    return loaded


def _record(sequence, digest=None, source=None, *, received_ms=None):
    """Build one manifest record; arguments left as ``None`` are omitted.

    Keys are inserted in the order group_sequence, received_unix_ms, blake3,
    source_node.
    """
    entry = {"group_sequence": sequence}
    if received_ms is not None:
        entry["received_unix_ms"] = received_ms
    if digest is not None:
        entry["blake3"] = digest
    if source is not None:
        entry["source_node"] = source
    return entry


def _healthy_publishers():
    """Return a fresh ``publishers`` mapping where "a" and "b" are all-green."""
    return {
        name: {"health_ok": True, "metrics_ok": True, "duplicate_metrics_present": True}
        for name in ("a", "b")
    }


def _comparison(matching, *, identity_ok=True, hash_match=True):
    """Return a ``manifest_comparison`` entry with zero divergent sequences."""
    return {
        "source_identity_ok": identity_ok,
        "matching_sequence_count": matching,
        "divergent_sequence_count": 0,
        "byte_for_byte_hash_match": hash_match,
    }


def _series(metric, value):
    """Return a ``prometheus`` entry for a present, successfully queried series."""
    return {"metric": metric, "ok": True, "series_present": True, "value": value}


class MeasureDuplicatePublishersTest(unittest.TestCase):
    """Exercises the helpers exposed by the measure-duplicate-publishers script."""

    def setUp(self) -> None:
        # Each test runs against a freshly executed copy of the script.
        self.script = load_module()

    def test_manifest_hash_stats_counts_duplicates_divergence_and_missing_hashes(self) -> None:
        # Sequence 10 agrees across nodes, 11 disagrees, 12 has no hash/source.
        manifest = [
            _record(10, "same", "nuc-a", received_ms=1_000),
            _record(10, "same", "nuc-b", received_ms=1_001),
            _record(11, "left", "nuc-a", received_ms=2_000),
            _record(11, "right", "nuc-b", received_ms=2_001),
            _record(12, received_ms=3_000),
        ]

        stats = self.script.manifest_hash_stats(manifest, invalid_lines=2)

        # Checked in order; the first mismatch aborts the test.
        expectations = [
            ("record_count", 5),
            ("invalid_lines", 2),
            ("sequence_count", 2),
            ("source_identity_count", 2),
            ("source_identities", ["nuc-a", "nuc-b"]),
            ("missing_source_identity_records", 1),
            ("duplicate_hash_source_records", 1),
            ("duplicate_hash_sequences", 1),
            ("hash_divergent_sequences", 1),
            ("missing_hash_records", 1),
            ("first_received_unix_ms", 1_000),
            ("latest_received_unix_ms", 3_000),
        ]
        for field, wanted in expectations:
            self.assertEqual(wanted, stats[field])

    def test_compare_manifest_hashes_proves_byte_for_byte_matches(self) -> None:
        # Two distinct sources reporting identical hashes for every sequence.
        manifests = {
            "publisher-a": [
                _record(1, "a", "publisher-a"),
                _record(2, "b", "publisher-a"),
            ],
            "publisher-b": [
                _record(1, "a", "publisher-b"),
                _record(2, "b", "publisher-b"),
            ],
        }

        outcome = self.script.compare_manifest_hashes(manifests)

        self.assertTrue(outcome["byte_for_byte_hash_match"])
        self.assertTrue(outcome["source_identity_ok"])
        self.assertEqual(["publisher-a", "publisher-b"], outcome["source_identities"])
        self.assertEqual(2, outcome["matching_sequence_count"])
        self.assertEqual(0, outcome["divergent_sequence_count"])
        self.assertEqual(0, outcome["missing_sequence_count"])

    def test_compare_manifest_hashes_reports_divergent_sequences(self) -> None:
        # Sequence 2 differs between publishers; sequence 3 exists on one side only.
        manifests = {
            "publisher-a": [
                _record(1, "a", "publisher-a"),
                _record(2, "b", "publisher-a"),
            ],
            "publisher-b": [
                _record(1, "a", "publisher-b"),
                _record(2, "different", "publisher-b"),
                _record(3, "extra", "publisher-b"),
            ],
        }

        outcome = self.script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertEqual(1, outcome["matching_sequence_count"])
        self.assertEqual(1, outcome["divergent_sequence_count"])
        self.assertEqual(1, outcome["missing_sequence_count"])
        first_example = outcome["divergent_examples"][0]
        self.assertEqual(2, first_example["sequence"])
        self.assertEqual(["different"], first_example["hashes"]["publisher-b"])

    def test_compare_manifest_hashes_rejects_intra_manifest_divergence(self) -> None:
        # publisher-b contradicts itself on sequence 1.
        manifests = {
            "publisher-a": [
                _record(1, "same", "publisher-a"),
            ],
            "publisher-b": [
                _record(1, "same", "publisher-b"),
                _record(1, "different", "publisher-b"),
            ],
        }

        outcome = self.script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertEqual(0, outcome["matching_sequence_count"])
        self.assertEqual(1, outcome["divergent_sequence_count"])
        self.assertEqual(
            ["different", "same"],
            outcome["divergent_examples"][0]["hashes"]["publisher-b"],
        )

    def test_compare_manifest_hashes_rejects_mirrored_same_source_records(self) -> None:
        # Both buffers carry records stamped with the same source_node.
        manifests = {
            "nuc-a-buffer": [
                _record(1, "same", "archive-origin"),
            ],
            "nuc-b-buffer": [
                _record(1, "same", "archive-origin"),
            ],
        }

        outcome = self.script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertFalse(outcome["source_identity_ok"])
        self.assertEqual(["archive-origin"], outcome["source_identities"])

    def test_summary_requires_manifest_comparison_and_prometheus_series(self) -> None:
        # Two fully healthy samples taken 30 seconds apart.
        earlier = {
            "sample_unix_ms": 1_000,
            "publishers": _healthy_publishers(),
            "manifest_comparison": _comparison(2),
            "prometheus": [
                _series("every_channel_ladder_archive_duplicate_hash_source_records", 2),
                _series("every_channel_ladder_archive_hash_divergent_sequences", 0),
            ],
        }
        later = {
            "sample_unix_ms": 31_000,
            "publishers": _healthy_publishers(),
            "manifest_comparison": _comparison(4),
            "prometheus": [
                _series("every_channel_ladder_archive_duplicate_hash_source_records", 4),
                _series("every_channel_ladder_archive_hash_divergent_sequences", 0),
            ],
        }

        report = self.script.summarize([earlier, later])

        self.assertTrue(report["ok"])
        self.assertEqual(30_000, report["elapsed_ms"])
        self.assertEqual(2, report["sample_count"])
        self.assertEqual(4, report["latest_manifest_comparison"]["matching_sequence_count"])

    def test_summary_rejects_single_sample_and_manifest_hash_errors(self) -> None:
        # A lone sample whose per-manifest stats report hash problems.
        only_sample = {
            "sample_unix_ms": 1_000,
            "publishers": _healthy_publishers(),
            "manifests": {
                "a": {
                    "ok": True,
                    "hash_divergent_sequences": 1,
                    "missing_hash_records": 1,
                    "invalid_lines": 1,
                },
            },
            "manifest_comparison": _comparison(2),
            "prometheus": [
                _series("every_channel_ladder_archive_duplicate_hash_source_records", 2),
            ],
        }

        report = self.script.summarize([only_sample])

        self.assertFalse(report["ok"])
        for reason in (
            "insufficient_elapsed_samples",
            "manifest_hash_divergence_observed",
            "manifest_hash_missing_records",
            "manifest_invalid_lines",
        ):
            self.assertIn(reason, report["reasons"])

    def test_summary_rejects_missing_or_non_diverse_source_identity(self) -> None:
        # Both samples flag the source identity check as failed.
        earlier = {
            "sample_unix_ms": 1_000,
            "manifest_comparison": _comparison(2, identity_ok=False, hash_match=False),
        }
        later = {
            "sample_unix_ms": 31_000,
            "manifest_comparison": _comparison(2, identity_ok=False, hash_match=False),
            "prometheus": [
                _series("every_channel_archive_missing_source_identity_records", 2),
            ],
        }

        report = self.script.summarize([earlier, later])

        self.assertFalse(report["ok"])
        self.assertIn("manifest_source_identity_missing_or_not_diverse", report["reasons"])
        self.assertIn("prometheus_source_identity_missing_nonzero", report["reasons"])

    def test_agent_manifest_url_builds_bounded_tailnet_endpoint(self) -> None:
        built = self.script.agent_manifest_url(
            "http://100.64.0.5:7799/",
            broadcast="la-kcop",
            track="0.m4s",
            role="publisher-buffer",
            max_bytes=4096,
        )

        wanted = (
            "http://100.64.0.5:7799/v1/archive-manifest"
            "?broadcast=la-kcop&track=0.m4s&max_bytes=4096&role=publisher-buffer"
        )
        self.assertEqual(wanted, built)

    def test_parser_defaults_to_publisher_origin_proof_track(self) -> None:
        # Parsing an empty argument list exposes the parser's defaults.
        parser = self.script.build_parser()
        namespace = parser.parse_args([])
        self.assertEqual("publisher.m4s", namespace.track)

    def test_parse_manifest_jsonl_tolerates_partial_first_tail_line(self) -> None:
        # The first line starts with junk, as if the tail began mid-record.
        tail = 'not-json-prefix{"group_sequence":1}\n{"group_sequence":2,"blake3":"b"}\n'

        parsed, bad_lines = self.script.parse_manifest_jsonl(tail)

        self.assertEqual(0, bad_lines)
        sequences = [entry["group_sequence"] for entry in parsed]
        self.assertEqual([2], sequences)


if __name__ == "__main__":
    unittest.main()