every.channel/scripts/measure-duplicate-publishers-test.py
every.channel 91dad67fc2
Some checks failed
deploy-cloudflare / checks (push) Failing after 3s
ci-gates / checks (push) Failing after 5s
deploy-cloudflare / deploy (push) Has been skipped
Add duplicate publisher determinism proof
2026-06-10 03:33:46 -07:00

320 lines
13 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import importlib.util
import json
import sys
import unittest
from pathlib import Path
# Parent of the directory that contains this test file.
REPO = Path(__file__).resolve().parents[1]
# Script exercised by these tests. Its file name contains hyphens, so it cannot
# be named in an ``import`` statement and is loaded by path in ``load_module``.
SCRIPT = REPO / "scripts" / "measure-duplicate-publishers.py"
def load_module():
    """Execute the script at ``SCRIPT`` and return it as a new module object.

    The script is loaded by file path and registered in ``sys.modules`` under
    the name ``measure_duplicate_publishers``. Every call executes the script
    again and replaces any previously registered module of that name.

    Returns:
        The freshly executed module.

    Raises:
        RuntimeError: If no import spec or loader can be created for ``SCRIPT``.
        Exception: Whatever the script itself raises while executing; in that
            case the module is removed from ``sys.modules`` before re-raising.
    """
    spec = importlib.util.spec_from_file_location("measure_duplicate_publishers", SCRIPT)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"unable to load {SCRIPT}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so that lookups of the module by name made
    # while the script body runs resolve to this object.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module registered: later lookups by
        # name would otherwise silently receive the broken object.
        if sys.modules.get(spec.name) is module:
            del sys.modules[spec.name]
        raise
    return module
class MeasureDuplicatePublishersTest(unittest.TestCase):
    """Tests for the functions exposed by ``measure-duplicate-publishers.py``.

    Each test loads its own copy of the script through ``load_module()``.
    The private static builders below only assemble input fixtures.
    """

    @staticmethod
    def _hashed(sequence: int, digest: str, node: str) -> dict[str, object]:
        """Build one manifest record carrying a hash and a source node."""
        return {"group_sequence": sequence, "blake3": digest, "source_node": node}

    @staticmethod
    def _healthy_publishers() -> dict[str, dict[str, bool]]:
        """Build the status mapping for publishers ``a`` and ``b``, all flags true."""
        return {
            name: {"health_ok": True, "metrics_ok": True, "duplicate_metrics_present": True}
            for name in ("a", "b")
        }

    @staticmethod
    def _comparison(*, identity_ok: bool, matching: int, hash_match: bool) -> dict[str, object]:
        """Build a manifest-comparison entry with zero divergent sequences."""
        return {
            "source_identity_ok": identity_ok,
            "matching_sequence_count": matching,
            "divergent_sequence_count": 0,
            "byte_for_byte_hash_match": hash_match,
        }

    @staticmethod
    def _series(metric: str, value: int) -> dict[str, object]:
        """Build a Prometheus result entry that is ok and has its series present."""
        return {"metric": metric, "ok": True, "series_present": True, "value": value}

    def test_manifest_hash_stats_counts_duplicates_divergence_and_missing_hashes(self) -> None:
        """``manifest_hash_stats`` reports the expected counters for a mixed record list."""
        script = load_module()
        rows = [
            {"group_sequence": 10, "received_unix_ms": 1_000, "blake3": "same", "source_node": "nuc-a"},
            {"group_sequence": 10, "received_unix_ms": 1_001, "blake3": "same", "source_node": "nuc-b"},
            {"group_sequence": 11, "received_unix_ms": 2_000, "blake3": "left", "source_node": "nuc-a"},
            {"group_sequence": 11, "received_unix_ms": 2_001, "blake3": "right", "source_node": "nuc-b"},
            {"group_sequence": 12, "received_unix_ms": 3_000},
        ]

        result = script.manifest_hash_stats(rows, invalid_lines=2)

        self.assertEqual(5, result["record_count"])
        self.assertEqual(2, result["invalid_lines"])
        self.assertEqual(2, result["sequence_count"])
        self.assertEqual(2, result["source_identity_count"])
        self.assertEqual(["nuc-a", "nuc-b"], result["source_identities"])
        self.assertEqual(1, result["missing_source_identity_records"])
        self.assertEqual(1, result["duplicate_hash_source_records"])
        self.assertEqual(1, result["duplicate_hash_sequences"])
        self.assertEqual(1, result["hash_divergent_sequences"])
        self.assertEqual(1, result["missing_hash_records"])
        self.assertEqual(1_000, result["first_received_unix_ms"])
        self.assertEqual(3_000, result["latest_received_unix_ms"])

    def test_compare_manifest_hashes_proves_byte_for_byte_matches(self) -> None:
        """Two publishers with identical hashes per sequence compare as a full match."""
        script = load_module()
        manifests = {
            node: [self._hashed(1, "a", node), self._hashed(2, "b", node)]
            for node in ("publisher-a", "publisher-b")
        }

        outcome = script.compare_manifest_hashes(manifests)

        self.assertTrue(outcome["byte_for_byte_hash_match"])
        self.assertTrue(outcome["source_identity_ok"])
        self.assertEqual(["publisher-a", "publisher-b"], outcome["source_identities"])
        self.assertEqual(2, outcome["matching_sequence_count"])
        self.assertEqual(0, outcome["divergent_sequence_count"])
        self.assertEqual(0, outcome["missing_sequence_count"])

    def test_compare_manifest_hashes_reports_divergent_sequences(self) -> None:
        """A differing hash and an extra sequence are counted and exemplified."""
        script = load_module()
        manifests = {
            "publisher-a": [
                self._hashed(1, "a", "publisher-a"),
                self._hashed(2, "b", "publisher-a"),
            ],
            "publisher-b": [
                self._hashed(1, "a", "publisher-b"),
                self._hashed(2, "different", "publisher-b"),
                self._hashed(3, "extra", "publisher-b"),
            ],
        }

        outcome = script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertEqual(1, outcome["matching_sequence_count"])
        self.assertEqual(1, outcome["divergent_sequence_count"])
        self.assertEqual(1, outcome["missing_sequence_count"])
        first_example = outcome["divergent_examples"][0]
        self.assertEqual(2, first_example["sequence"])
        self.assertEqual(["different"], first_example["hashes"]["publisher-b"])

    def test_compare_manifest_hashes_rejects_intra_manifest_divergence(self) -> None:
        """Conflicting hashes for one sequence inside a single manifest count as divergence."""
        script = load_module()
        manifests = {
            "publisher-a": [
                self._hashed(1, "same", "publisher-a"),
            ],
            "publisher-b": [
                self._hashed(1, "same", "publisher-b"),
                self._hashed(1, "different", "publisher-b"),
            ],
        }

        outcome = script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertEqual(0, outcome["matching_sequence_count"])
        self.assertEqual(1, outcome["divergent_sequence_count"])
        first_example = outcome["divergent_examples"][0]
        self.assertEqual(["different", "same"], first_example["hashes"]["publisher-b"])

    def test_compare_manifest_hashes_rejects_mirrored_same_source_records(self) -> None:
        """Matching hashes that all name one source node do not count as a proof."""
        script = load_module()
        manifests = {
            buffer_name: [self._hashed(1, "same", "archive-origin")]
            for buffer_name in ("nuc-a-buffer", "nuc-b-buffer")
        }

        outcome = script.compare_manifest_hashes(manifests)

        self.assertFalse(outcome["byte_for_byte_hash_match"])
        self.assertFalse(outcome["source_identity_ok"])
        self.assertEqual(["archive-origin"], outcome["source_identities"])

    def test_summary_requires_manifest_comparison_and_prometheus_series(self) -> None:
        """Two healthy samples 30 seconds apart with matching manifests summarise as ok."""
        script = load_module()
        duplicate_metric = "every_channel_ladder_archive_duplicate_hash_source_records"
        divergent_metric = "every_channel_ladder_archive_hash_divergent_sequences"
        earlier_sample = {
            "sample_unix_ms": 1_000,
            "publishers": self._healthy_publishers(),
            "manifest_comparison": self._comparison(identity_ok=True, matching=2, hash_match=True),
            "prometheus": [
                self._series(duplicate_metric, 2),
                self._series(divergent_metric, 0),
            ],
        }
        later_sample = {
            "sample_unix_ms": 31_000,
            "publishers": self._healthy_publishers(),
            "manifest_comparison": self._comparison(identity_ok=True, matching=4, hash_match=True),
            "prometheus": [
                self._series(duplicate_metric, 4),
                self._series(divergent_metric, 0),
            ],
        }

        report = script.summarize([earlier_sample, later_sample])

        self.assertTrue(report["ok"])
        self.assertEqual(30_000, report["elapsed_ms"])
        self.assertEqual(2, report["sample_count"])
        self.assertEqual(4, report["latest_manifest_comparison"]["matching_sequence_count"])

    def test_summary_rejects_single_sample_and_manifest_hash_errors(self) -> None:
        """A lone sample whose manifest reports hash problems is rejected with reasons."""
        script = load_module()
        only_sample = {
            "sample_unix_ms": 1_000,
            "publishers": self._healthy_publishers(),
            "manifests": {
                "a": {
                    "ok": True,
                    "hash_divergent_sequences": 1,
                    "missing_hash_records": 1,
                    "invalid_lines": 1,
                },
            },
            "manifest_comparison": self._comparison(identity_ok=True, matching=2, hash_match=True),
            "prometheus": [
                self._series("every_channel_ladder_archive_duplicate_hash_source_records", 2),
            ],
        }

        report = script.summarize([only_sample])

        self.assertFalse(report["ok"])
        reasons = report["reasons"]
        self.assertIn("insufficient_elapsed_samples", reasons)
        self.assertIn("manifest_hash_divergence_observed", reasons)
        self.assertIn("manifest_hash_missing_records", reasons)
        self.assertIn("manifest_invalid_lines", reasons)

    def test_summary_rejects_missing_or_non_diverse_source_identity(self) -> None:
        """Samples whose comparison fails the source-identity check are rejected."""
        script = load_module()
        earlier_sample = {
            "sample_unix_ms": 1_000,
            "manifest_comparison": self._comparison(identity_ok=False, matching=2, hash_match=False),
        }
        later_sample = {
            "sample_unix_ms": 31_000,
            "manifest_comparison": self._comparison(identity_ok=False, matching=2, hash_match=False),
            "prometheus": [
                self._series("every_channel_archive_missing_source_identity_records", 2),
            ],
        }

        report = script.summarize([earlier_sample, later_sample])

        self.assertFalse(report["ok"])
        reasons = report["reasons"]
        self.assertIn("manifest_source_identity_missing_or_not_diverse", reasons)
        self.assertIn("prometheus_source_identity_missing_nonzero", reasons)

    def test_agent_manifest_url_builds_bounded_tailnet_endpoint(self) -> None:
        """``agent_manifest_url`` yields the expected archive-manifest URL and query string."""
        script = load_module()
        wanted = (
            "http://100.64.0.5:7799/v1/archive-manifest"
            "?broadcast=la-kcop&track=0.m4s&max_bytes=4096&role=publisher-buffer"
        )

        built = script.agent_manifest_url(
            "http://100.64.0.5:7799/",
            broadcast="la-kcop",
            track="0.m4s",
            role="publisher-buffer",
            max_bytes=4096,
        )

        self.assertEqual(wanted, built)

    def test_parser_defaults_to_publisher_origin_proof_track(self) -> None:
        """Parsing an empty argument list leaves ``track`` at ``publisher.m4s``."""
        script = load_module()
        parsed = script.build_parser().parse_args([])
        self.assertEqual("publisher.m4s", parsed.track)

    def test_parse_manifest_jsonl_tolerates_partial_first_tail_line(self) -> None:
        """A garbled first line is dropped without being counted as invalid."""
        script = load_module()
        payload = 'not-json-prefix{"group_sequence":1}\n{"group_sequence":2,"blake3":"b"}\n'

        parsed_rows, invalid_count = script.parse_manifest_jsonl(payload)

        self.assertEqual(0, invalid_count)
        self.assertEqual([2], [row["group_sequence"] for row in parsed_rows])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()