Add duplicate publisher determinism proof
This commit is contained in:
parent
5d0f3077d3
commit
91dad67fc2
18 changed files with 21569 additions and 595 deletions
320
scripts/measure-duplicate-publishers-test.py
Normal file
320
scripts/measure-duplicate-publishers-test.py
Normal file
|
|
@ -0,0 +1,320 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory two levels above this file; used as the root for locating the script.
REPO = Path(__file__).resolve().parents[1]
# Path of the script exercised by these tests. Its file name contains hyphens,
# so it is addressed by path rather than by module name.
SCRIPT = REPO.joinpath("scripts", "measure-duplicate-publishers.py")
|
||||
|
||||
|
||||
def load_module():
    """Execute the script at ``SCRIPT`` and return it as a module object.

    The module is created from an import spec named
    ``measure_duplicate_publishers`` and registered in ``sys.modules`` under
    that name. Each call executes the script again and returns a new module
    object.

    Returns:
        The freshly executed module.

    Raises:
        RuntimeError: If no import spec or loader can be built for ``SCRIPT``.
        BaseException: Anything raised while the script executes is
            re-raised after the ``sys.modules`` registration is rolled back.
    """
    spec = importlib.util.spec_from_file_location("measure_duplicate_publishers", SCRIPT)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"unable to load {SCRIPT}")

    module = importlib.util.module_from_spec(spec)
    previous = sys.modules.get(spec.name)
    # Register before executing so code that resolves its own module through
    # sys.modules while the script body runs can find it.
    sys.modules[spec.name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind: restore whatever was
        # registered before this call (or nothing at all).
        if previous is None:
            sys.modules.pop(spec.name, None)
        else:
            sys.modules[spec.name] = previous
        raise
    return module
|
||||
|
||||
|
||||
class MeasureDuplicatePublishersTest(unittest.TestCase):
    """Tests for helpers exposed by the duplicate-publisher measurement script.

    Each test obtains a fresh module from ``load_module`` and feeds one helper
    a small in-memory fixture. The private static builders below only
    assemble those fixtures; they contain no assertions.
    """

    # Prometheus metric names that appear in the summary fixtures.
    _DUPLICATE_RECORDS_METRIC = "every_channel_ladder_archive_duplicate_hash_source_records"
    _DIVERGENT_SEQUENCES_METRIC = "every_channel_ladder_archive_hash_divergent_sequences"
    _MISSING_IDENTITY_METRIC = "every_channel_archive_missing_source_identity_records"

    @staticmethod
    def _manifest(source_node: str, *entries: tuple) -> list:
        # Build manifest records for one source from (sequence, hash) pairs.
        return [
            {"group_sequence": sequence, "blake3": digest, "source_node": source_node}
            for sequence, digest in entries
        ]

    @staticmethod
    def _healthy_publishers() -> dict:
        # Two publishers, "a" and "b", each reporting every health flag as True.
        return {
            label: {"health_ok": True, "metrics_ok": True, "duplicate_metrics_present": True}
            for label in ("a", "b")
        }

    @staticmethod
    def _comparison(matching: int, *, identity_ok: bool = True, hash_match: bool = True) -> dict:
        # Manifest-comparison fixture with zero divergent sequences.
        return {
            "source_identity_ok": identity_ok,
            "matching_sequence_count": matching,
            "divergent_sequence_count": 0,
            "byte_for_byte_hash_match": hash_match,
        }

    @staticmethod
    def _series(metric: str, value: int) -> dict:
        # Prometheus query-result fixture for a series that is present and ok.
        return {"metric": metric, "ok": True, "series_present": True, "value": value}

    def test_manifest_hash_stats_counts_duplicates_divergence_and_missing_hashes(self) -> None:
        # Sequence 10 has the same hash from two nodes, sequence 11 has two
        # different hashes, and sequence 12 has neither hash nor source node.
        script = load_module()

        hashed_rows = (
            (10, 1_000, "same", "nuc-a"),
            (10, 1_001, "same", "nuc-b"),
            (11, 2_000, "left", "nuc-a"),
            (11, 2_001, "right", "nuc-b"),
        )
        records = [
            {"group_sequence": sequence, "received_unix_ms": received, "blake3": digest, "source_node": node}
            for sequence, received, digest, node in hashed_rows
        ]
        records.append({"group_sequence": 12, "received_unix_ms": 3_000})

        stats = script.manifest_hash_stats(records, invalid_lines=2)

        expected = {
            "record_count": 5,
            "invalid_lines": 2,
            "sequence_count": 2,
            "source_identity_count": 2,
            "source_identities": ["nuc-a", "nuc-b"],
            "missing_source_identity_records": 1,
            "duplicate_hash_source_records": 1,
            "duplicate_hash_sequences": 1,
            "hash_divergent_sequences": 1,
            "missing_hash_records": 1,
            "first_received_unix_ms": 1_000,
            "latest_received_unix_ms": 3_000,
        }
        for key, wanted in expected.items():
            self.assertEqual(wanted, stats[key])

    def test_compare_manifest_hashes_proves_byte_for_byte_matches(self) -> None:
        # Both publishers report identical hashes for sequences 1 and 2.
        script = load_module()

        manifests = {
            publisher: self._manifest(publisher, (1, "a"), (2, "b"))
            for publisher in ("publisher-a", "publisher-b")
        }
        comparison = script.compare_manifest_hashes(manifests)

        self.assertTrue(comparison["byte_for_byte_hash_match"])
        self.assertTrue(comparison["source_identity_ok"])
        self.assertEqual(["publisher-a", "publisher-b"], comparison["source_identities"])
        self.assertEqual(2, comparison["matching_sequence_count"])
        self.assertEqual(0, comparison["divergent_sequence_count"])
        self.assertEqual(0, comparison["missing_sequence_count"])

    def test_compare_manifest_hashes_reports_divergent_sequences(self) -> None:
        # publisher-b disagrees on sequence 2 and has an extra sequence 3.
        script = load_module()

        manifests = {
            "publisher-a": self._manifest("publisher-a", (1, "a"), (2, "b")),
            "publisher-b": self._manifest("publisher-b", (1, "a"), (2, "different"), (3, "extra")),
        }
        comparison = script.compare_manifest_hashes(manifests)

        self.assertFalse(comparison["byte_for_byte_hash_match"])
        self.assertEqual(1, comparison["matching_sequence_count"])
        self.assertEqual(1, comparison["divergent_sequence_count"])
        self.assertEqual(1, comparison["missing_sequence_count"])
        self.assertEqual(2, comparison["divergent_examples"][0]["sequence"])
        self.assertEqual(["different"], comparison["divergent_examples"][0]["hashes"]["publisher-b"])

    def test_compare_manifest_hashes_rejects_intra_manifest_divergence(self) -> None:
        # publisher-b's own manifest holds two different hashes for sequence 1.
        script = load_module()

        manifests = {
            "publisher-a": self._manifest("publisher-a", (1, "same")),
            "publisher-b": self._manifest("publisher-b", (1, "same"), (1, "different")),
        }
        comparison = script.compare_manifest_hashes(manifests)

        self.assertFalse(comparison["byte_for_byte_hash_match"])
        self.assertEqual(0, comparison["matching_sequence_count"])
        self.assertEqual(1, comparison["divergent_sequence_count"])
        self.assertEqual(["different", "same"], comparison["divergent_examples"][0]["hashes"]["publisher-b"])

    def test_compare_manifest_hashes_rejects_mirrored_same_source_records(self) -> None:
        # Two buffers hold the same record attributed to one shared source node.
        script = load_module()

        manifests = {
            buffer_name: self._manifest("archive-origin", (1, "same"))
            for buffer_name in ("nuc-a-buffer", "nuc-b-buffer")
        }
        comparison = script.compare_manifest_hashes(manifests)

        self.assertFalse(comparison["byte_for_byte_hash_match"])
        self.assertFalse(comparison["source_identity_ok"])
        self.assertEqual(["archive-origin"], comparison["source_identities"])

    def test_summary_requires_manifest_comparison_and_prometheus_series(self) -> None:
        # Two healthy samples taken 30 000 ms apart, with both series present.
        script = load_module()

        samples = [
            {
                "sample_unix_ms": sampled_at,
                "publishers": self._healthy_publishers(),
                "manifest_comparison": self._comparison(matching),
                "prometheus": [
                    self._series(self._DUPLICATE_RECORDS_METRIC, matching),
                    self._series(self._DIVERGENT_SEQUENCES_METRIC, 0),
                ],
            }
            for sampled_at, matching in ((1_000, 2), (31_000, 4))
        ]
        summary = script.summarize(samples)

        self.assertTrue(summary["ok"])
        self.assertEqual(30_000, summary["elapsed_ms"])
        self.assertEqual(2, summary["sample_count"])
        self.assertEqual(4, summary["latest_manifest_comparison"]["matching_sequence_count"])

    def test_summary_rejects_single_sample_and_manifest_hash_errors(self) -> None:
        # A lone sample whose manifest stats report divergence, a missing hash
        # and an invalid line.
        script = load_module()

        only_sample = {
            "sample_unix_ms": 1_000,
            "publishers": self._healthy_publishers(),
            "manifests": {
                "a": {
                    "ok": True,
                    "hash_divergent_sequences": 1,
                    "missing_hash_records": 1,
                    "invalid_lines": 1,
                },
            },
            "manifest_comparison": self._comparison(2),
            "prometheus": [self._series(self._DUPLICATE_RECORDS_METRIC, 2)],
        }
        summary = script.summarize([only_sample])

        self.assertFalse(summary["ok"])
        for reason in (
            "insufficient_elapsed_samples",
            "manifest_hash_divergence_observed",
            "manifest_hash_missing_records",
            "manifest_invalid_lines",
        ):
            self.assertIn(reason, summary["reasons"])

    def test_summary_rejects_missing_or_non_diverse_source_identity(self) -> None:
        # Both samples flag the source identity as not ok; the later one also
        # carries a non-zero missing-identity series.
        script = load_module()

        earlier = {
            "sample_unix_ms": 1_000,
            "manifest_comparison": self._comparison(2, identity_ok=False, hash_match=False),
        }
        later = {
            "sample_unix_ms": 31_000,
            "manifest_comparison": self._comparison(2, identity_ok=False, hash_match=False),
            "prometheus": [self._series(self._MISSING_IDENTITY_METRIC, 2)],
        }
        summary = script.summarize([earlier, later])

        self.assertFalse(summary["ok"])
        self.assertIn("manifest_source_identity_missing_or_not_diverse", summary["reasons"])
        self.assertIn("prometheus_source_identity_missing_nonzero", summary["reasons"])

    def test_agent_manifest_url_builds_bounded_tailnet_endpoint(self) -> None:
        # The base URL's trailing slash must not be doubled in the result.
        script = load_module()

        built = script.agent_manifest_url(
            "http://100.64.0.5:7799/",
            broadcast="la-kcop",
            track="0.m4s",
            role="publisher-buffer",
            max_bytes=4096,
        )

        self.assertEqual(
            "http://100.64.0.5:7799/v1/archive-manifest?broadcast=la-kcop&track=0.m4s&max_bytes=4096&role=publisher-buffer",
            built,
        )

    def test_parser_defaults_to_publisher_origin_proof_track(self) -> None:
        # Parsing an empty argument list exposes the parser's defaults.
        script = load_module()

        parser = script.build_parser()
        args = parser.parse_args([])

        self.assertEqual("publisher.m4s", args.track)

    def test_parse_manifest_jsonl_tolerates_partial_first_tail_line(self) -> None:
        # The first line starts with non-JSON text; it is expected to be
        # dropped without being counted as invalid.
        script = load_module()

        body = (
            'not-json-prefix{"group_sequence":1}\n'
            '{"group_sequence":2,"blake3":"b"}\n'
        )
        records, invalid = script.parse_manifest_jsonl(body)

        self.assertEqual(0, invalid)
        self.assertEqual([2], [record["group_sequence"] for record in records])
|
||||
|
||||
|
||||
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue