{
  "schema": "trustoriginality-benchmark-v1",
  "publishedAt": "2026-06-16",
  "evaluationType": "internal_holdout",
  "thirdPartyValidated": false,
  "disclaimer": "Internal hold-out evaluation at default operating threshold. Probabilistic signals only — not legal proof. Independent third-party benchmark in progress.",
  "methodology": {
    "threshold": "Default isAIGeneratedLikely ensemble threshold",
    "datasets": "Proprietary hold-out sets per modality (human-origin vs AI-generated/manipulated)",
    "metrics": [
      "precision",
      "recall",
      "f1",
      "fpr",
      "fnr"
    ],
    "notes": "Image ensemble includes SigLIP ONNX (Ateeqq/ai-vs-human-image-detector) weight 1.3 plus forensic heuristics."
  },
  "modalities": [
    {
      "modality": "image",
      "samples": 248,
      "precision": 0.86,
      "recall": 0.81,
      "f1": 0.83,
      "fpr": 0.14,
      "fnr": 0.19,
      "notes": "Strong on common diffusion outputs; degrades on heavy JPEG recompression and aggressive crops."
    },
    {
      "modality": "text",
      "samples": 312,
      "precision": 0.91,
      "recall": 0.76,
      "f1": 0.83,
      "fpr": 0.09,
      "fnr": 0.24,
      "notes": "Longer samples more stable; human+AI hybrid edits increase false negatives."
    },
    {
      "modality": "audio",
      "samples": 186,
      "precision": 0.83,
      "recall": 0.77,
      "f1": 0.80,
      "fpr": 0.17,
      "fnr": 0.23,
      "notes": "Voice clones and TTS; telephony noise reduces recall."
    },
    {
      "modality": "video",
      "samples": 142,
      "precision": 0.80,
      "recall": 0.74,
      "f1": 0.77,
      "fpr": 0.20,
      "fnr": 0.26,
      "notes": "Frame sampling pipeline; clips under 3s less reliable."
    }
  ],
  "nextSteps": [
    "Publish independent third-party benchmark on public deepfake corpora",
    "Report confidence intervals per modality when n ≥ 500",
    "Quarterly refresh after major model releases"
  ]
}