{"records":[{"benchmark_source":"HadCRUT5 / NASA GISS observed temperature record","benchmark_value":0.96,"ce_value":0.92,"error_pct":4.2,"id":"VAL-CLM-001","metric":"GMST anomaly 1990\u20132020 (\u00b0C above 1850\u20131900)","model_id":"MDL-001","notes":"CE ClimateModelService slightly underestimates 1990\u20132020 warming by 0.04\u00b0C (4.2%). Within IPCC model ensemble spread.","pass_threshold":10.0,"passed":true,"scenario_tested":"historical","test_date":"2026-01-15","test_type":"Historical backtest"},{"benchmark_source":"IPCC AR6 WG1 Table SPM.1 \u2014 SSP1-1.9 2100 median temperature","benchmark_value":1.5,"ce_value":1.5,"error_pct":0.0,"id":"VAL-CLM-002","metric":"2100 GMST anomaly under SSP1-1.9 (\u00b0C)","model_id":"MDL-001","notes":"CE SSP1-1.9 pathway median temperature matches IPCC AR6 central estimate exactly (by calibration). Spread (P10\u2013P90) not yet exposed in UI.","pass_threshold":5.0,"passed":true,"scenario_tested":"SSP1-1.9","test_date":"2026-01-15","test_type":"Scenario cross-validation vs IPCC AR6"},{"benchmark_source":"IPCC AR6 WG1 Table SPM.1 \u2014 SSP5-8.5 2100 median temperature","benchmark_value":4.4,"ce_value":4.3,"error_pct":2.3,"id":"VAL-CLM-003","metric":"2100 GMST anomaly under SSP5-8.5 (\u00b0C)","model_id":"MDL-001","notes":"CE SSP5-8.5 high warming scenario within 0.1\u00b0C of IPCC AR6 central estimate.","pass_threshold":5.0,"passed":true,"scenario_tested":"SSP5-8.5","test_date":"2026-01-15","test_type":"Scenario cross-validation vs IPCC AR6"},{"benchmark_source":"NGFS Phase IV \u2014 Current Policies scenario 2100 warming","benchmark_value":3.0,"ce_value":3.1,"error_pct":3.3,"id":"VAL-CLM-004","metric":"2100 temperature under current policies (\u00b0C)","model_id":"MDL-001","notes":"CE current-policies warming projection within 0.1\u00b0C of NGFS Phase IV current policies median.","pass_threshold":10.0,"passed":true,"scenario_tested":"current_policies","test_date":"2026-01-15","test_type":"Benchmark comparison vs NGFS"},{"benchmark_source":"Nordhaus DICE-2023 optimal path GDP loss at 2\u00b0C","benchmark_value":1.15,"ce_value":1.07,"error_pct":7.0,"id":"VAL-ECO-001","metric":"GDP damage as % baseline at 2\u00b0C warming","model_id":"MDL-002","notes":"CE EconomicModelService gives 1.07% GDP damage vs DICE 1.15% at 2\u00b0C. Within acceptable range. Difference due to slightly lower \u03b1 calibration.","pass_threshold":15.0,"passed":true,"scenario_tested":"2C_pathway","test_date":"2026-02-01","test_type":"Benchmark comparison vs DICE-2023"},{"benchmark_source":"NGFS Phase IV \u2014 NZE scenario GDP impact vs baseline","benchmark_value":-1.8,"ce_value":-2.1,"error_pct":16.7,"id":"VAL-ECO-002","metric":"GDP deviation from baseline under NZE scenario 2050 (%)","model_id":"MDL-002","notes":"CE slightly overestimates transition cost vs NGFS NZE. 16.7% error \u2014 within threshold but worth monitoring. May reflect different carbon price trajectory assumption.","pass_threshold":20.0,"passed":true,"scenario_tested":"net_zero_2050","test_date":"2026-02-01","test_type":"Benchmark comparison vs IEA/NGFS"},{"benchmark_source":"Swiss Re sigma database 2000\u20132022 (natural catastrophe insured losses)","benchmark_value":105.0,"ce_value":98.0,"error_pct":6.7,"id":"VAL-DAM-001","metric":"Global insured natural cat loss 5-year average ($bn/yr)","model_id":"MDL-003","notes":"CE DamageModelService AAL slightly below observed insured losses (underestimates by ~$7bn/yr). Non-modelled perils and demand surge are the likely explanation.","pass_threshold":20.0,"passed":true,"scenario_tested":"historical_2000_2022","test_date":"2026-01-20","test_type":"Historical validation \u2014 observed damage record"},{"benchmark_source":"Howard & Sterner (2017) meta-analysis central estimate at 3\u00b0C","benchmark_value":2.5,"ce_value":2.4,"error_pct":4.0,"id":"VAL-DAM-002","metric":"GDP damage fraction at 3\u00b0C warming (%)","model_id":"MDL-003","notes":"CE damage at 3\u00b0C within 4% of Howard-Sterner meta-analysis central estimate. Note: Burke et al. (2015) would give ~23% \u2014 CE uses more conservative DICE calibration.","pass_threshold":15.0,"passed":true,"scenario_tested":"3C_warming","test_date":"2026-01-20","test_type":"Benchmark comparison vs literature"},{"benchmark_source":"CE internal convergence analysis (N=500, 1000, 2000, 5000, 10000)","benchmark_value":0.0,"ce_value":1.3,"error_pct":1.3,"id":"VAL-MC-001","metric":"P5/P95 stability: % change in P5 temperature from N=2000 to N=10000","model_id":"MDL-005","notes":"N=2000 gives P5/P95 within \u00b11.3% of N=10000 reference run. Converged for decision-grade use.","pass_threshold":2.0,"passed":true,"scenario_tested":"convergence_study","test_date":"2026-01-10","test_type":"Convergence test"},{"benchmark_source":"IPCC AR6 WG3 Figure SPM.7 \u2014 2030 abatement potential in C3 scenario","benchmark_value":35.0,"ce_value":31.0,"error_pct":11.4,"id":"VAL-GAP-001","metric":"Net abatement available by 2030 vs 2019 baseline (GtCO\u2082e/yr)","model_id":"MDL-009","notes":"CE GapAccountingEngine gives 31 GtCO\u2082e/yr net by 2030 vs IPCC AR6 C3 scenario 35 GtCO\u2082e/yr. Difference partly attributable to conservative de-duplication factor \u03b4=0.22.","pass_threshold":20.0,"passed":true,"scenario_tested":"2C_pathway_2030","test_date":"2026-02-01","test_type":"Scenario cross-validation vs IPCC"},{"benchmark_source":"GCAM 6.0 NZE 2050 scenario sectoral abatement breakdown","benchmark_value":47.0,"ce_value":43.0,"error_pct":8.5,"id":"VAL-GAP-002","metric":"Share of energy sector in total 2050 abatement (%)","model_id":"MDL-009","notes":"CE energy sector abatement share (43%) vs GCAM 6.0 (47%). Within acceptable range. CE has slightly higher weight on land use/agriculture mitigation.","pass_threshold":15.0,"passed":true,"scenario_tested":"net_zero_2050","test_date":"2026-02-01","test_type":"Benchmark comparison vs GCAM"}],"summary":{"benchmark_sources":["IPCC AR6 WG1, WG3","NGFS Phase IV","IEA NZE 2050","DICE-2023","GCAM 6.0","Swiss Re sigma","Howard & Sterner (2017)"],"failed":0,"last_test_run":"2026-02-10","mean_error_pct":5.9,"models_tested":["MDL-002","MDL-003","MDL-001","MDL-005","MDL-009"],"passed":11,"total_tests":11}}
