-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.html
More file actions
1 lines (1 loc) · 13.1 KB
/
index.html
File metadata and controls
1 lines (1 loc) · 13.1 KB
1
<!DOCTYPE html><html lang="en" data-theme="dark"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1, viewport-fit=cover"/><!-- ValueArena home page. NOTE(review): this minified markup looks like build output; confirm before hand-editing, since a rebuild would presumably overwrite changes. --><link rel="preload" as="image" href="/assets/models/claude.png"/><link rel="preload" as="image" href="/assets/models/gpt.png"/><link rel="stylesheet" href="/_next/static/chunks/14fqz35xbxfy..css" data-precedence="next"/><link rel="preload" as="script" fetchPriority="low" href="/_next/static/chunks/11kjahy2ntf0n.js"/><script src="/_next/static/chunks/007q~d6g010tn.js" async=""></script><script src="/_next/static/chunks/07lhk_q6pmm3r.js" async=""></script><script src="/_next/static/chunks/turbopack-0cn-na9ggl2px.js" async=""></script><script src="/_next/static/chunks/0msaqe10s_at0.js" async=""></script><script src="/_next/static/chunks/02r5woef2zim9.js" async=""></script><script src="/_next/static/chunks/15b6_hm_8-7tp.js" async=""></script><script src="/_next/static/chunks/0bm4kqsyrh~il.js" async=""></script><title>ValueArena</title><meta name="description" content="Cross-constitution Elo rankings for language models, judged via EigenBench."/><!-- Theme bootstrap: uses the localStorage item 'va-theme' when set, otherwise picks 'light' or 'dark' from the prefers-color-scheme media query, then writes the result to data-theme on the root element. Any exception is swallowed, leaving the data-theme="dark" from the markup in place. --><script>(function(){try{var t=localStorage.getItem('va-theme');if(!t){t=window.matchMedia('(prefers-color-scheme: light)').matches?'light':'dark';}document.documentElement.dataset.theme=t;}catch(e){}})();</script><script src="/_next/static/chunks/03~yq9q893hmn.js" noModule=""></script></head><body><div hidden=""><!--$--><!--/$--></div><!-- Site header: brand link to "/", then nav with the Methodology, Source and HF Dataset links plus the theme toggle button. --><header class="va-header"><a href="/" class="va-brand"><h1><span>Value</span>Arena</h1></a><nav class="va-nav"><a href="/methodology/">Methodology</a><a href="https://github.com/ValueArena/ValueArena.github.io" target="_blank" rel="noopener">Source</a><a href="https://huggingface.co/datasets/invi-bhagyesh/ValueArena" target="_blank" rel="noopener">HF Dataset</a><button type="button" class="va-theme-toggle" title="Toggle theme" aria-label="Toggle theme">☾</button></nav></header><!-- Main content: the home tab list and its panels start here. --><main class="va-main"><div class="pt-4"><div 
role="tablist" aria-label="Home sections" class="home-tabs mb-4"><button id="hometab-chat" role="tab" type="button" aria-selected="true" aria-controls="hometabpanel-chat" tabindex="0" class="home-tab active">New Chat</button><button id="hometab-leaderboard" role="tab" type="button" aria-selected="false" aria-controls="hometabpanel-leaderboard" tabindex="-1" class="home-tab">Leaderboard</button><button id="hometab-experiments" role="tab" type="button" aria-selected="false" aria-controls="hometabpanel-experiments" tabindex="-1" class="home-tab">Experiments</button></div><section id="hometabpanel-chat" role="tabpanel" aria-labelledby="hometab-chat"><div class="chat-setup-screen"><div class="chat-setup-hero"><div class="hero-text"><h2>A Comparative Behavioral Measure of Value Alignment</h2><p>EigenBench is a black-box framework for quantifying value alignment across language models. Compare model responses side-by-side, explore per-constitution leaderboards, and browse experiment runs.</p></div><!-- Three-step pipeline illustration. The icon and arrow graphics are decorative, so their wrappers carry aria-hidden; the label and description text of each step carries the meaning. --><div class="hero-pipeline"><div class="pipeline-step"><div class="pipeline-icon" aria-hidden="true"><svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M17 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"></path><circle cx="9" cy="7" r="4"></circle><path d="M23 21v-2a4 4 0 0 0-3-3.87"></path><path d="M16 3.13a4 4 0 0 1 0 7.75"></path></svg></div><div class="pipeline-label">Model Ensemble</div><div class="pipeline-desc">Multiple LLMs judge each other's responses</div></div><div class="pipeline-arrow" aria-hidden="true"><svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="5" y1="12" x2="19" y2="12"></line><polyline points="12 5 19 12 12 19"></polyline></svg></div><div class="pipeline-step"><div class="pipeline-icon" aria-hidden="true"><svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="3" y1="3" x2="3" y2="21"></line><line x1="3" y1="21" x2="21" y2="21"></line><rect x="7" y="13" width="3" height="6"></rect><rect x="12" y="8" width="3" height="11"></rect><rect x="17" y="4" width="3" height="15"></rect></svg></div><div class="pipeline-label">BTD Fitting</div><div class="pipeline-desc">Pairwise comparisons fit to Bradley–Terry model</div></div><div class="pipeline-arrow" aria-hidden="true"><svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="5" y1="12" x2="19" y2="12"></line><polyline points="12 5 19 12 12 19"></polyline></svg></div><div class="pipeline-step"><div class="pipeline-icon" aria-hidden="true"><svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 22s8-4 8-10V5l-8-3-8 3v7c0 6 8 10 8 10z"></path><polyline points="9 12 11 14 15 10"></polyline></svg></div><div class="pipeline-label">EigenTrust</div><div class="pipeline-desc">Consensus scores via trust-weighted aggregation</div></div></div></div><!-- Battle Mode card: decorative icon, title and subtitle, followed by the configuration controls. --><div class="battle-setup"><div class="battle-header"><div class="battle-icon" aria-hidden="true"><svg width="22" height="22" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round"><polyline points="14.5 17.5 3 6 3 3 6 3 17.5 14.5"></polyline><line x1="13" y1="19" x2="19" y2="13"></line><line x1="16" y1="16" x2="20" y2="20"></line><line x1="19" y1="21" x2="21" y2="19"></line><polyline points="14.5 6.5 18 3 21 3 21 6 17.5 9.5"></polyline><line x1="5" y1="14" x2="9" y2="18"></line><line x1="7" y1="17" x2="4" y2="20"></line><line x1="3" y1="19" x2="5" y2="21"></line></svg></div><div class="battle-header-text"><h2 class="battle-title">Battle Mode</h2><p class="battle-subtitle">Pit two models head-to-head. 
Judge which aligns with your values.</p></div></div><!-- Battle configuration. Each visible caption is wired to its control: the Constitution trigger is named by caption plus current value, the Matchup buttons form a named group, and the API key field is tied to its label and hint. --><div class="battle-config"><div class="battle-row"><div class="battle-section battle-section-grow"><label class="battle-label" id="va-constitution-label">Constitution</label><div class="custom-select"><button type="button" class="custom-select-trigger" id="va-constitution-trigger" aria-haspopup="listbox" aria-expanded="false" aria-labelledby="va-constitution-label va-constitution-trigger"><span>Kindness</span></button></div></div><div class="battle-section"><label class="battle-label" id="va-matchup-label">Matchup</label><div class="battle-mode-toggle" role="group" aria-labelledby="va-matchup-label"><button type="button" class="battle-mode-btn active">Pick</button><button type="button" class="battle-mode-btn">Random</button></div></div></div><div class="battle-models"><div class="custom-select"><button type="button" class="custom-select-trigger" aria-haspopup="listbox" aria-expanded="false"><img src="/assets/models/claude.png" width="16" height="16" alt="" class="model-logo "/><span>Claude 4 Sonnet</span></button></div><div class="battle-vs">vs</div><div class="custom-select"><button type="button" class="custom-select-trigger" aria-haspopup="listbox" aria-expanded="false"><img src="/assets/models/gpt.png" width="16" height="16" alt="" class="model-logo "/><span>GPT 4.1</span></button></div></div><!-- API key field: autocomplete is off so the browser is asked not to store or suggest the key as a saved credential. --><div class="battle-section battle-key-section"><label class="battle-label" for="va-api-key">OpenRouter API Key</label><div class="api-key-wrap"><input id="va-api-key" type="password" placeholder="sk-or-…" value="" autocomplete="off" aria-describedby="va-api-key-hint"/><button type="button" class="api-key-toggle" aria-controls="va-api-key">show</button></div><div class="api-key-hint" id="va-api-key-hint">Stored locally. 
Never sent to our servers.</div></div><button type="button" class="battle-start-btn">▶ Start Battle</button></div></div></div></section><!-- Leaderboard and Experiments panels are emitted empty with the hidden attribute. NOTE(review): presumably filled in by client-side script when their tab is selected; confirm. --><section id="hometabpanel-leaderboard" role="tabpanel" aria-labelledby="hometab-leaderboard" hidden=""></section><section id="hometabpanel-experiments" role="tabpanel" aria-labelledby="hometab-experiments" hidden=""></section></div><!--$--><!--/$--></main><!-- The inline scripts below create self.__next_f when it does not exist yet and push data chunks onto it. The escaped strings look machine-generated, so leave them byte-for-byte untouched. NOTE(review): the consumer is presumably one of the chunk scripts loaded by this page; confirm. --><script src="/_next/static/chunks/11kjahy2ntf0n.js" id="_R_" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0])</script><script>self.__next_f.push([1,"1:\"$Sreact.fragment\"\n2:I[2971,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"Header\"]\n3:I[39756,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"default\"]\n4:I[37457,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"default\"]\n5:I[48539,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\",\"/_next/static/chunks/15b6_hm_8-7tp.js\",\"/_next/static/chunks/0bm4kqsyrh~il.js\"],\"HomeTabs\"]\n6:I[97367,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"OutletBoundary\"]\n7:\"$Sreact.suspense\"\n9:I[97367,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"ViewportBoundary\"]\nb:I[97367,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"MetadataBoundary\"]\nd:I[68027,[\"/_next/static/chunks/0msaqe10s_at0.js\",\"/_next/static/chunks/02r5woef2zim9.js\"],\"default\",1]\n:HL[\"/_next/static/chunks/14fqz35xbxfy..css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"0:{\"P\":null,\"c\":[\"\",\"\"],\"q\":\"\",\"i\":false,\"f\":[[[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",16],[[\"$\",\"$1\",\"c\",{\"children\":[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/chunks/14fqz35xbxfy..css\",\"precedence\":\"next\",\"c
rossOrigin\":\"$undefined\",\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/0msaqe10s_at0.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/02r5woef2zim9.js\",\"async\":true,\"nonce\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"data-theme\":\"dark\",\"suppressHydrationWarning\":true,\"children\":[[\"$\",\"head\",null,{\"children\":[\"$\",\"script\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"(function(){try{var t=localStorage.getItem('va-theme');if(!t){t=window.matchMedia('(prefers-color-scheme: light)').matches?'light':'dark';}document.documentElement.dataset.theme=t;}catch(e){}})();\"}}]}],[\"$\",\"body\",null,{\"children\":[[\"$\",\"$L2\",null,{}],[\"$\",\"main\",null,{\"className\":\"va-main\",\"children\":[\"$\",\"$L3\",null,{\"parallelRouterKey\":\"children\",\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L4\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 
0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":404}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],[]],\"forbidden\":\"$undefined\",\"unauthorized\":\"$undefined\"}]}]]}]]}]]}],{\"children\":[[\"$\",\"$1\",\"c\",{\"children\":[[\"$\",\"$L5\",null,{}],[[\"$\",\"script\",\"script-0\",{\"src\":\"/_next/static/chunks/15b6_hm_8-7tp.js\",\"async\":true,\"nonce\":\"$undefined\"}],[\"$\",\"script\",\"script-1\",{\"src\":\"/_next/static/chunks/0bm4kqsyrh~il.js\",\"async\":true,\"nonce\":\"$undefined\"}]],[\"$\",\"$L6\",null,{\"children\":[\"$\",\"$7\",null,{\"name\":\"Next.MetadataOutlet\",\"children\":\"$@8\"}]}]]}],{},null,false,null]},null,false,null],[\"$\",\"$1\",\"h\",{\"children\":[null,[\"$\",\"$L9\",null,{\"children\":\"$La\"}],[\"$\",\"div\",null,{\"hidden\":true,\"children\":[\"$\",\"$Lb\",null,{\"children\":[\"$\",\"$7\",null,{\"name\":\"Next.Metadata\",\"children\":\"$Lc\"}]}]}],null]}],false]],\"m\":\"$undefined\",\"G\":[\"$d\",[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/_next/static/chunks/14fqz35xbxfy..css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\",\"nonce\":\"$undefined\"}]]],\"S\":true,\"h\":null,\"s\":\"$undefined\",\"l\":\"$undefined\",\"p\":\"$undefined\",\"d\":\"$undefined\",\"b\":\"0pntjNG8GFeNi87Lc8Wop\"}\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"charSet\":\"utf-8\"}],[\"$\",\"meta\",\"1\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1, viewport-fit=cover\"}]]\n"])</script><script>self.__next_f.push([1,"8:null\nc:[[\"$\",\"title\",\"0\",{\"children\":\"ValueArena\"}],[\"$\",\"meta\",\"1\",{\"name\":\"description\",\"content\":\"Cross-constitution Elo rankings for language models, judged via 
EigenBench.\"}]]\n"])</script></body></html>