From 26c7a0c51b86f45fc76c20d6e466a3af4409cfb2 Mon Sep 17 00:00:00 2001 From: Chin-Chang Yang <2770271+ChinChangYang@users.noreply.github.com> Date: Fri, 26 Jun 2026 11:53:47 +0800 Subject: [PATCH] =?UTF-8?q?Add=20Human-SL=20KGS-rank=20ladder=20(gtp=5Fhum?= =?UTF-8?q?an.cfg,=209d=E2=86=9220k)=20+=20tunehuman=20subcommand?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `tunehuman` subcommand and a complete set of GTP configs that make KataGo (with the Human-SL net) play at a chosen amateur rank from 9d down to 20k, where each consecutive rank is exactly 1 KGS rank (1 stone) apart. tunehuman subcommand (cpp/command/tunehuman.cpp, cpp/program/humansltuner.{cpp,h}): - Plays in-process candidate-vs-baseline games and calibrates humanSLChosenMovePiklLambda (the strength dial) to a target winrate at fixed visits, reading the raw winrate (robust to the steep, ceiling-biased λ curve). - -komi / -cand-color flags for the KGS 1-rank handicap (weaker rank as Black, komi 0.5, no color alternation); inherits the baseline config's ruleset so tuning is scored exactly like deployed play. - Resumable per-round checkpointing (-resume-file) to survive long runs. - Unit tests in cpp/tests/testhumansltuner.cpp (run via `katago runtests`). The ladder (cpp/configs/gtp_human{9d..1d,1k..20k}.cfg): - 29 configs. The 9d anchor is even-game parity vs the modern rank_9d profile; every weaker rank is tuned so it (Black, komi 0.5) is an even game (50%) vs the rung above it, with the 95% Wilson CI inside [40%, 60%]. Japanese rules. - λ rises 0.045 (9d) → 1.223 (20k); the deep-kyu rungs need near-pure-human play because the Human-SL net's rank profiles compress at the weakest end. docs/HumanSL_Rank_Ladder.md documents the method, ruleset rationale, reproduction commands, the full results table (λ / win rate / 95% CI / games per rung), and findings. 
ladder_step.sh + tune_decide.py + tune_{lambda,maxvisits}.sh are the automated sequential root-finder harness that produced the ladder. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01L2nqY5X9rSVpH65nWHCPaF --- cpp/CMakeLists.txt | 3 + cpp/command/runtests.cpp | 2 + cpp/command/tunehuman.cpp | 575 +++++++++++++++++++++++++++++++++ cpp/configs/gtp_human10k.cfg | 74 +++++ cpp/configs/gtp_human11k.cfg | 74 +++++ cpp/configs/gtp_human12k.cfg | 74 +++++ cpp/configs/gtp_human13k.cfg | 74 +++++ cpp/configs/gtp_human14k.cfg | 74 +++++ cpp/configs/gtp_human15k.cfg | 74 +++++ cpp/configs/gtp_human16k.cfg | 74 +++++ cpp/configs/gtp_human17k.cfg | 74 +++++ cpp/configs/gtp_human18k.cfg | 74 +++++ cpp/configs/gtp_human19k.cfg | 74 +++++ cpp/configs/gtp_human1d.cfg | 74 +++++ cpp/configs/gtp_human1k.cfg | 74 +++++ cpp/configs/gtp_human20k.cfg | 74 +++++ cpp/configs/gtp_human2d.cfg | 74 +++++ cpp/configs/gtp_human2k.cfg | 74 +++++ cpp/configs/gtp_human3d.cfg | 74 +++++ cpp/configs/gtp_human3k.cfg | 74 +++++ cpp/configs/gtp_human4d.cfg | 74 +++++ cpp/configs/gtp_human4k.cfg | 74 +++++ cpp/configs/gtp_human5d.cfg | 74 +++++ cpp/configs/gtp_human5k.cfg | 74 +++++ cpp/configs/gtp_human6d.cfg | 74 +++++ cpp/configs/gtp_human6k.cfg | 74 +++++ cpp/configs/gtp_human7d.cfg | 74 +++++ cpp/configs/gtp_human7k.cfg | 74 +++++ cpp/configs/gtp_human8d.cfg | 74 +++++ cpp/configs/gtp_human8k.cfg | 74 +++++ cpp/configs/gtp_human9d.cfg | 86 +++++ cpp/configs/gtp_human9k.cfg | 74 +++++ cpp/main.cpp | 3 + cpp/main.h | 1 + cpp/program/humansltuner.cpp | 300 +++++++++++++++++ cpp/program/humansltuner.h | 128 ++++++++ cpp/program/play.cpp | 12 +- cpp/program/play.h | 1 + cpp/tests/testhumansltuner.cpp | 483 +++++++++++++++++++++++++++ cpp/tests/tests.h | 3 + docs/HumanSL_Rank_Ladder.md | 276 ++++++++++++++++ ladder_step.sh | 91 ++++++ tune_decide.py | 160 +++++++++ tune_lambda.sh | 80 +++++ tune_maxvisits.sh | 87 +++++ 45 files changed, 4357 insertions(+), 6 
deletions(-) create mode 100644 cpp/command/tunehuman.cpp create mode 100644 cpp/configs/gtp_human10k.cfg create mode 100644 cpp/configs/gtp_human11k.cfg create mode 100644 cpp/configs/gtp_human12k.cfg create mode 100644 cpp/configs/gtp_human13k.cfg create mode 100644 cpp/configs/gtp_human14k.cfg create mode 100644 cpp/configs/gtp_human15k.cfg create mode 100644 cpp/configs/gtp_human16k.cfg create mode 100644 cpp/configs/gtp_human17k.cfg create mode 100644 cpp/configs/gtp_human18k.cfg create mode 100644 cpp/configs/gtp_human19k.cfg create mode 100644 cpp/configs/gtp_human1d.cfg create mode 100644 cpp/configs/gtp_human1k.cfg create mode 100644 cpp/configs/gtp_human20k.cfg create mode 100644 cpp/configs/gtp_human2d.cfg create mode 100644 cpp/configs/gtp_human2k.cfg create mode 100644 cpp/configs/gtp_human3d.cfg create mode 100644 cpp/configs/gtp_human3k.cfg create mode 100644 cpp/configs/gtp_human4d.cfg create mode 100644 cpp/configs/gtp_human4k.cfg create mode 100644 cpp/configs/gtp_human5d.cfg create mode 100644 cpp/configs/gtp_human5k.cfg create mode 100644 cpp/configs/gtp_human6d.cfg create mode 100644 cpp/configs/gtp_human6k.cfg create mode 100644 cpp/configs/gtp_human7d.cfg create mode 100644 cpp/configs/gtp_human7k.cfg create mode 100644 cpp/configs/gtp_human8d.cfg create mode 100644 cpp/configs/gtp_human8k.cfg create mode 100644 cpp/configs/gtp_human9d.cfg create mode 100644 cpp/configs/gtp_human9k.cfg create mode 100644 cpp/program/humansltuner.cpp create mode 100644 cpp/program/humansltuner.h create mode 100644 cpp/tests/testhumansltuner.cpp create mode 100644 docs/HumanSL_Rank_Ladder.md create mode 100755 ladder_step.sh create mode 100644 tune_decide.py create mode 100755 tune_lambda.sh create mode 100755 tune_maxvisits.sh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5c28fa94ea..4d63aac23e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -321,6 +321,7 @@ add_executable(katago program/playutils.cpp program/playsettings.cpp 
program/play.cpp + program/humansltuner.cpp program/selfplaymanager.cpp ${GIT_HEADER_FILE_ALWAYS_UPDATED} tests/testboardarea.cpp @@ -333,6 +334,7 @@ add_executable(katago tests/testrules.cpp tests/testscore.cpp tests/testsgf.cpp + tests/testhumansltuner.cpp tests/testsymmetries.cpp tests/testnninputs.cpp tests/testownership.cpp @@ -365,6 +367,7 @@ add_executable(katago command/selfplay.cpp command/startposes.cpp command/tune.cpp + command/tunehuman.cpp command/writetrainingdata.cpp main.cpp ) diff --git a/cpp/command/runtests.cpp b/cpp/command/runtests.cpp index 663add137c..44a9032f48 100644 --- a/cpp/command/runtests.cpp +++ b/cpp/command/runtests.cpp @@ -55,6 +55,8 @@ int MainCmds::runtests(const vector& args) { Tests::runSymmetryDifferenceTests(); Tests::runBoardReplayTest(); + Tests::runHumanSLTunerTests(); + ScoreValue::freeTables(); Tests::runInlineConfigTests(); diff --git a/cpp/command/tunehuman.cpp b/cpp/command/tunehuman.cpp new file mode 100644 index 0000000000..0aa46ae535 --- /dev/null +++ b/cpp/command/tunehuman.cpp @@ -0,0 +1,575 @@ +#include "../core/global.h" +#include "../core/config_parser.h" +#include "../core/fileutils.h" +#include "../core/logger.h" +#include "../core/rand.h" +#include "../game/board.h" +#include "../game/boardhistory.h" +#include "../game/rules.h" +#include "../neuralnet/nninputs.h" +#include "../neuralnet/nneval.h" +#include "../neuralnet/sgfmetadata.h" +#include "../dataio/trainingwrite.h" +#include "../search/search.h" +#include "../search/searchparams.h" +#include "../program/setup.h" +#include "../program/play.h" +#include "../program/playsettings.h" +#include "../program/humansltuner.h" +#include "../command/commandline.h" +#include "../main.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +int MainCmds::tunehuman(const vector& args) { + Board::initHash(); + ScoreValue::initTables(); + + string baselineConfigPath; + string profile; + double targetElo = 0.0; + string 
outputConfigPath; + string modelFile; + string humanModelFile; + double eloTol = 25.0; + int gamesPerRound = 32; + int maxRounds = 24; + int numGameThreadsArgVal = -1; + string seedStr = "tunehuman"; + string resumeFile; + int searchVisits = -1; + int maxVisitsCap = -1; + double piklFloor = 0.02; + double piklMax = 1.0e4; + double dtauMax = 0.6; + double candHumanRootExplore = -1.0; + double xLo = 0.0; + double xHi = 3.0; + double komi = 7.5; + string candColor = "auto"; + + try { + KataGoCommandLine cmd("Tune human-SL play parameters to hit a target ELO offset vs a baseline config."); + cmd.addModelFileArg(); + cmd.addHumanModelFileArg(); + TCLAP::ValueArg baselineConfigArg("","baseline-config","Baseline human-SL config (defines ELO 0).",true,"","FILE"); + TCLAP::ValueArg profileArg("","profile","Candidate humanSLProfile, e.g. preaz_8d.",true,"","PROFILE"); + TCLAP::ValueArg targetEloArg("","target-elo","Desired (candidate - baseline) ELO. Negative = weaker.",true,0.0,"ELO"); + TCLAP::ValueArg outputConfigArg("","output-config","Where to write the tuned config.",true,"","FILE"); + TCLAP::ValueArg eloTolArg("","elo-tol","Stop when 1-sigma CI half-width (ELO) <= this.",false,25.0,"ELO"); + TCLAP::ValueArg gamesPerRoundArg("","games-per-round","Games per dial value per round.",false,32,"N"); + TCLAP::ValueArg maxRoundsArg("","max-rounds","Hard cap on rounds.",false,24,"N"); + TCLAP::ValueArg numGameThreadsArg("","num-game-threads","Parallel games within a round.",false,-1,"N"); + TCLAP::ValueArg seedArg("","seed","Master seed for reproducibility.",false,"tunehuman","SEED"); + TCLAP::ValueArg resumeFileArg("","resume-file","Per-round checkpoint file for resumable calibration. Empty = auto (.samples).",false,"","FILE"); + TCLAP::ValueArg searchVisitsArg("","search-visits","Visits in the piklLambda segment (>=2). -1 = auto (anchor to baseline maxVisits).",false,-1,"N"); + TCLAP::ValueArg maxVisitsCapArg("","max-visits-cap","Visits at the strong end. 
-1 = auto (anchor to baseline maxVisits).",false,-1,"N"); + TCLAP::ValueArg piklFloorArg("","pikl-floor","Smallest piklLambda (strongest).",false,0.02,"F"); + TCLAP::ValueArg piklMaxArg("","pikl-max","Largest active piklLambda.",false,1.0e4,"F"); + TCLAP::ValueArg dtauMaxArg("","dtau-max","Max temperature offset at the weak end.",false,0.6,"F"); + TCLAP::ValueArg candHumanRootExploreArg("","cand-human-root-explore","Override the CANDIDATE's humanSLRootExploreProbWeightless (lower = less human-policy exploration = stronger). -1 = use baseline config's value.",false,-1.0,"F"); + TCLAP::ValueArg xLoArg("","x-lo","Low end of the strength coordinate search range.",false,0.0,"F"); + TCLAP::ValueArg xHiArg("","x-hi","High end of the strength coordinate search range.",false,3.0,"F"); + TCLAP::ValueArg komiArg("","komi","Komi for the games. Use 0.5 for a KGS 1-rank handicap (stronger=White gets no compensation).",false,7.5,"F"); + TCLAP::ValueArg candColorArg("","cand-color","Candidate's color: auto (alternate, removes color bias), black, or white. 
Use 'black' with -komi 0.5 for a 1-rank handicap match (weaker candidate as Black).",false,"auto","COLOR"); + cmd.add(baselineConfigArg); + cmd.add(profileArg); + cmd.add(targetEloArg); + cmd.add(outputConfigArg); + cmd.add(eloTolArg); + cmd.add(gamesPerRoundArg); + cmd.add(maxRoundsArg); + cmd.add(numGameThreadsArg); + cmd.add(seedArg); + cmd.add(resumeFileArg); + cmd.add(searchVisitsArg); + cmd.add(maxVisitsCapArg); + cmd.add(piklFloorArg); + cmd.add(piklMaxArg); + cmd.add(dtauMaxArg); + cmd.add(candHumanRootExploreArg); + cmd.add(xLoArg); + cmd.add(xHiArg); + cmd.add(komiArg); + cmd.add(candColorArg); + cmd.parseArgs(args); + + modelFile = cmd.getModelFile(); + humanModelFile = cmd.getHumanModelFile(); + baselineConfigPath = baselineConfigArg.getValue(); + profile = profileArg.getValue(); + targetElo = targetEloArg.getValue(); + outputConfigPath = outputConfigArg.getValue(); + eloTol = eloTolArg.getValue(); + gamesPerRound = gamesPerRoundArg.getValue(); + maxRounds = maxRoundsArg.getValue(); + numGameThreadsArgVal = numGameThreadsArg.getValue(); + seedStr = seedArg.getValue(); + resumeFile = resumeFileArg.getValue(); + searchVisits = searchVisitsArg.getValue(); + maxVisitsCap = maxVisitsCapArg.getValue(); + piklFloor = piklFloorArg.getValue(); + piklMax = piklMaxArg.getValue(); + dtauMax = dtauMaxArg.getValue(); + candHumanRootExplore = candHumanRootExploreArg.getValue(); + xLo = xLoArg.getValue(); + xHi = xHiArg.getValue(); + komi = komiArg.getValue(); + candColor = candColorArg.getValue(); + } + catch(TCLAP::ArgException& e) { + cerr << "Error: " << e.error() << " for argument " << e.argId() << endl; + return 1; + } + catch(const StringError& e) { + cerr << "Error: " << e.what() << endl; + return 1; + } + + // ---- validation ---- + if(humanModelFile.empty()) { cerr << "Error: -human-model is required." 
<< endl; return 1; } + if(!FileUtils::exists(baselineConfigPath)) { cerr << "Error: baseline-config not found: " << baselineConfigPath << endl; return 1; } + if(!FileUtils::exists(modelFile)) { cerr << "Error: model not found: " << modelFile << endl; return 1; } + if(!FileUtils::exists(humanModelFile)) { cerr << "Error: human-model not found: " << humanModelFile << endl; return 1; } + if(gamesPerRound < 1) { cerr << "Error: -games-per-round must be >= 1." << endl; return 1; } + if(xLo >= xHi) { cerr << "Error: -x-lo must be < -x-hi." << endl; return 1; } + if(candColor != "auto" && candColor != "black" && candColor != "white") { + cerr << "Error: -cand-color must be auto, black, or white." << endl; return 1; + } + if(eloTol <= 0.0) { cerr << "Error: -elo-tol must be > 0." << endl; return 1; } + if(searchVisits != -1 && searchVisits < 2) { cerr << "Error: -search-visits must be >= 2 (piklLambda needs >1 visit), or -1 for auto." << endl; return 1; } + if(maxVisitsCap != -1 && maxVisitsCap < 1) { cerr << "Error: -max-visits-cap must be >= 1, or -1 for auto." << endl; return 1; } + if(maxRounds < 1) { cerr << "Error: -max-rounds must be >= 1." << endl; return 1; } + if(maxRounds < 4) + cout << "WARNING: -max-rounds " << maxRounds << " < 4: calibration needs at least 4 rounds to" + << " reach 'converged' (it requires 4 distinct dial samples). It will still run and write" + << " a best-achievable config, but converged will be false." << endl; + + int numGameThreads = numGameThreadsArgVal > 0 + ? 
numGameThreadsArgVal + : std::max(1, std::min(gamesPerRound, (int)std::thread::hardware_concurrency())); + + cout << "tunehuman parsed configuration:" << endl; + cout << " baseline-config = " << baselineConfigPath << endl; + cout << " profile = " << profile << endl; + cout << " target-elo = " << targetElo << endl; + cout << " output-config = " << outputConfigPath << endl; + cout << " model = " << modelFile << endl; + cout << " human-model = " << humanModelFile << endl; + cout << " elo-tol = " << eloTol << endl; + cout << " games-per-round = " << gamesPerRound << endl; + cout << " max-rounds = " << maxRounds << endl; + cout << " num-game-threads= " << numGameThreads << endl; + cout << " seed = " << seedStr << endl; + cout << " resume-file = " << (resumeFile.empty() ? string("auto (.samples)") : resumeFile) << endl; + cout << " search-visits = " << (searchVisits < 0 ? string("auto") : Global::intToString(searchVisits)) << endl; + cout << " max-visits-cap = " << (maxVisitsCap < 0 ? string("auto") : Global::intToString(maxVisitsCap)) << endl; + cout << " pikl-floor = " << piklFloor << endl; + cout << " pikl-max = " << piklMax << endl; + cout << " dtau-max = " << dtauMax << endl; + cout << " cand-human-root-explore = " << (candHumanRootExplore < 0.0 ? string("(baseline)") : Global::doubleToString(candHumanRootExplore)) << endl; + cout << " x-lo / x-hi = " << xLo << " / " << xHi << endl; + cout << " komi = " << komi << endl; + cout << " cand-color = " << candColor << (candColor == "auto" ? " (alternate)" : (komi != 7.5 ? 
" (handicap match)" : "")) << endl; + + // ---- load baseline config, logger, params, nets ---- + ConfigParser baselineCfg(baselineConfigPath); + Logger logger(&baselineCfg, true, false, true, false); // log to stdout, with time, don't dump config + + const bool hasHumanModel = true; + SearchParams baselineParams = Setup::loadSingleParams(baselineCfg, Setup::SETUP_FOR_GTP, hasHumanModel); + string baselineText = baselineCfg.getContents(); + + // ---- resolve the candidate visit budget, anchored to the baseline's own maxVisits ---- + // Honors "don't spend more compute than the baseline unless explicitly asked": with both + // -search-visits and -max-visits-cap on auto (-1), the budget collapses onto baselineParams.maxVisits, + // so the dial's visits never exceed the baseline. Visits rise above baseline only on explicit opt-in. + VisitBudget vb = resolveVisitBudget(baselineParams.maxVisits, searchVisits, maxVisitsCap); + string baselineDesc = vb.baselineHasCap ? Global::int64ToString(baselineParams.maxVisits) : string("uncapped"); + logger.write( + "Resolved visit budget: mid(segment-B)=" + Global::intToString(vb.midVisits) + + " cap(segment-C)=" + Global::intToString(vb.maxVisitsCap) + + " (baseline maxVisits=" + baselineDesc + ")"); + if(!vb.baselineHasCap) + logger.write("INFO: baseline config has no maxVisits cap (search bounded by time/playouts); " + "anchoring tuner visit budget to mid=" + Global::intToString(vb.midVisits) + + " cap=" + Global::intToString(vb.maxVisitsCap) + "." + + ((searchVisits == -1 || maxVisitsCap == -1) + ? 
string(" Pass -search-visits/-max-visits-cap to override.") : string(""))); + if(maxVisitsCap != -1 && maxVisitsCap < vb.midVisits) + logger.write("WARNING: -max-visits-cap " + Global::intToString(maxVisitsCap) + + " < resolved search-visits " + Global::intToString(vb.midVisits) + + "; raised cap to " + Global::intToString(vb.midVisits) + + " to keep strength monotone in segment C."); + if(vb.flooredFromBelow2) + logger.write("NOTE: resolved segment-B visits were below 2 (piklLambda needs >=2 visits to act); " + "running segment B at the 2-visit minimum (baseline maxVisits=" + baselineDesc + + ", -search-visits=" + (searchVisits < 0 ? string("auto") : Global::intToString(searchVisits)) + ")."); + // Loud over-baseline warning: fire whenever the OPERATOR explicitly set a lever above the anchor the + // budget is judged against (effectiveBaseline). This is independent of the mandatory sub-2 mid floor + // (so an explicit big -max-visits-cap still warns even when -search-visits was floored), and covers + // both finite-cap baselines and uncapped baselines (where the anchor is the legacy 100). + bool userMidRaise = (searchVisits != -1) && (vb.midVisits > vb.effectiveBaseline); + bool userCapRaise = (maxVisitsCap != -1) && (vb.maxVisitsCap > vb.effectiveBaseline); + if(userMidRaise || userCapRaise) + logger.write("WARNING: you explicitly set a visit budget (mid=" + Global::intToString(vb.midVisits) + + ", cap=" + Global::intToString(vb.maxVisitsCap) + ") above the " + + (vb.baselineHasCap + ? ("baseline maxVisits=" + baselineDesc) + : ("legacy anchor of " + Global::intToString(vb.effectiveBaseline) + " (baseline is uncapped)")) + + ". A weaker target may then cost MORE compute than the baseline, and a large visit count " + "significantly increases time per move. 
Omit -search-visits/-max-visits-cap to anchor to the baseline."); + + SearchParams candidateBaseParams = baselineParams; + try { + candidateBaseParams.humanSLProfile = SGFMetadata::getProfile(profile); + } + catch(const StringError& e) { + cerr << "Error: invalid -profile '" << profile << "': " << e.what() << endl; + return 1; + } + + Rand seedRand(seedStr); + int maxBotThreads = std::max(1, baselineParams.numThreads); + int expectedConcurrentEvals = maxBotThreads * numGameThreads; + const int defaultMaxBatchSize = std::max(8, ((expectedConcurrentEvals + 3) / 4) * 4); + const bool defaultRequireExactNNLen = true; // fixed 19x19 + const bool disableFP16 = false; + const string expectedSha256 = ""; + const int boardLen = 19; + + NNEvaluator* mainNNEval = Setup::initializeNNEvaluator( + modelFile, modelFile, expectedSha256, baselineCfg, logger, seedRand, expectedConcurrentEvals, + boardLen, boardLen, defaultMaxBatchSize, defaultRequireExactNNLen, disableFP16, Setup::SETUP_FOR_GTP); + logger.write("Loaded main net"); + + NNEvaluator* humanNNEval = Setup::initializeNNEvaluator( + humanModelFile, humanModelFile, expectedSha256, baselineCfg, logger, seedRand, expectedConcurrentEvals, + boardLen, boardLen, defaultMaxBatchSize, defaultRequireExactNNLen, disableFP16, Setup::SETUP_FOR_GTP); + logger.write("Loaded human SL net"); + if(!humanNNEval->requiresSGFMetadata()) + logger.write("WARNING: -human-model was not trained from SGF metadata; profile may have no effect."); + + // ---- minimal game-setup config (rules/board/komi only; bot strength comes from BotSpec) ---- + // Inherit the board ruleset from the baseline config (a deployed gtp_human.cfg, e.g. + // "rules = japanese") so tuning games are scored EXACTLY like real play. 
Calibrating under a + // different ruleset than the configs are deployed with would be an avoidable confound (area vs + // territory scoring changes endgame play, and the human-SL net's KGS-rank conditioning is most + // faithful under the ruleset its KGS training games used). Falls back to Japanese if unspecified. + Rules gameRules = Rules::parseRules(baselineCfg.contains("rules") ? baselineCfg.getString("rules") : "japanese"); + logger.write("Tuning-game ruleset (inherited from baseline config): " + gameRules.toStringNoKomi()); + std::map gameCfgMap = { + {"koRules", Rules::writeKoRule(gameRules.koRule)}, + {"scoringRules", Rules::writeScoringRule(gameRules.scoringRule)}, + {"taxRules", Rules::writeTaxRule(gameRules.taxRule)}, + {"multiStoneSuicideLegals", gameRules.multiStoneSuicideLegal ? "true" : "false"}, + {"hasButtons", gameRules.hasButton ? "true" : "false"}, + {"bSizes", "19"}, + {"bSizeRelProbs", "1"}, + {"komiMean", Global::doubleToString(komi)}, + {"komiStdev", "0.0"}, + {"komiAllowIntegerProb", "0.0"}, + {"logSearchInfo", "false"}, + {"logMoves", "false"}, + {"maxMovesPerGame", "1200"}, + }; + ConfigParser gameCfg(gameCfgMap); + PlaySettings playSettings; // default: forSelfPlay=false, allowResignation=false, no fork/cheap/reduce + GameRunner* gameRunner = new GameRunner(gameCfg, playSettings, logger); + + // ---- dial config + target ---- + StrengthDialConfig dialConfig; + dialConfig.piklFloor = piklFloor; + dialConfig.piklMax = piklMax; + dialConfig.searchVisits = vb.midVisits; + dialConfig.maxVisitsCap = vb.maxVisitsCap; + dialConfig.dtauMax = dtauMax; + + // When segment C is flat (cap == mid, the auto outcome), the strong third of the dial [2,3] collapses + // to a single indistinguishable point. effectiveXHi restricts calibration to [xLo, 2.0] so we neither + // waste rounds on that plateau nor let an unreachable-strong target settle mid-plateau and dodge the + // boundary warning. 
+ double effXHi = effectiveXHi(vb, xLo, xHi); + if(effXHi < xHi) + logger.write("INFO: strong-end visit budget equals mid (segment C is flat at " + + Global::intToString(vb.midVisits) + " visits); restricting calibration to x in [" + + Global::doubleToString(xLo) + ", 2.0]. Raise -max-visits-cap above the baseline to " + "calibrate stronger play."); + if(vb.maxVisitsCap == vb.midVisits && xLo >= 2.0) + logger.write("WARNING: segment C is flat (cap == mid) and -x-lo " + Global::doubleToString(xLo) + + " >= 2.0, so the entire calibration range lies on the flat strong plateau; every dial maps " + "to identical play and calibration cannot discriminate strength. Raise -max-visits-cap above " + "the baseline, or lower -x-lo below 2.0."); + + const double TEMP_CAP = 1.0; + auto clipTemp = [TEMP_CAP](double v) { return v < 0.0 ? 0.0 : (v > TEMP_CAP ? TEMP_CAP : v); }; + double targetWinrate = 1.0 / (1.0 + std::pow(10.0, -targetElo / 400.0)); + + // ---- playAt(x): set candidate dials, play gamesPerRound games candidate-vs-baseline ---- + int roundCounter = 0; + auto playAt = [&](double x) -> std::pair { + int round = roundCounter++; + StrengthDialParams dials = strengthDialToParams(x, dialConfig); + + SearchParams cand = candidateBaseParams; + cand.humanSLChosenMovePiklLambda = dials.piklLambda; + cand.maxVisits = dials.maxVisits; + cand.chosenMoveTemperature = clipTemp(baselineParams.chosenMoveTemperature + dials.deltaTau); + cand.chosenMoveTemperatureEarly = clipTemp(baselineParams.chosenMoveTemperatureEarly + dials.deltaTau); + // Optional: strengthen the candidate by reducing its human-policy SEARCH exploration (the piklLambda + // lever only affects move SELECTION; the ~100-ELO preaz_9d-vs-rank_9d gap lives in which moves get + // explored). Lower = less human exploration = closer to pure main-net search = stronger. 
+ if(candHumanRootExplore >= 0.0) + cand.humanSLRootExploreProbWeightless = candHumanRootExplore; + + std::atomic nextGameIdx(0); + double candidateWins = 0.0; + int countedGames = 0; + std::mutex tallyMutex; + + auto worker = [&]() { + while(true) { + int gameIdx = nextGameIdx.fetch_add(1); + if(gameIdx >= gamesPerRound) + break; + bool candIsBlack = (candColor == "black") ? true + : (candColor == "white") ? false + : (gameIdx % 2 == 0); // auto: alternate to remove color bias + string seed = seedStr + ":r" + Global::intToString(round) + ":g" + Global::intToString(gameIdx); + + MatchPairer::BotSpec specCand; + specCand.botIdx = 0; specCand.botName = "cand"; + specCand.nnEval = mainNNEval; specCand.humanEval = humanNNEval; + specCand.baseParams = cand; + MatchPairer::BotSpec specBase; + specBase.botIdx = 1; specBase.botName = "base"; + specBase.nnEval = mainNNEval; specBase.humanEval = humanNNEval; + specBase.baseParams = baselineParams; + + const MatchPairer::BotSpec& specB = candIsBlack ? specCand : specBase; + const MatchPairer::BotSpec& specW = candIsBlack ? specBase : specCand; + + std::function shouldStop = []() { return false; }; + std::function noopAfterInit = + [](const MatchPairer::BotSpec&, Search*) {}; + + FinishedGameData* g = gameRunner->runGame( + seed, specB, specW, NULL, NULL, logger, + shouldStop, nullptr, nullptr, noopAfterInit, nullptr); + if(g == NULL) + continue; + + bool counted = true; + double winInc = 0.0; + if(g->endHist.isNoResult) { + counted = false; + } else { + Player winner = g->endHist.winner; + Player candPlayerColor = candIsBlack ? P_BLACK : P_WHITE; + if(winner == C_EMPTY) winInc = 0.5; // draw + else winInc = (winner == candPlayerColor) ? 
1.0 : 0.0; + } + delete g; + + if(counted) { + std::lock_guard lock(tallyMutex); + candidateWins += winInc; + countedGames += 1; + } + } + }; + + std::vector threads; + threads.reserve(numGameThreads); + for(int t = 0; t < numGameThreads; t++) + threads.emplace_back(worker); + for(size_t t = 0; t < threads.size(); t++) + threads[t].join(); + + return std::make_pair(candidateWins, countedGames); + }; + + // ---- progress logging per round ---- + auto onRound = [&](int round, double xStar, double eloSe, int distinctXs, int totalGames) { + StrengthDialParams d = strengthDialToParams(xStar, dialConfig); + logger.write( + "Round " + Global::intToString(round) + + ": x*=" + Global::doubleToString(xStar) + + " eloSe=" + Global::doubleToString(eloSe) + + " distinctX=" + Global::intToString(distinctXs) + + " games=" + Global::intToString(totalGames) + + " dial[piklLambda=" + Global::doubleToString(d.piklLambda) + + " maxVisits=" + Global::intToString(d.maxVisits) + + " deltaTau=" + Global::doubleToString(d.deltaTau) + "]"); + }; + + // ---- resume support: a per-round checkpoint so an interrupted run continues instead of restarting ---- + // The checkpoint stores each round's (x, wins, games). A signature header guards against pooling samples + // from a different matchup/dial: only the fields that define the candidate-vs-baseline winrate at a given + // dial x are included (NOT target/tol/range/seed, which affect only sampling and stopping). + string resumeFilePath = resumeFile.empty() ? 
(outputConfigPath + ".samples") : resumeFile; + string resumeHeader = + string("# tunehuman-samples v1") + + " profile=" + profile + + " model=" + modelFile + + " human=" + humanModelFile + + " baseline=" + baselineConfigPath + + " mid=" + Global::intToString(vb.midVisits) + + " cap=" + Global::intToString(vb.maxVisitsCap) + + " piklFloor=" + Global::doubleToString(piklFloor) + + " piklMax=" + Global::doubleToString(piklMax) + + " dtau=" + Global::doubleToString(dtauMax); + + std::vector initialSamples; + if(FileUtils::exists(resumeFilePath)) { + ifstream in(resumeFilePath); + if(!in.good()) { cerr << "Error: cannot open resume-file for reading: " << resumeFilePath << endl; return 1; } + string line; + bool headerSeen = false; + int lineNum = 0; + while(std::getline(in, line)) { + lineNum++; + string t = Global::trim(line); + if(t.empty()) + continue; + if(t[0] == '#') { + if(!headerSeen) { + if(t != resumeHeader) { + cerr << "Error: resume-file " << resumeFilePath << " was written for a different configuration.\n" + << " found: " << t << "\n" + << " expected: " << resumeHeader << "\n" + << "Remove it to start fresh, or pass a different -resume-file." << endl; + return 1; + } + headerSeen = true; + } + continue; + } + // Tolerate a malformed line (warn + skip) rather than fail: a hard kill mid-append can leave a + // truncated final line, and a fatal parse would then permanently block resume. Bad numeric tokens + // are skipped the same way (tryStringToDouble never throws). 
+ std::vector parts = Global::split(t, ' '); + CalibrationSample s; + if(parts.size() != 3 || + !Global::tryStringToDouble(parts[0], s.x) || + !Global::tryStringToDouble(parts[1], s.wins) || + !Global::tryStringToDouble(parts[2], s.games)) { + logger.write("WARNING: skipping malformed resume-file line " + Global::intToString(lineNum) + " in " + + resumeFilePath + " (likely a partial write from an interrupted run): '" + line + "'"); + continue; + } + // Semantic gate: a kill mid-write can truncate the LAST token (games) into a still-parseable but + // wrong value (e.g. wins > games), which would silently poison the fit. Every clean round has + // games >= 1 and 0 <= wins <= games, so this never rejects a valid sample -- only a corrupt one. + if(!std::isfinite(s.x) || !std::isfinite(s.wins) || !std::isfinite(s.games) || + s.games < 1.0 || s.wins < -1e-9 || s.wins > s.games + 1e-9) { + logger.write("WARNING: skipping out-of-range resume-file line " + Global::intToString(lineNum) + " in " + + resumeFilePath + " (likely a partial write from an interrupted run): '" + line + "'"); + continue; + } + initialSamples.push_back(s); + } + in.close(); + if(!headerSeen) { + if(!initialSamples.empty()) { + cerr << "Error: resume-file " << resumeFilePath << " has samples but no recognizable signature header." << endl; + return 1; + } + // File exists but is empty/headerless (0-byte file, or a header write killed mid-flush by the + // runtime cap). Recreate the header so the per-round appends below land in a well-formed file -- + // otherwise the NEXT restart would see samples-without-header and fatally refuse to resume. 
+ ofstream hdrOut(resumeFilePath, std::ios::trunc); + if(!hdrOut.good()) { cerr << "Error: cannot (re)create resume-file: " << resumeFilePath << endl; return 1; } + hdrOut << resumeHeader << "\n"; + hdrOut.close(); + logger.write("Resume-file " + resumeFilePath + " had no header (empty/partial write); recreated it."); + } + logger.write("Resuming calibration from " + Global::intToString((int)initialSamples.size()) + + " checkpointed round(s) in " + resumeFilePath + "."); + } + else { + ofstream hdrOut(resumeFilePath); + if(!hdrOut.good()) { cerr << "Error: cannot create resume-file: " << resumeFilePath << endl; return 1; } + hdrOut << resumeHeader << "\n"; + hdrOut.close(); + logger.write("Checkpointing each round to " + resumeFilePath + " (resumable across restarts)."); + } + + // Durable per-round append: open/flush/close each call so a hard kill mid-run can't lose a completed round. + auto onSampleCollected = [&](double x, double wins, double games) { + ofstream app(resumeFilePath, std::ios::app); + app << Global::doubleToStringHighPrecision(x) << " " + << Global::doubleToStringHighPrecision(wins) << " " + << Global::doubleToStringHighPrecision(games) << "\n"; + app.close(); + }; + + // ---- run calibration ---- + uint64_t rngSeed = (uint64_t)std::hash()(seedStr); + CalibrationResult result = calibrateToTarget( + playAt, xLo, effXHi, targetWinrate, gamesPerRound, maxRounds, eloTol, rngSeed, 0.5, onRound, + initialSamples, onSampleCollected); + + // ---- compute final dials + fitted ELO ---- + StrengthDialParams finalDials = strengthDialToParams(result.xStar, dialConfig); + double tempBase = clipTemp(baselineParams.chosenMoveTemperature + finalDials.deltaTau); + double tempEarly = clipTemp(baselineParams.chosenMoveTemperatureEarly + finalDials.deltaTau); + double fittedWinrate = result.model.predict(result.xStar); + double fittedElo = 400.0 * std::log10(fittedWinrate / (1.0 - fittedWinrate)); + bool reachedBoundary = (result.xStar <= xLo + 1e-6) || (result.xStar 
>= effXHi - 1e-6); + + // ---- build header + overridden config text ---- + std::ostringstream hdr; + hdr << "# Tuned by `katago tunehuman`.\n"; + hdr << "# baseline-config : " << baselineConfigPath << "\n"; + hdr << "# profile : " << profile << "\n"; + hdr << "# models : " << modelFile << " / " << humanModelFile << "\n"; + hdr << "# target-elo : " << targetElo << " (targetWinrate " << targetWinrate << " vs baseline)\n"; + hdr << "# achieved : fitted " << fittedElo << " ELO +/- " << result.eloSe + << " (1-sigma), over " << result.totalGames << " games, " << result.rounds + << " rounds, converged=" << (result.converged ? "yes" : "no") << "\n"; + hdr << "# dial : x*=" << result.xStar << " piklLambda=" << finalDials.piklLambda + << " maxVisits=" << finalDials.maxVisits << " deltaTau=" << finalDials.deltaTau << "\n"; + hdr << "# seed : " << seedStr << "\n"; + if(reachedBoundary) { + hdr << "# WARNING: target ELO not reachable within the dial range; best-achievable shown.\n"; + hdr << "# Widen -max-visits-cap / -dtau-max / -x-lo / -x-hi to extend the range.\n"; + logger.write("WARNING: target ELO not reachable within dial range; wrote best-achievable config (x* at boundary)."); + } + hdr << "\n"; + + std::vector> overrides; + overrides.push_back(std::make_pair("humanSLProfile", profile)); + overrides.push_back(std::make_pair("humanSLChosenMovePiklLambda", Global::doubleToString(finalDials.piklLambda))); + overrides.push_back(std::make_pair("maxVisits", Global::intToString(finalDials.maxVisits))); + overrides.push_back(std::make_pair("chosenMoveTemperature", Global::doubleToString(tempBase))); + overrides.push_back(std::make_pair("chosenMoveTemperatureEarly", Global::doubleToString(tempEarly))); + if(candHumanRootExplore >= 0.0) + overrides.push_back(std::make_pair("humanSLRootExploreProbWeightless", Global::doubleToString(candHumanRootExplore))); + + string finalText = hdr.str() + overrideConfigText(baselineText, overrides); + + ofstream out; + FileUtils::open(out, 
outputConfigPath); + out << finalText; + out.close(); + + logger.write( + "Wrote tuned config to " + outputConfigPath + + " (fitted " + Global::doubleToString(fittedElo) + " ELO +/- " + Global::doubleToString(result.eloSe) + + ", " + Global::intToString(result.totalGames) + " games, " + Global::intToString(result.rounds) + + " rounds, converged=" + (result.converged ? "yes" : "no") + ")"); + + delete gameRunner; + delete mainNNEval; + delete humanNNEval; + return 0; +} diff --git a/cpp/configs/gtp_human10k.cfg b/cpp/configs/gtp_human10k.cfg new file mode 100644 index 0000000000..c42b645312 --- /dev/null +++ b/cpp/configs/gtp_human10k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_10k (Black) vs gtp_human9k.cfg (White) = 50.0% [42.0,58.0] over 148 games. λ=0.59036. LOCK. +# gtp_human10k.cfg — 10k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_10k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human9k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. +# +# Run: ./katago gtp -config gtp_human10k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). 
+humanSLProfile = preaz_10k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. +humanSLChosenMovePiklLambda = 0.59036 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human11k.cfg b/cpp/configs/gtp_human11k.cfg new file mode 100644 index 0000000000..a649b918bc --- /dev/null +++ b/cpp/configs/gtp_human11k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_11k (Black) vs gtp_human10k.cfg (White) = 48.1% [40.5,55.8] over 160 games. λ=0.56458. LOCK. +# gtp_human11k.cfg — 11k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_11k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human10k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. 
Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. +# +# Run: ./katago gtp -config gtp_human11k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_11k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.56458 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human12k.cfg b/cpp/configs/gtp_human12k.cfg new file mode 100644 index 0000000000..3092bfb07f --- /dev/null +++ b/cpp/configs/gtp_human12k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_12k (Black) vs gtp_human11k.cfg (White) = 50.8% [42.2,59.3] over 128 games. λ=0.54297. LOCK. +# gtp_human12k.cfg — 12k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_12k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human11k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human12k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_12k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.54297 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human13k.cfg b/cpp/configs/gtp_human13k.cfg new file mode 100644 index 0000000000..48420b59d6 --- /dev/null +++ b/cpp/configs/gtp_human13k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_13k (Black) vs gtp_human12k.cfg (White) = 50.8% [42.1,59.4] over 124 games. λ=0.58977. LOCK. +# gtp_human13k.cfg — 13k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_13k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human12k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human13k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_13k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.58977 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human14k.cfg b/cpp/configs/gtp_human14k.cfg new file mode 100644 index 0000000000..9a94c33c7f --- /dev/null +++ b/cpp/configs/gtp_human14k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_14k (Black) vs gtp_human13k.cfg (White) = 50.0% [41.3,58.7] over 124 games. λ=0.61625. LOCK. +# gtp_human14k.cfg — 14k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_14k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human13k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human14k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_14k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.61625 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human15k.cfg b/cpp/configs/gtp_human15k.cfg new file mode 100644 index 0000000000..dae336b9e1 --- /dev/null +++ b/cpp/configs/gtp_human15k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_15k (Black) vs gtp_human14k.cfg (White) = 49.1% [40.2,58.1] over 116 games. λ=0.61839. LOCK. +# gtp_human15k.cfg — 15k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_15k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human14k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human15k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_15k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.61839 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human16k.cfg b/cpp/configs/gtp_human16k.cfg new file mode 100644 index 0000000000..4402a8cbe7 --- /dev/null +++ b/cpp/configs/gtp_human16k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_16k (Black) vs gtp_human15k.cfg (White) = 50.0% [42.1,57.9] over 152 games. λ=0.67050. LOCK. +# gtp_human16k.cfg — 16k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_16k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human15k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human16k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_16k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.67050 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human17k.cfg b/cpp/configs/gtp_human17k.cfg new file mode 100644 index 0000000000..fbf336f4ab --- /dev/null +++ b/cpp/configs/gtp_human17k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_17k (Black) vs gtp_human16k.cfg (White) = 48.3% [40.9,55.7] over 172 games. λ=0.74130. LOCK. +# gtp_human17k.cfg — 17k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_17k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human16k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human17k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_17k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.74130 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human18k.cfg b/cpp/configs/gtp_human18k.cfg new file mode 100644 index 0000000000..d720e39d3c --- /dev/null +++ b/cpp/configs/gtp_human18k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_18k (Black) vs gtp_human17k.cfg (White) = 46.3% [40.4,52.2] over 268 games. λ=0.78210. LOCK. +# gtp_human18k.cfg — 18k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_18k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human17k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human18k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_18k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.78210 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human19k.cfg b/cpp/configs/gtp_human19k.cfg new file mode 100644 index 0000000000..7813ec405f --- /dev/null +++ b/cpp/configs/gtp_human19k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_19k (Black) vs gtp_human18k.cfg (White) = 50.0% [41.0,59.0] over 116 games. λ=0.89820. LOCK. +# gtp_human19k.cfg — 19k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_19k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human18k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human19k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_19k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.89820 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human1d.cfg b/cpp/configs/gtp_human1d.cfg new file mode 100644 index 0000000000..9d9cf87f04 --- /dev/null +++ b/cpp/configs/gtp_human1d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_1d (Black) vs gtp_human2d.cfg (White) = 49.1% [42.5,55.7] over 216 games. λ=0.50930. LOCK. +# gtp_human1d.cfg — 1d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_1d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human2d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human1d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_1d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.50930 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human1k.cfg b/cpp/configs/gtp_human1k.cfg new file mode 100644 index 0000000000..926af4a360 --- /dev/null +++ b/cpp/configs/gtp_human1k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_1k (Black) vs gtp_human1d.cfg (White) = 50.7% [42.5,58.9] over 140 games. λ=0.48988. LOCK. +# gtp_human1k.cfg — 1k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_1k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human1d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human1k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_1k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.48988 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human20k.cfg b/cpp/configs/gtp_human20k.cfg new file mode 100644 index 0000000000..e776a162fe --- /dev/null +++ b/cpp/configs/gtp_human20k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_20k (Black) vs gtp_human19k.cfg (White) = 50.0% [40.6,59.4] over 104 games. λ=1.22270. LOCK. +# gtp_human20k.cfg — 20k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_20k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human19k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human20k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_20k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 1.22270 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human2d.cfg b/cpp/configs/gtp_human2d.cfg new file mode 100644 index 0000000000..b984fa8e14 --- /dev/null +++ b/cpp/configs/gtp_human2d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_2d (Black) vs gtp_human3d.cfg (White) = 50.0% [41.9,58.1] over 144 games. λ=0.51330. LOCK. +# gtp_human2d.cfg — 2d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_2d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human3d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human2d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_2d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.51330 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human2k.cfg b/cpp/configs/gtp_human2k.cfg new file mode 100644 index 0000000000..7d297557da --- /dev/null +++ b/cpp/configs/gtp_human2k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_2k (Black) vs gtp_human1k.cfg (White) = 48.2% [40.8,55.7] over 168 games. λ=0.46755. LOCK. +# gtp_human2k.cfg — 2k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_2k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human1k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human2k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_2k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.46755 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human3d.cfg b/cpp/configs/gtp_human3d.cfg new file mode 100644 index 0000000000..3ace6c586a --- /dev/null +++ b/cpp/configs/gtp_human3d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_3d (Black) vs gtp_human4d.cfg (White) = 51.5% [43.1,59.7] over 136 games. λ=0.45556. LOCK. +# gtp_human3d.cfg — 3d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_3d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human4d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human3d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_3d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.45556 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human3k.cfg b/cpp/configs/gtp_human3k.cfg new file mode 100644 index 0000000000..ed71d014eb --- /dev/null +++ b/cpp/configs/gtp_human3k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_3k (Black) vs gtp_human2k.cfg (White) = 50.0% [41.5,58.5] over 128 games. λ=0.49173. LOCK. +# gtp_human3k.cfg — 3k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_3k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human2k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human3k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_3k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.49173 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human4d.cfg b/cpp/configs/gtp_human4d.cfg new file mode 100644 index 0000000000..058b6f3ed0 --- /dev/null +++ b/cpp/configs/gtp_human4d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_4d (Black) vs gtp_human5d.cfg (White) = 50.0% [43.9,56.1] over 256 games. λ=0.37300. LOCK. +# gtp_human4d.cfg — 4d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_4d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human5d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human4d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_4d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.37300 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human4k.cfg b/cpp/configs/gtp_human4k.cfg new file mode 100644 index 0000000000..0bb2993608 --- /dev/null +++ b/cpp/configs/gtp_human4k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_4k (Black) vs gtp_human3k.cfg (White) = 48.1% [40.5,55.8] over 160 games. λ=0.47130. LOCK. +# gtp_human4k.cfg — 4k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_4k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human3k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human4k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_4k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.47130 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human5d.cfg b/cpp/configs/gtp_human5d.cfg new file mode 100644 index 0000000000..73f2886cc6 --- /dev/null +++ b/cpp/configs/gtp_human5d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_5d (Black) vs gtp_human6d.cfg (White) = 51.2% [43.6,58.8] over 164 games. λ=0.28064. LOCK. +# gtp_human5d.cfg — 5d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_5d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human6d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human5d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_5d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.28064 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human5k.cfg b/cpp/configs/gtp_human5k.cfg new file mode 100644 index 0000000000..89b1310783 --- /dev/null +++ b/cpp/configs/gtp_human5k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_5k (Black) vs gtp_human4k.cfg (White) = 51.2% [43.6,58.9] over 160 games. λ=0.50720. LOCK. +# gtp_human5k.cfg — 5k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_5k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human4k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human5k.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_5k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.50720 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human6d.cfg b/cpp/configs/gtp_human6d.cfg new file mode 100644 index 0000000000..f97539269a --- /dev/null +++ b/cpp/configs/gtp_human6d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_6d (Black) vs gtp_human7d.cfg (White) = 52.0% [44.1,59.8] over 152 games. λ=0.19830. LOCK. +# gtp_human6d.cfg — 6d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_6d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human7d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human6d.cfg -model <main-model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung's KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_6d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. 
+humanSLChosenMovePiklLambda = 0.19830 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human6k.cfg b/cpp/configs/gtp_human6k.cfg new file mode 100644 index 0000000000..187336907e --- /dev/null +++ b/cpp/configs/gtp_human6k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_6k (Black) vs gtp_human5k.cfg (White) = 50.8% [42.0,59.6] over 120 games. λ=0.48925. LOCK. +# gtp_human6k.cfg — 6k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_6k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human5k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human6k.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_6k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.48925 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human7d.cfg b/cpp/configs/gtp_human7d.cfg new file mode 100644 index 0000000000..9103789033 --- /dev/null +++ b/cpp/configs/gtp_human7d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_7d (Black) vs gtp_human8d.cfg (White) = 48.6% [40.6,56.7] over 144 games. λ=0.12670. LOCK. +# gtp_human7d.cfg — 7d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_7d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human8d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human7d.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_7d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.12670 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human7k.cfg b/cpp/configs/gtp_human7k.cfg new file mode 100644 index 0000000000..a7feef8de2 --- /dev/null +++ b/cpp/configs/gtp_human7k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_7k (Black) vs gtp_human6k.cfg (White) = 50.9% [41.8,60.0] over 112 games. λ=0.53370. LOCK. +# gtp_human7k.cfg — 7k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_7k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human6k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human7k.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_7k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.53370 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human8d.cfg b/cpp/configs/gtp_human8d.cfg new file mode 100644 index 0000000000..85cb48e7f8 --- /dev/null +++ b/cpp/configs/gtp_human8d.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_8d (Black) vs gtp_human9d.cfg (White) = 47.0% [40.5,53.6] over 219 games. λ=0.08680. LOCK. +# gtp_human8d.cfg — 8d rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_8d (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human9d.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human8d.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_8d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.08680 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human8k.cfg b/cpp/configs/gtp_human8k.cfg new file mode 100644 index 0000000000..3129e6927b --- /dev/null +++ b/cpp/configs/gtp_human8k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_8k (Black) vs gtp_human7k.cfg (White) = 49.1% [40.2,58.1] over 116 games. λ=0.50640. LOCK. +# gtp_human8k.cfg — 8k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_8k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human7k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human8k.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_8k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.50640 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human9d.cfg b/cpp/configs/gtp_human9d.cfg new file mode 100644 index 0000000000..2d817a562a --- /dev/null +++ b/cpp/configs/gtp_human9d.cfg @@ -0,0 +1,86 @@ +# gtp_human9d.cfg — 9d anchor of the Human-SL KGS-rank ladder (even-game parity vs rank_9d). +# +# This is the TOP rung (9d = 0 ELO) of the gtp_human.cfg ladder documented in +# docs/HumanSL_Rank_Ladder.md. It imitates a 9d player with the Human-SL net plus KataGo search. 
+# +# CALIBRATION (see docs/HumanSL_Rank_Ladder.md → Results): +# candidate = preaz_9d @ 400 visits, humanSLChosenMovePiklLambda = 0.045 +# baseline = rank_9d @ 400 visits, humanSLChosenMovePiklLambda = 0.08 (a DIFFERENT profile) +# measured = 201/383 games = 52.5% = +17 ELO, 95% CI [-18, +52] (statistically at parity) +# method = `katago tunehuman`, fixed-lambda bracket at 400v (see Reproduction) +# The ONLY change from cpp/configs/gtp_human9d_search_example.cfg is piklLambda 0.08 -> 0.045, +# which makes the preaz_9d profile match the rank_9d reference's strength. The lambda response is +# shallow near parity (~23 ELO per 0.01 lambda), so lambda in [0.04, 0.05] all sit within ~±25 ELO. +# +# Run requires the Human-SL model on the command line, e.g.: +# ./katago gtp -config gtp_human9d.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +# Human model: https://github.com/lightvector/KataGo/releases/tag/v1.15.0 + +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# Near-pro 9d needs the reference's full search; 1-visit play cannot match a 400-visit bot of the +# same model. To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate human amateur 9d players (roughly based on ~KGS ranks) +humanSLProfile = preaz_9d +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Calibrated strength dial: 0.045 makes preaz_9d@400v match the rank_9d@400v reference (0 ELO). +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger. +# (Reference example ships 0.08; we lowered it to 0.045 to reach 9d parity with the rank_9d profile.)
+humanSLChosenMovePiklLambda = 0.045 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/configs/gtp_human9k.cfg b/cpp/configs/gtp_human9k.cfg new file mode 100644 index 0000000000..2ba71978c3 --- /dev/null +++ b/cpp/configs/gtp_human9k.cfg @@ -0,0 +1,74 @@ +# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_9k (Black) vs gtp_human8k.cfg (White) = 48.0% [41.3,54.9] over 204 games. λ=0.53880. LOCK. +# gtp_human9k.cfg — 9k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md. +# Tuned so preaz_9k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human8k.cfg (White) +# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits. 
+# +# Run: ./katago gtp -config gtp_human9k.cfg -model <model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz +logDir = gtp_logs +logAllGTPCommunication = true +logSearchInfo = true +logSearchInfoForChosenMove = false +logToStderr = false + +rules = japanese + +allowResignation = true +resignThreshold = -0.98 +resignConsecTurns = 10 +resignMinScoreDifference = 20 +resignMinMovesPerBoardArea = 0.40 + +# This ladder runs at a fixed 400 visits; strength is set by humanSLChosenMovePiklLambda (below), not visits. +# To adjust strength, change humanSLChosenMovePiklLambda (below), not maxVisits. +maxVisits = 400 + +numSearchThreads = 8 +lagBuffer = 1.0 + +delayMoveScale = 2.0 +delayMoveMax = 10.0 + +# Imitate a human amateur at this rung KGS rank (Human-SL profile set in humanSLProfile below). +humanSLProfile = preaz_9k +humanSLChosenMoveProp = 1.0 +humanSLChosenMoveIgnorePass = true + +# Strength dial: higher humanSLChosenMovePiklLambda is more human and weaker; lower trusts search and is stronger. +# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file. +# Higher -> more human / weaker; lower -> trusts KataGo search more / stronger.
+humanSLChosenMovePiklLambda = 0.53880 + +# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda +humanSLRootExploreProbWeightless = 0.8 +humanSLRootExploreProbWeightful = 0.0 +humanSLPlaExploreProbWeightless = 0.0 +humanSLPlaExploreProbWeightful = 0.0 +humanSLOppExploreProbWeightless = 0.0 +humanSLOppExploreProbWeightful = 0.0 + +humanSLCpuctExploration = 0.50 +humanSLCpuctPermanent = 2.0 + +chosenMoveTemperatureEarly = 0.70 +chosenMoveTemperature = 0.25 +chosenMoveTemperatureHalflife = 30 +chosenMoveTemperatureOnlyBelowProb = 1.0 +chosenMoveSubtract = 0 +chosenMovePrune = 0 + +nnCacheSizePowerOfTwo = 20 +nnMutexPoolSizePowerOfTwo = 14 + +ignorePreRootHistory = false +analysisIgnorePreRootHistory = false + +rootNumSymmetriesToSample = 2 +useLcbForSelection = false + +winLossUtilityFactor = 1.0 +staticScoreUtilityFactor = 0.5 +dynamicScoreUtilityFactor = 0.5 + +useUncertainty = false +subtreeValueBiasFactor = 0.0 +useNoisePruning = false diff --git a/cpp/main.cpp b/cpp/main.cpp index 8e4cba8f31..15d27cb4da 100644 --- a/cpp/main.cpp +++ b/cpp/main.cpp @@ -37,6 +37,7 @@ version : Print version and exit. analysis : Runs an engine designed to analyze entire games in parallel. tuner : (OpenCL only) Run tuning to find and optimize parameters that work on your GPU. +tunehuman : Tune human-SL play parameters to hit a target ELO offset relative to a baseline config. 
---Selfplay training subcommands--------- @@ -85,6 +86,8 @@ static int handleSubcommand(const string& subcommand, const vector<string>& args return MainCmds::gtp(subArgs); else if(subcommand == "tuner") return MainCmds::tuner(subArgs); + else if(subcommand == "tunehuman") + return MainCmds::tunehuman(subArgs); else if(subcommand == "match") return MainCmds::match(subArgs); else if(subcommand == "selfplay") diff --git a/cpp/main.h b/cpp/main.h index 15581c65e3..f432047cff 100644 --- a/cpp/main.h +++ b/cpp/main.h @@ -9,6 +9,7 @@ namespace MainCmds { int genconfig(const std::vector<std::string>& args, const std::string& firstCommand); int gtp(const std::vector<std::string>& args); int tuner(const std::vector<std::string>& args); + int tunehuman(const std::vector<std::string>& args); int match(const std::vector<std::string>& args); int selfplay(const std::vector<std::string>& args); diff --git a/cpp/program/humansltuner.cpp b/cpp/program/humansltuner.cpp new file mode 100644 index 0000000000..a86b1db753 --- /dev/null +++ b/cpp/program/humansltuner.cpp @@ -0,0 +1,300 @@ +#include "../program/humansltuner.h" + +#include <algorithm> +#include <cmath> +#include <limits> +#include <random> + +static double clipd(double v, double lo, double hi) { + return v < lo ? lo : (v > hi ?
hi : v); +} + +static double lerp(double a, double b, double t) { return a + (b - a) * t; } + +LogisticRS::LogisticRS(double l2_) + : l2(l2_), xs(), ws(), ns(), b0(0.0), b1(0.0), fitted(false) { + cov[0][0] = 0.0; cov[0][1] = 0.0; cov[1][0] = 0.0; cov[1][1] = 0.0; +} + +void LogisticRS::addSample(double x, double wins, double games) { + xs.push_back(x); + ws.push_back(wins); + ns.push_back(games); +} + +LogisticRS& LogisticRS::fit(int iters) { + for(int iter = 0; iter < iters; iter++) { + double g0 = l2 * b0; + double g1 = l2 * b1; + double S0 = l2, S1 = 0.0, S2 = l2; + for(size_t i = 0; i < xs.size(); i++) { + double z = clipd(b0 + b1 * xs[i], -30.0, 30.0); + double p = 1.0 / (1.0 + std::exp(-z)); + double resid = ns[i] * p - ws[i]; + g0 += resid; + g1 += xs[i] * resid; + double w = clipd(ns[i] * p * (1.0 - p), 1e-9, std::numeric_limits<double>::infinity()); + S0 += w; + S1 += w * xs[i]; + S2 += w * xs[i] * xs[i]; + } + double H00 = S0, H01 = S1, H10 = S1, H11 = S2; + double det = H00 * H11 - H01 * H10; + if(std::fabs(det) < 1e-12) + continue; + double step0 = clipd((H11 * g0 - H01 * g1) / det, -10.0, 10.0); + double step1 = clipd((-H10 * g0 + H00 * g1) / det, -10.0, 10.0); + b0 -= step0; + b1 -= step1; + } + + // Recompute covariance = (X^T W X + l2 I)^-1 at the final coefficients.
+ double S0 = l2, S1 = 0.0, S2 = l2; + for(size_t i = 0; i < xs.size(); i++) { + double z = clipd(b0 + b1 * xs[i], -30.0, 30.0); + double p = 1.0 / (1.0 + std::exp(-z)); + double w = clipd(ns[i] * p * (1.0 - p), 1e-9, std::numeric_limits<double>::infinity()); + S0 += w; + S1 += w * xs[i]; + S2 += w * xs[i] * xs[i]; + } + double det = S0 * S2 - S1 * S1; + if(std::fabs(det) < 1e-12) { + cov[0][0] = cov[0][1] = cov[1][0] = cov[1][1] = 0.0; + } else { + cov[0][0] = S2 / det; + cov[0][1] = -S1 / det; + cov[1][0] = -S1 / det; + cov[1][1] = S0 / det; + } + fitted = true; + return *this; +} + +double LogisticRS::predict(double x) const { + double z = clipd(b0 + b1 * x, -30.0, 30.0); + return 1.0 / (1.0 + std::exp(-z)); +} + +double LogisticRS::root(double targetWinrate) const { + if(std::fabs(b1) < 1e-9) + return std::nan(""); + double logitT = std::log(targetWinrate / (1.0 - targetWinrate)); + return (logitT - b0) / b1; +} + +double LogisticRS::rootSeElo(double targetWinrate) const { + if(!fitted || std::fabs(b1) < 1e-9) + return std::numeric_limits<double>::infinity(); + double logitT = std::log(targetWinrate / (1.0 - targetWinrate)); + double dx_db0 = -1.0 / b1; + double dx_db1 = -(logitT - b0) / (b1 * b1); + double varX = dx_db0 * dx_db0 * cov[0][0] + + 2.0 * dx_db0 * dx_db1 * cov[0][1] + + dx_db1 * dx_db1 * cov[1][1]; + double eloPerX = std::fabs(b1) * LogisticRS::ELO_PER_LOGIT; + return eloPerX * std::sqrt(std::max(varX, 0.0)); +} + +int LogisticRS::distinctXCount(double eps) const { + std::vector<double> sorted = xs; + std::sort(sorted.begin(), sorted.end()); + int count = 0; + for(size_t i = 0; i < sorted.size(); i++) { + if(i == 0 || sorted[i] - sorted[i - 1] > eps) + count++; + } + return count; +} + +VisitBudget resolveVisitBudget(int64_t baselineMaxVisits, int userSearchVisits, int userMaxVisitsCap) { + const int64_t NO_REAL_CAP = (int64_t)1 << 50; // == SearchParams ctor default (search bounded elsewhere) + const int LEGACY_ANCHOR = 100; // fallback anchor when the baseline has no
finite cap + const int ABS_MAX = 1000000; // hard ceiling so a finite-but-huge int64 baseline can't overflow int + + bool baselineHasCap = (baselineMaxVisits > 0 && baselineMaxVisits < NO_REAL_CAP); + int anchor = baselineHasCap + ? (int)std::min(baselineMaxVisits, (int64_t)ABS_MAX) + : LEGACY_ANCHOR; + + // Segment B depth: piklLambda is inert below 2 visits, so floor at 2. + int rawMid = (userSearchVisits == -1) ? anchor : userSearchVisits; + int midVisits = std::max(2, rawMid); + bool flooredFromBelow2 = (rawMid < 2); + + // Segment C strong end: auto climbs back to the baseline anchor (never above it); explicit is honored + // but never below mid, so the segment-C log2 interpolation never runs downward. + int rawCap = (userMaxVisitsCap == -1) ? std::max(midVisits, anchor) : userMaxVisitsCap; + int maxVisitsCap = std::max(midVisits, rawCap); + + bool raisesAboveBaseline = baselineHasCap && + ((int64_t)midVisits > baselineMaxVisits || (int64_t)maxVisitsCap > baselineMaxVisits); + + return VisitBudget{midVisits, maxVisitsCap, raisesAboveBaseline, flooredFromBelow2, anchor, baselineHasCap}; +} + +double effectiveXHi(const VisitBudget& vb, double xLo, double xHi) { + if(vb.maxVisitsCap == vb.midVisits && xHi > 2.0 && xLo < 2.0) + return 2.0; + return xHi; +} + +StrengthDialParams strengthDialToParams(double x, const StrengthDialConfig& c) { + x = clipd(x, 0.0, 3.0); + StrengthDialParams out; + if(x < 1.0) { + // Segment A (weak): temperature lever at 1 visit (piklLambda is inert at 1 visit). + out.maxVisits = 1; + out.piklLambda = StrengthDialConfig::PIKL_INERT; + out.deltaTau = c.dtauMax * (1.0 - x); + } else if(x < 2.0) { + // Segment B (mid): piklLambda lever with search on. + out.maxVisits = c.searchVisits; + double lg = lerp(std::log10(c.piklMax), std::log10(c.piklFloor), x - 1.0); + out.piklLambda = std::pow(10.0, lg); + out.deltaTau = 0.0; + } else { + // Segment C (strong): visits lever, piklLambda fully trusted. 
+ double lg = lerp(std::log2((double)c.searchVisits), std::log2((double)c.maxVisitsCap), x - 2.0); + out.maxVisits = (int)std::lround(std::pow(2.0, lg)); + out.piklLambda = c.piklFloor; + out.deltaTau = 0.0; + } + return out; +} + +CalibrationResult calibrateToTarget( + const std::function<std::pair<double,int>(double)>& playAt, + double xLo, double xHi, double targetWinrate, + int gamesPerRound, int maxRounds, double eloTol, + uint64_t rngSeed, double l2, + const std::function<void(int,double,double,int,int)>& onRound, + const std::vector<CalibrationSample>& initialSamples, + const std::function<void(double,double,double)>& onSampleCollected +) { + (void)gamesPerRound; // games count comes from playAt's return value + LogisticRS rs(l2); + double xStar = 0.5 * (xLo + xHi); + double se = std::numeric_limits<double>::infinity(); + int totalGames = 0; + bool converged = false; + + // Resume: seed the fit with any prior rounds' samples so an interrupted calibration continues instead + // of restarting. The round loop then begins at initialSamples.size(). + for(const CalibrationSample& s : initialSamples) { + rs.addSample(s.x, s.wins, s.games); + totalGames += (int)s.games; + } + const int startRound = (int)initialSamples.size(); + int roundsRun = startRound; + if(startRound > 0) { + rs.fit(); + double r0 = rs.root(targetWinrate); + if(std::isfinite(r0)) + xStar = clipd(r0, xLo, xHi); + se = rs.rootSeElo(targetWinrate); + // If the reloaded samples already satisfy convergence, finish without playing any more games. + if(startRound >= 4 && rs.distinctXCount() >= 4 && se <= eloTol) + converged = true; + } + + // Perturbing the seed by startRound keeps each resumed chunk exploring fresh offsets; for the + // from-scratch path (startRound == 0) this is exactly rngSeed, so that path is byte-identical to before.
+ std::mt19937_64 rng(rngSeed + 0x9e3779b97f4a7c15ULL * (uint64_t)startRound); + std::uniform_real_distribution<double> uniform(xLo, xHi); + + for(int rnd = startRound; !converged && rnd < maxRounds; rnd++) { + roundsRun = rnd + 1; + double x; + if(rnd < 2) { + x = uniform(rng); // explore uniformly the first 2 rounds + } else { + double sigma = std::max(0.05, 0.5 * (xHi - xLo) * std::pow(0.85, (double)rnd)); + std::normal_distribution<double> gaussian(0.0, sigma); + x = clipd(xStar + gaussian(rng), xLo, xHi); + } + std::pair<double,int> res = playAt(x); + double wins = res.first; + int games = res.second; + rs.addSample(x, wins, (double)games); + rs.fit(); + double r = rs.root(targetWinrate); + if(std::isfinite(r)) + xStar = clipd(r, xLo, xHi); + se = rs.rootSeElo(targetWinrate); + totalGames += games; + if(onSampleCollected) + onSampleCollected(x, wins, (double)games); + if(onRound) + onRound(rnd, xStar, se, rs.distinctXCount(), totalGames); + if(rnd >= 3 && rs.distinctXCount() >= 4 && se <= eloTol) { + converged = true; + break; + } + } + + CalibrationResult result; + result.xStar = xStar; + result.eloSe = se; + result.totalGames = totalGames; + result.rounds = roundsRun; + result.converged = converged; + result.model = rs; + return result; +} + +std::string overrideConfigText( + const std::string& baselineText, + const std::vector<std::pair<std::string,std::string>>& overrides +) { + // Split into lines (dropping CR), remembering content; we re-join with '\n'.
+ std::vector<std::string> lines; + { + std::string cur; + for(char ch : baselineText) { + if(ch == '\n') { lines.push_back(cur); cur.clear(); } + else if(ch == '\r') { /* drop */ } + else cur.push_back(ch); + } + lines.push_back(cur); + } + + std::vector<bool> applied(overrides.size(), false); + + for(std::string& line : lines) { + size_t start = 0; + while(start < line.size() && (line[start] == ' ' || line[start] == '\t')) + start++; + if(start >= line.size() || line[start] == '#') + continue; + size_t eq = line.find('=', start); + if(eq == std::string::npos) + continue; + size_t keyEnd = start; + while(keyEnd < eq && line[keyEnd] != ' ' && line[keyEnd] != '\t') + keyEnd++; + std::string key = line.substr(start, keyEnd - start); + for(size_t k = 0; k < overrides.size(); k++) { + if(!applied[k] && key == overrides[k].first) { + line = line.substr(0, start) + key + " = " + overrides[k].second; + applied[k] = true; + break; + } + } + } + + std::string out; + for(size_t i = 0; i < lines.size(); i++) { + out += lines[i]; + if(i + 1 < lines.size()) + out += "\n"; + } + for(size_t k = 0; k < overrides.size(); k++) { + if(!applied[k]) { + if(!out.empty() && out.back() != '\n') + out += "\n"; + out += overrides[k].first + " = " + overrides[k].second + "\n"; + } + } + return out; +} diff --git a/cpp/program/humansltuner.h b/cpp/program/humansltuner.h new file mode 100644 index 0000000000..8e15783ff4 --- /dev/null +++ b/cpp/program/humansltuner.h @@ -0,0 +1,128 @@ +#ifndef PROGRAM_HUMANSLTUNER_H_ +#define PROGRAM_HUMANSLTUNER_H_ + +#include <cstdint> +#include <functional> +#include <string> +#include <utility> +#include <vector> + +// Pure math + round loop for the `tunehuman` subcommand. +// NO KataGo NN/search dependencies — fully unit-testable without model files. + +// Binomial logistic regression winrate(x) = sigmoid(b0 + b1*x), fit by +// L2-regularized Newton-MAP. Linear logit because the strength coordinate is monotone. +class LogisticRS { + public: + // 400 / ln(10): converts a logit difference to ELO.
+ static constexpr double ELO_PER_LOGIT = 400.0 / 2.302585092994046; // ~173.7178 + + explicit LogisticRS(double l2_ = 0.5); + + void addSample(double x, double wins, double games); // wins may be fractional (draws = 0.5) + LogisticRS& fit(int iters = 50); + double predict(double x) const; // sigmoid(b0 + b1 x) + double root(double targetWinrate) const; // x* with predict(x*) == target; NaN if degenerate + double rootSeElo(double targetWinrate) const; // delta-method SE of x*, in ELO units; +inf if degenerate + int distinctXCount(double eps = 1e-6) const; // number of distinct sampled x values + + double getB0() const { return b0; } + double getB1() const { return b1; } + + private: + double l2; + std::vector<double> xs; + std::vector<double> ws; + std::vector<double> ns; + double b0; + double b1; + double cov[2][2]; // covariance of (b0,b1); valid after fit() + bool fitted; +}; + +struct StrengthDialParams { + double piklLambda; + int maxVisits; + double deltaTau; +}; + +struct StrengthDialConfig { + double piklFloor = 0.02; + double piklMax = 1.0e4; + int searchVisits = 100; // must be >= 2 + int maxVisitsCap = 400; + double dtauMax = 0.6; + static constexpr double PIKL_INERT = 1.0e9; // KataGo default; "off" +}; + +// Resolved per-run visit budget, anchored to the baseline config's own maxVisits so the +// candidate never spends MORE compute than the baseline unless the operator explicitly opts in. +struct VisitBudget { + int midVisits; // -> StrengthDialConfig.searchVisits (segment B depth / segment-C low anchor) + int maxVisitsCap; // -> StrengthDialConfig.maxVisitsCap (segment C strong end); always >= midVisits + bool raisesAboveBaseline; // true iff baseline has a finite cap and (midVisits > B || maxVisitsCap > B) + bool flooredFromBelow2; // true iff a sub-2 mid (incl.
a B<2 auto baseline) was bumped up to 2 + int effectiveBaseline; // the anchor: baseline cap (clamped to 1e6) when finite, else the legacy 100; + // this is the value an explicit -search-visits/-max-visits-cap is judged against + bool baselineHasCap; // false when the baseline omits maxVisits (search bounded by time/playouts) +}; + +// Pure, NN-free. baselineMaxVisits is SearchParams.maxVisits (int64_t; the ctor default 1<<50 means +// "no real cap" -- search is bounded by time/playouts instead). userSearchVisits / userMaxVisitsCap +// use -1 as the "auto" sentinel (anchor to the baseline); any other value is the explicit operator +// override. midVisits is floored to 2 (piklLambda is inert below 2 visits) and maxVisitsCap is clamped +// up to midVisits (so segment C's log2 interpolation never runs downward). A finite-but-absurd baseline +// (> 1e6 yet < 1<<50) is clamped to 1e6 so the int dial fields cannot overflow. +VisitBudget resolveVisitBudget(int64_t baselineMaxVisits, int userSearchVisits, int userMaxVisitsCap); + +// Returns the strength-coordinate upper bound to actually calibrate over. When segment C is flat +// (maxVisitsCap == midVisits, the auto outcome), the strong third [2,3] collapses to a single point, +// so calibrating there is meaningless: shrink to 2.0 (only when the original range straddles it, +// i.e. xHi > 2.0 && xLo < 2.0). Otherwise returns xHi unchanged. Pure, NN-free. +double effectiveXHi(const VisitBudget& vb, double xLo, double xHi); + +// Maps a scalar strength coordinate x in [0,3] (low=weak, high=strong) to the three dials, +// globally monotone in strength. Clamps x to [0,3]. 
+StrengthDialParams strengthDialToParams(double x, const StrengthDialConfig& c); + +struct CalibrationResult { + double xStar; + double eloSe; // 1-sigma CI half-width in ELO at xStar + int totalGames; + int rounds; + bool converged; + LogisticRS model; // final fitted surface (for reporting fitted ELO) +}; + +// One played round's outcome: a dial coordinate and its {wins, games} tally. Persisted to disk by the +// `tunehuman` command (one per round) so a calibration that is interrupted -- e.g. the process is killed +// by an environment runtime cap -- can be resumed from where it left off instead of restarting. +struct CalibrationSample { + double x; + double wins; // may be fractional (draws = 0.5) + double games; +}; + +// playAt(x) plays a batch at dial x and returns {wins, games}; wins may be fractional. +// onRound(round, xStar, eloSe, distinctXs, totalGames) is optional progress logging. +// initialSamples seeds the fit with prior rounds (for resume): the round loop starts at +// initialSamples.size() and, if those samples already satisfy convergence, returns without playing more. +// onSampleCollected(x, wins, games) fires once per NEWLY played round (not for initialSamples), so the +// caller can durably append each round's outcome to a checkpoint file. +CalibrationResult calibrateToTarget( + const std::function(double)>& playAt, + double xLo, double xHi, double targetWinrate, + int gamesPerRound, int maxRounds, double eloTol, + uint64_t rngSeed, double l2 = 0.5, + const std::function& onRound = nullptr, + const std::vector& initialSamples = std::vector(), + const std::function& onSampleCollected = nullptr); + +// Rewrites baselineText, replacing the value of each override key on its existing +// non-comment "key = value" line (preserving the key spelling), or appending +// "key = value" at the end if absent. All other lines/comments are left intact. 
+std::string overrideConfigText( + const std::string& baselineText, + const std::vector>& overrides); + +#endif // PROGRAM_HUMANSLTUNER_H_ diff --git a/cpp/program/play.cpp b/cpp/program/play.cpp index 4b10ce8d7d..b29ebfd50a 100644 --- a/cpp/program/play.cpp +++ b/cpp/program/play.cpp @@ -1260,12 +1260,12 @@ FinishedGameData* Play::runGame( Search* botB; Search* botW; if(botSpecB.botIdx == botSpecW.botIdx) { - botB = new Search(botSpecB.baseParams, botSpecB.nnEval, &logger, searchRandSeed); + botB = new Search(botSpecB.baseParams, botSpecB.nnEval, botSpecB.humanEval, &logger, searchRandSeed); botW = botB; } else { - botB = new Search(botSpecB.baseParams, botSpecB.nnEval, &logger, searchRandSeed + "@B"); - botW = new Search(botSpecW.baseParams, botSpecW.nnEval, &logger, searchRandSeed + "@W"); + botB = new Search(botSpecB.baseParams, botSpecB.nnEval, botSpecB.humanEval, &logger, searchRandSeed + "@B"); + botW = new Search(botSpecW.baseParams, botSpecW.nnEval, botSpecW.humanEval, &logger, searchRandSeed + "@W"); } FinishedGameData* gameData = runGame( @@ -2409,12 +2409,12 @@ FinishedGameData* GameRunner::runGame( Search* botB; Search* botW; if(botSpecB.botIdx == botSpecW.botIdx) { - botB = new Search(botSpecB.baseParams, botSpecB.nnEval, &logger, seed); + botB = new Search(botSpecB.baseParams, botSpecB.nnEval, botSpecB.humanEval, &logger, seed); botW = botB; } else { - botB = new Search(botSpecB.baseParams, botSpecB.nnEval, &logger, seed + "@B"); - botW = new Search(botSpecW.baseParams, botSpecW.nnEval, &logger, seed + "@W"); + botB = new Search(botSpecB.baseParams, botSpecB.nnEval, botSpecB.humanEval, &logger, seed + "@B"); + botW = new Search(botSpecW.baseParams, botSpecW.nnEval, botSpecW.humanEval, &logger, seed + "@W"); } if(afterInitialization != nullptr) { if(botSpecB.botIdx == botSpecW.botIdx) { diff --git a/cpp/program/play.h b/cpp/program/play.h index eea746a7c9..8a8714fc68 100644 --- a/cpp/program/play.h +++ b/cpp/program/play.h @@ -201,6 +201,7 @@ class 
MatchPairer { int botIdx; std::string botName; NNEvaluator* nnEval; + NNEvaluator* humanEval = NULL; SearchParams baseParams; }; diff --git a/cpp/tests/testhumansltuner.cpp b/cpp/tests/testhumansltuner.cpp new file mode 100644 index 0000000000..8fc358219e --- /dev/null +++ b/cpp/tests/testhumansltuner.cpp @@ -0,0 +1,483 @@ +#include "../tests/tests.h" + +#include "../program/humansltuner.h" + +#include +#include +#include +#include +#include + +using namespace std; + +static double sigmoid(double z) { return 1.0 / (1.0 + std::exp(-z)); } + +void Tests::runHumanSLTunerTests() { + cout << "Running human SL tuner tests" << endl; + + // Test 1: LogisticRS recovers known coefficients. + { + LogisticRS rs(0.5); + double xsv[] = {-2.0, -1.0, 0.0, 1.0, 2.0}; + for(double x : xsv) { + double p = sigmoid(0.5 - 2.0 * x); + rs.addSample(x, std::round(1000.0 * p), 1000.0); + } + rs.fit(); + testAssert(std::fabs(rs.getB0() - 0.5) < 0.1); + testAssert(std::fabs(rs.getB1() + 2.0) < 0.1); + } + + // Test 2: root recovers the target dial. + { + LogisticRS rs(0.5); + double xsv[] = {-2.0, -1.0, 0.0, 1.0, 2.0, 3.0}; + for(double x : xsv) { + double p = sigmoid(-x); // b ~ (0, -1) + rs.addSample(x, std::round(1000.0 * p), 1000.0); + } + rs.fit(); + testAssert(std::fabs(rs.root(0.36) - 0.5754) < 0.05); + testAssert(rs.rootSeElo(0.36) < 50.0); + } + + // Test 3: CI shrinks with more data. + { + LogisticRS big(0.5), small(0.5); + double xsv[] = {-2.0, -1.0, 0.0, 1.0, 2.0, 3.0}; + for(double x : xsv) { + double p = sigmoid(-x); + big.addSample(x, std::round(2000.0 * p), 2000.0); + small.addSample(x, std::round(80.0 * p), 80.0); + } + big.fit(); + small.fit(); + testAssert(big.rootSeElo(0.36) < small.rootSeElo(0.36)); + } + + // Test 4: dial schedule monotonicity and continuity. 
+ { + StrengthDialConfig c; // defaults + double prevDtauA = 1e18; + double prevPiklB = 1e18; + int prevVisC = -1; + for(int i = 0; i <= 60; i++) { + double x = i * 0.05; + StrengthDialParams p = strengthDialToParams(x, c); + if(x < 1.0) { + testAssert(p.maxVisits == 1); + testAssert(p.piklLambda == StrengthDialConfig::PIKL_INERT); + testAssert(p.deltaTau <= prevDtauA + 1e-12); + prevDtauA = p.deltaTau; + } else if(x < 2.0) { + testAssert(p.maxVisits == c.searchVisits); + testAssert(p.deltaTau == 0.0); + testAssert(p.piklLambda <= prevPiklB + 1e-9); + prevPiklB = p.piklLambda; + } else { + testAssert(std::fabs(p.piklLambda - c.piklFloor) < 1e-12); + testAssert(p.maxVisits >= prevVisC); + prevVisC = p.maxVisits; + } + } + // Continuity at x == 2: both sides give maxVisits == searchVisits and piklLambda == piklFloor. + StrengthDialParams justBelow = strengthDialToParams(2.0 - 1e-9, c); + StrengthDialParams at2 = strengthDialToParams(2.0, c); + testAssert(at2.maxVisits == c.searchVisits); + testAssert(justBelow.maxVisits == c.searchVisits); + testAssert(std::fabs(at2.piklLambda - c.piklFloor) < 1e-9); + testAssert(std::fabs(justBelow.piklLambda - c.piklFloor) < 1e-6); + } + + // Test 5: calibrateToTarget is unbiased with an honest CI. Deterministic (fixed seeds). 
+ { + auto winrateOfElo = [](double elo) { return 1.0 / (1.0 + std::pow(10.0, -elo / 400.0)); }; + + auto runScenario = [&](const std::function& eloFn) { + const int numSeeds = 100; + double sumErr = 0.0, sumSqErr = 0.0; + int cover1 = 0, cover2 = 0; + for(int s = 0; s < numSeeds; s++) { + std::mt19937_64 playRng((uint64_t)(1000 + s)); + auto playAt = [&](double x) -> std::pair { + double wr = winrateOfElo(eloFn(x)); + int games = 20; + std::binomial_distribution binom(games, wr); + int wins = binom(playRng); + return std::make_pair((double)wins, games); + }; + CalibrationResult res = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 20, 30, 25.0, (uint64_t)(s + 1), 0.5, nullptr); + double err = eloFn(res.xStar) + 100.0; // true target ELO is -100 + sumErr += err; + sumSqErr += err * err; + if(std::fabs(err) <= res.eloSe) cover1++; + if(std::fabs(err) <= 2.0 * res.eloSe) cover2++; + } + double meanErr = sumErr / numSeeds; + double rmse = std::sqrt(sumSqErr / numSeeds); + double cov1 = (double)cover1 / numSeeds; + double cov2 = (double)cover2 / numSeeds; + testAssert(std::fabs(meanErr) < 15.0); // unbiased + testAssert(rmse < 45.0); + testAssert(cov1 >= 0.55 && cov1 <= 0.90); // honest, not overconfident + testAssert(cov2 >= 0.88); + }; + + runScenario([](double x) { return -100.0 + 300.0 * (x - 0.5); }); + runScenario([](double x) { double d = x - 0.5; return -100.0 + 250.0 * d + 500.0 * d * d * d; }); + } + + // Test 6: overrideConfigText replaces existing keys, ignores comments, appends new keys. + { + std::string input = "a = 1\nb=2\n# c = 3\n"; + std::vector> ov = {{"b", "9"}, {"d", "4"}}; + std::string out = overrideConfigText(input, ov); + testAssert(out == "a = 1\nb = 9\n# c = 3\nd = 4\n"); + } + + // Test 7: an unreachable target pins x* to the boundary, never reports "converged", + // and keeps the reported CI NaN-safe. Exercises the degenerate extrapolation regime + // (candidate far stronger than the target across the whole dial range). 
+ { + auto winrateOfElo = [](double elo) { return 1.0 / (1.0 + std::pow(10.0, -elo / 400.0)); }; + std::mt19937_64 playRng(12345); + auto playAt = [&](double x) -> std::pair { + double elo = 150.0 + 100.0 * x; // always >= +150 ELO; the 0.36 (-100 ELO) root lies below xLo + double wr = winrateOfElo(elo); + int games = 20; + std::binomial_distribution binom(games, wr); + return std::make_pair((double)binom(playRng), games); + }; + CalibrationResult res = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 20, 30, 25.0, (uint64_t)7, 0.5, nullptr); + testAssert(res.converged == false); + testAssert(std::fabs(res.xStar - 0.0) < 1e-6); // pinned to xLo + testAssert(!std::isnan(res.eloSe)); // honest CI: large/inf allowed, NaN never + testAssert(res.eloSe >= 0.0); + testAssert(res.totalGames > 0); + } + + // Test 8: LogisticRS stays NaN-safe under near-degenerate data. + { + // (a) Perfectly separable data (all losses below 0, all wins above). The MLE slope + // diverges; L2 must keep coefficients finite and the reported CI non-NaN. + LogisticRS sep(0.5); + sep.addSample(-1.0, 0.0, 50.0); + sep.addSample(-1.0, 0.0, 50.0); + sep.addSample( 1.0, 50.0, 50.0); + sep.addSample( 1.0, 50.0, 50.0); + sep.fit(); + testAssert(std::isfinite(sep.getB0())); + testAssert(std::isfinite(sep.getB1())); + testAssert(!std::isnan(sep.rootSeElo(0.36))); + + // (b) No spread in x: the slope is unidentified. root() must be NaN (not +-inf) and + // rootSeElo() must be a non-NaN sentinel (+inf), with no crash. 
+ LogisticRS flat(0.5); + for(int i = 0; i < 5; i++) flat.addSample(0.5, 25.0, 50.0); + flat.fit(); + testAssert(std::isfinite(flat.getB0())); + testAssert(std::isfinite(flat.getB1())); + double r = flat.root(0.36); + double se = flat.rootSeElo(0.36); + testAssert(std::isnan(r) || std::isfinite(r)); // defined-or-NaN, never an inf trap + testAssert(!std::isnan(se)); + } + + // Test 9: convergence is structurally impossible with fewer than 4 rounds, even on a + // clean low-noise reachable surface (it requires >= 4 distinct dial samples). This pins + // down the invariant that motivates the CLI's max-rounds warning. + { + auto winrateOfElo = [](double elo) { return 1.0 / (1.0 + std::pow(10.0, -elo / 400.0)); }; + std::mt19937_64 playRng(999); + auto playAt = [&](double x) -> std::pair { + double wr = winrateOfElo(-100.0 + 300.0 * (x - 0.5)); // reachable; -100 ELO at x=0.5 + int games = 200; + std::binomial_distribution binom(games, wr); + return std::make_pair((double)binom(playRng), games); + }; + for(int mr = 1; mr <= 3; mr++) { + CalibrationResult res = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 200, mr, 25.0, (uint64_t)(100 + mr), 0.5, nullptr); + testAssert(res.converged == false); + testAssert(res.rounds == mr); + } + } + + // Test 10: resolveVisitBudget auto+auto anchors mid==cap==baseline and never raises (B in {2,12,400}). + // This is the headline requirement: with both knobs auto, the candidate's visit budget collapses + // onto the baseline, so segment C is flat and visits can never exceed the baseline. 
+ { + int Bs[] = {2, 12, 400}; + for(int B : Bs) { + VisitBudget vb = resolveVisitBudget((int64_t)B, -1, -1); + testAssert(vb.midVisits == B); + testAssert(vb.maxVisitsCap == B); + testAssert(vb.raisesAboveBaseline == false); + testAssert(vb.flooredFromBelow2 == false); + testAssert(vb.midVisits >= 2); + testAssert(vb.maxVisitsCap >= vb.midVisits); + } + } + + // Test 11: explicit -max-visits-cap above baseline is honored and flags raisesAboveBaseline + // (the only intended way visits exceed the baseline -- so the CLI can warn). + { + VisitBudget vb = resolveVisitBudget((int64_t)12, -1, 400); + testAssert(vb.midVisits == 12); + testAssert(vb.maxVisitsCap == 400); + testAssert(vb.raisesAboveBaseline == true); + testAssert(vb.maxVisitsCap >= vb.midVisits); + } + + // Test 12: explicit -max-visits-cap below baseline is clamped UP to mid (segment C never drops below + // segment B, else the log2 interpolation would run downward and break monotonicity); no raise. + { + VisitBudget vb = resolveVisitBudget((int64_t)12, -1, 5); + testAssert(vb.midVisits == 12); + testAssert(vb.maxVisitsCap == 12); + testAssert(vb.maxVisitsCap >= vb.midVisits); + testAssert(vb.raisesAboveBaseline == false); + } + + // Test 13: explicit -search-visits below 2 is floored to 2 (piklLambda needs >1 visit), cap auto-anchors. + { + VisitBudget vb = resolveVisitBudget((int64_t)12, 1, -1); + testAssert(vb.midVisits == 2); + testAssert(vb.maxVisitsCap == 12); + testAssert(vb.flooredFromBelow2 == true); + testAssert(vb.raisesAboveBaseline == false); + VisitBudget vb0 = resolveVisitBudget((int64_t)12, 0, -1); + testAssert(vb0.midVisits == 2); + testAssert(vb0.maxVisitsCap == 12); + testAssert(vb0.flooredFromBelow2 == true); + testAssert(vb0.raisesAboveBaseline == false); + } + + // Test 14: baseline maxVisits==1 edge. 
piklLambda is inert at 1 visit, so segment B must run at 2; + // that unavoidably raises above the degenerate 1-visit baseline, and the helper must report it (the + // CLI then emits the *soft* floor warning, gated on flooredFromBelow2, not the loud over-baseline one). + // The adjacent non-degenerate baseline B==2 must NOT raise. + { + VisitBudget vb = resolveVisitBudget((int64_t)1, -1, -1); + testAssert(vb.midVisits == 2); + testAssert(vb.maxVisitsCap == 2); + testAssert(vb.raisesAboveBaseline == true); + testAssert(vb.flooredFromBelow2 == true); + VisitBudget vb2 = resolveVisitBudget((int64_t)2, -1, -1); + testAssert(vb2.midVisits == 2); + testAssert(vb2.maxVisitsCap == 2); + testAssert(vb2.raisesAboveBaseline == false); + testAssert(vb2.flooredFromBelow2 == false); + } + + // Test 15: no-cap sentinel. A baseline that omits maxVisits gets SearchParams' ctor default 1<<50; + // the helper must treat that as "no real cap" and anchor to the legacy 100, NEVER to 2^50 (which an + // int signature would have truncated/exploded). B==0 is likewise treated as no-cap. + { + VisitBudget vb = resolveVisitBudget(((int64_t)1) << 50, -1, -1); + testAssert(vb.midVisits == 100); + testAssert(vb.maxVisitsCap == 100); + testAssert(vb.raisesAboveBaseline == false); + testAssert(vb.baselineHasCap == false); + testAssert(vb.effectiveBaseline == 100); + VisitBudget vb0 = resolveVisitBudget((int64_t)0, -1, -1); + testAssert(vb0.midVisits == 100); + testAssert(vb0.maxVisitsCap == 100); + testAssert(vb0.raisesAboveBaseline == false); + testAssert(vb0.baselineHasCap == false); + + // Finite-but-absurd baseline in (1e6, 1<<50): the int64->int anchor clamp must hold at 1e6, NOT + // truncate. Without the ABS_MAX clamp, (int)(1<<40) == 0 and midVisits would collapse to 2. 
+ VisitBudget vbBig = resolveVisitBudget(((int64_t)1) << 40, -1, -1); + testAssert(vbBig.midVisits == 1000000); + testAssert(vbBig.maxVisitsCap == 1000000); + testAssert(vbBig.effectiveBaseline == 1000000); + testAssert(vbBig.raisesAboveBaseline == false); + VisitBudget vbCtl = resolveVisitBudget((int64_t)5000000, -1, -1); // just inside the clamp window + testAssert(vbCtl.midVisits == 1000000); + testAssert(vbCtl.maxVisitsCap == 1000000); + + // No-cap baseline + explicit override: raisesAboveBaseline must stay false (no finite baseline to + // exceed), guarding the baselineHasCap gate; the explicit cap is still honored. + VisitBudget vbNoCapExplicit = resolveVisitBudget(((int64_t)1) << 50, -1, 9999); + testAssert(vbNoCapExplicit.midVisits == 100); + testAssert(vbNoCapExplicit.maxVisitsCap == 9999); + testAssert(vbNoCapExplicit.raisesAboveBaseline == false); + testAssert(vbNoCapExplicit.baselineHasCap == false); + testAssert(vbNoCapExplicit.effectiveBaseline == 100); + } + + // Test 16: dial invariant under auto -- a StrengthDialConfig built from resolveVisitBudget keeps + // strengthDialToParams' maxVisits <= baseline for ALL x (B>=2), and segment C is flat at B on [2,3]. + // This binds the "visits never increase under auto" requirement to the actual dial output. + { + int Bs[] = {2, 12, 400}; + for(int B : Bs) { + VisitBudget vb = resolveVisitBudget((int64_t)B, -1, -1); + testAssert(vb.raisesAboveBaseline == false); + StrengthDialConfig c; // defaults for pikl*/dtau + c.searchVisits = vb.midVisits; + c.maxVisitsCap = vb.maxVisitsCap; + for(int i = 0; i <= 300; i++) { + double x = i * 0.01; // 0.00 .. 
3.00 + StrengthDialParams p = strengthDialToParams(x, c); + testAssert(p.maxVisits <= B); + testAssert(p.maxVisits >= 1); + testAssert(p.maxVisits <= c.maxVisitsCap); + if(x >= 2.0) + testAssert(p.maxVisits == B); // segment C flat at baseline + } + } + } + + // Test 17: positive control -- when the user explicitly raises the cap, segment C DOES climb above + // baseline, confirming Test 16's invariant is gated on auto and not vacuously true. + { + int B = 12; + VisitBudget vb = resolveVisitBudget((int64_t)B, -1, 400); + testAssert(vb.raisesAboveBaseline == true); + StrengthDialConfig c; + c.searchVisits = vb.midVisits; + c.maxVisitsCap = vb.maxVisitsCap; + StrengthDialParams strong = strengthDialToParams(3.0, c); + StrengthDialParams mid = strengthDialToParams(1.5, c); + StrengthDialParams weak = strengthDialToParams(0.0, c); + testAssert(strong.maxVisits == 400); + testAssert(strong.maxVisits > B); + testAssert(mid.maxVisits == 12); + testAssert(weak.maxVisits == 1); + } + + // Test 18: explicit -search-visits >= 2 passes through unchanged (SC-3), and an explicit mid above a + // finite baseline raises via the mid lever with flooredFromBelow2==false (SC-5) -- the exact precondition + // for the loud over-baseline warning to fire through the mid lever (no current test reached this). 
+ { + VisitBudget pass = resolveVisitBudget((int64_t)400, 50, -1); + testAssert(pass.midVisits == 50); // explicit mid honored, not anchored to 400 + testAssert(pass.flooredFromBelow2 == false); + testAssert(pass.maxVisitsCap == 400); // auto cap anchors to baseline >= mid + testAssert(pass.raisesAboveBaseline == false); // 50 < 400, 400 == 400 + + VisitBudget midRaise = resolveVisitBudget((int64_t)12, 50, -1); + testAssert(midRaise.midVisits == 50); + testAssert(midRaise.maxVisitsCap == 50); // cap auto = max(mid=50, anchor=12) = 50 + testAssert(midRaise.flooredFromBelow2 == false); + testAssert(midRaise.raisesAboveBaseline == true); // mid 50 > baseline 12 + testAssert(midRaise.effectiveBaseline == 12); + + // The MF-1 scenario: a floored mid (-search-visits 1 -> 2) AND an explicit cap far above baseline. + // flooredFromBelow2 must NOT suppress the cap-driven over-baseline signal: the CLI's loud warning + // gates on (maxVisitsCap != -1 && maxVisitsCap > effectiveBaseline), which is true here. + VisitBudget capRaiseFloored = resolveVisitBudget((int64_t)12, 1, 400); + testAssert(capRaiseFloored.midVisits == 2); + testAssert(capRaiseFloored.maxVisitsCap == 400); + testAssert(capRaiseFloored.flooredFromBelow2 == true); + testAssert(capRaiseFloored.raisesAboveBaseline == true); + testAssert(capRaiseFloored.effectiveBaseline == 12); + testAssert(capRaiseFloored.maxVisitsCap > capRaiseFloored.effectiveBaseline); // -> loud warning fires + } + + // Test 19: effectiveXHi shrinks the calibration range to 2.0 only when segment C is flat (cap==mid, + // the auto outcome) AND the range straddles x=2; otherwise it returns xHi unchanged. 
+ { + VisitBudget flat = resolveVisitBudget((int64_t)12, -1, -1); // cap==mid==12 -> flat segment C + testAssert(effectiveXHi(flat, 0.0, 3.0) == 2.0); // auto/flat, straddles 2 -> shrink + testAssert(effectiveXHi(flat, 0.0, 1.5) == 1.5); // xHi already <= 2 -> no shrink + testAssert(effectiveXHi(flat, 2.5, 3.0) == 3.0); // xLo >= 2 (all-in-plateau) -> not shrunk here + VisitBudget raised = resolveVisitBudget((int64_t)12, -1, 400); // cap 400 != mid 12 -> non-flat + testAssert(effectiveXHi(raised, 0.0, 3.0) == 3.0); // real visit gradient -> keep full range + } + + // Test 20: resume. A calibration can be checkpointed per-round and continued across process restarts + // (the `tunehuman` command persists each round so an environment runtime cap can't lose progress). + // (a) onSampleCollected fires exactly once per NEW round. (b) Resuming with an already-converged sample + // set returns converged WITHOUT playing more games and reproduces the same fit. (c) A split run + // (chunk1, then resume) continues the round/game counts and still converges accurately. + { + auto winrateOfElo = [](double elo) { return 1.0 / (1.0 + std::pow(10.0, -elo / 400.0)); }; + auto eloFn = [](double x) { return -100.0 + 300.0 * (x - 0.5); }; // reachable; -100 ELO at x=0.5 + + // (a) one onSampleCollected call per round, each carrying that round's games. 
+ { + std::mt19937_64 playRng(2024); + auto playAt = [&](double x) -> std::pair { + int games = 50; + std::binomial_distribution binom(games, winrateOfElo(eloFn(x))); + return std::make_pair((double)binom(playRng), games); + }; + std::vector collected; + auto onSample = [&](double x, double wins, double games) { + collected.push_back(CalibrationSample{x, wins, games}); + }; + CalibrationResult res = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 50, 6, 25.0, (uint64_t)42, 0.5, nullptr, + std::vector(), onSample); + testAssert((int)collected.size() == res.rounds); + double sumGames = 0.0; + for(const CalibrationSample& s : collected) { testAssert(s.games == 50.0); sumGames += s.games; } + testAssert((int)sumGames == res.totalGames); + } + + // (b) resuming with a converged sample set short-circuits: playAt never called, fit reproduced exactly. + { + std::mt19937_64 playRng(7); + auto playAt = [&](double x) -> std::pair { + int games = 200; + std::binomial_distribution binom(games, winrateOfElo(eloFn(x))); + return std::make_pair((double)binom(playRng), games); + }; + std::vector samples; + auto cap = [&](double x, double wins, double games) { + samples.push_back(CalibrationSample{x, wins, games}); + }; + CalibrationResult full = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 200, 30, 25.0, (uint64_t)123, 0.5, nullptr, + std::vector(), cap); + testAssert(full.converged); + testAssert((int)samples.size() == full.rounds); + + bool playCalled = false; + auto noPlay = [&](double x) -> std::pair { + (void)x; playCalled = true; return std::make_pair(0.0, 0); + }; + CalibrationResult resumed = calibrateToTarget( + noPlay, 0.0, 1.0, 0.36, 200, 30, 25.0, (uint64_t)123, 0.5, nullptr, samples, nullptr); + testAssert(!playCalled); // no new games played + testAssert(resumed.converged); + testAssert(resumed.rounds == (int)samples.size()); + testAssert(std::fabs(resumed.xStar - full.xStar) < 1e-9); // identical fit from identical samples + testAssert(resumed.totalGames == 
full.totalGames); + } + + // (c) split run: chunk1 (3 rounds, not yet converged) then resume continues and converges. + { + std::mt19937_64 playRng(555); + auto playAt = [&](double x) -> std::pair { + int games = 200; + std::binomial_distribution binom(games, winrateOfElo(eloFn(x))); + return std::make_pair((double)binom(playRng), games); + }; + std::vector s; + auto cap = [&](double x, double wins, double games) { s.push_back(CalibrationSample{x, wins, games}); }; + CalibrationResult chunk1 = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 200, 3, 25.0, (uint64_t)900, 0.5, nullptr, + std::vector(), cap); + testAssert(chunk1.converged == false); + testAssert((int)s.size() == 3); + + std::vector seed = s; // resume from the 3 checkpointed rounds + CalibrationResult chunk2 = calibrateToTarget( + playAt, 0.0, 1.0, 0.36, 200, 30, 25.0, (uint64_t)900, 0.5, nullptr, seed, cap); + testAssert(chunk2.rounds > 3); // continued past the checkpoint + testAssert(chunk2.rounds == (int)s.size()); // every NEW round was checkpointed too + double allGames = 0.0; for(const CalibrationSample& cs : s) allGames += cs.games; + testAssert((int)allGames == chunk2.totalGames); // total accounts for prior + new games + testAssert(chunk2.converged); + testAssert(std::fabs(eloFn(chunk2.xStar) + 100.0) < 60.0); // accurate near -100 ELO + } + } + + cout << "Done human SL tuner tests" << endl; +} diff --git a/cpp/tests/tests.h b/cpp/tests/tests.h index 30bf5d9e96..2ca73635e9 100644 --- a/cpp/tests/tests.h +++ b/cpp/tests/tests.h @@ -36,6 +36,9 @@ namespace Tests { void runSgfTests(); void runSgfFileTests(); + //testhumansltuner.cpp + void runHumanSLTunerTests(); + //testnninputs.cpp void runNNInputsV3V4Tests(); diff --git a/docs/HumanSL_Rank_Ladder.md b/docs/HumanSL_Rank_Ladder.md new file mode 100644 index 0000000000..af3f038e06 --- /dev/null +++ b/docs/HumanSL_Rank_Ladder.md @@ -0,0 +1,276 @@ +# Human-SL KGS-Rank Ladder + +A set of GTP configs that make KataGo (with the Human-SL net) play at a 
chosen amateur +rank, from **9d (top)** down to **20k**, where **each consecutive rank is exactly 1 KGS rank +(1 handicap stone) apart**. The ladder is anchored at 9d; every weaker rank is tuned to be +exactly 1 rank below the rank above it. + +> **Note on history:** an earlier version of this ladder targeted a fixed **−100 ELO** per rung. +> That was **arbitrary** — the Human-SL profiles *are* KGS ranks (see below), so the correct +> spacing is **1 rank = 1 stone**, which for these bots is worth ~150–200 ELO, not 100. The +> configs and method here use the rank/handicap calibration. See *Why rank-spacing, not ELO*. + +## Why rank-spacing, not ELO + +The Human-SL net is **conditioned on KGS rank**. In `cpp/neuralnet/sgfmetadata.cpp`, +`makeBasicRankProfile` sets `source = SOURCE_KGS` with the comment *"KGS rating system is pretty +reasonable, so let's use KGS as the source,"* and the rank→index map is: + +``` +9d→1 8d→2 7d→3 6d→4 5d→5 4d→6 3d→7 2d→8 1d→9 +1k→10 2k→11 … 5k→14 … 10k→19 … 20k→29 +``` + +So `preaz_9d` and `preaz_8d` are **exactly 1 KGS rank apart** by construction (`preaz_` = the +pre-AlphaZero era, game date 2016-09). The right thing to calibrate is the **1-rank (1-stone) +gap**, not an ELO number. + +**The KGS 1-rank rule (= 1 stone):** a 1-rank difference is *not* a placed stone — it is an even +game where the **stronger player (White) gets no komi compensation** (~0.5 instead of the +territory even-komi 6.5). The +weaker player keeps the first-move advantage (~7 points ≈ the value of the first handicap stone). +So two configs are exactly 1 rank apart when: + +> **weaker rank as Black** vs **stronger rank as White**, **komi 0.5** → **even game (50%)**. + +This is the calibration target for every rung below 9d. 
It is also **far more numerically stable** +than the old even-game ELO target: a 50% winrate is bounded and well-conditioned, whereas an +even-game ELO target can sit on a steep, unpinnable cliff (the abandoned 7d even-game attempt +burned ~640 games and never converged). + +## The 9d anchor (special case) + +9d is the **top** of the ladder and has no "rank above" it, so it is anchored differently: the +**`preaz_9d`** (pre-AZ 9d) candidate is calibrated to **even (0 ELO) parity** against the modern +**`rank_9d`** reference — a *different* Human-SL profile — both at 400 visits, in a normal even +game (komi 6.5, territory). The `rank_9d` reference is built from the repo's `gtp_human9d_search_example.cfg` +template by setting `humanSLProfile = rank_9d` (that example ships `preaz_9d`, a different +profile). Result: `gtp_human9d.cfg` = `preaz_9d @ 400v, λ0.045`. + +## File naming + +One config per rank: **`gtp_human<rank>.cfg`** — e.g. `gtp_human9d.cfg`, `gtp_human8d.cfg`, …, +`gtp_human20k.cfg`. The upstream examples `gtp_human5k_example.cfg` and +`gtp_human9d_search_example.cfg` are left untouched (the latter is used only to seed the +`rank_9d` reference for the 9d anchor). + +## Method + +Configs are produced by the **`katago tunehuman`** subcommand, which plays in-process +candidate-vs-baseline games and tunes `humanSLChosenMovePiklLambda` (the strength dial) to hit a +target winrate. + +### Ruleset — matches the deployed configs + +The tuning games are played under the **same ruleset as the deployed config** being calibrated +against: `tunehuman` reads the baseline config's `rules =` line and scores the in-process games with +it (falling back to Japanese). All `gtp_human<rank>.cfg` configs declare `rules = japanese`, so calibration +runs under **Japanese / territory** scoring — exactly how the bots are scored in real play, and the +ruleset the Human-SL net's KGS-rank conditioning was learned from.
(Earlier results were measured +under Chinese / area scoring and are being re-measured under Japanese.) Because the even-game komi +differs by ruleset, the 9d anchor uses the territory-fair **komi 6.5** (not 7.5); the 1-rank handicap +stays **komi 0.5** under either ruleset. + +### The lever — `humanSLChosenMovePiklLambda` + +Strength is controlled almost entirely by **λ = `humanSLChosenMovePiklLambda`** at a fixed +`maxVisits`: + +- **high λ** → closer to raw human policy → **weaker** / more human; +- **low λ** → trusts KataGo's search more → **stronger**. + +`maxVisits` is a *weak* lever near the top of the ladder (the strong anchor saturates it), so all +the upper-dan rungs run at the anchor's **400 visits** and differ only in λ. (`tunehuman` also has +a 3-segment "strength dial" over `x∈[0,3]` — temperature at x<1, λ over [1,2], visits over [2,3] — +but for this ladder we pin visits and sweep λ directly via a fixed-λ grind.) + +### Handicap calibration (rungs below 9d) + +For each rung, tune λ so the **weaker rank (Black) vs the prior rung (White), komi 0.5** is **50%**: + +``` +-target-elo 0 # 0 ELO offset == 50% winrate target +-komi 0.5 # KGS 1-rank handicap: White (stronger) gets no compensation +-cand-color black # weaker candidate always plays Black (the handicap is color-bound) +``` + +`-komi` and `-cand-color` were added to `tunehuman` for this; the harness pins `komiStdev=0` and +`komiAllowIntegerProb=0` so komi is applied **exactly**. With these flags, color is **not** +alternated (the handicap asymmetry is the point), so the measured winrate is the candidate's +winrate as Black-with-the-handicap. + +The 9d **anchor** instead uses even games (`-komi 6.5`, alternating colors) targeting 50% parity +vs `rank_9d`. + +### Pinning a value (avoid small-sample noise) + +For these saturating bots the λ→winrate curve is **steep**, so small samples mislead badly: 20-game +reads can swing ±20% and even flip the apparent λ-ordering. 
Grind each λ to **~80–200 games** before +trusting it. Pin the crossing with a **direct fixed-λ grind** (set `maxVisits` = the rung's visits +on both dial ends and accumulate games), reading the raw winrate — this avoids a logistic fit that +is biased by the winrate ceiling. + +### Resumable checkpointing + +Each round's `(x, wins, games)` is appended to a `-resume-file` (default `.samples`) +with a config-signature header. Re-running the same command reloads the samples and continues, so a +run interrupted by the environment's process-runtime cap resumes from its last completed round. +Partial/corrupt final lines from a hard kill are skipped; a signature mismatch fails loud. The +helper scripts `tune_lambda.sh` (λ sweep) and `tune_maxvisits.sh` (fixed-λ / visit sweep) wrap this +with a per-chunk `timeout` + a winrate/CI readout, and accept `KOMI=` / `CAND_COLOR=` env vars. + +### Nets used + +- **Main net (`-model`)**: `lionffen_b24c64_3x3_v3_12300.bin.gz` (b24c64). +- **Human-SL net (`-human-model`)**: `b18c384nbt-humanv0.bin.gz`. +- **Profile**: each config sets `humanSLProfile = preaz_` (pre-AlphaZero KGS-rank profiles). 
+ +## Reproduction + +**9d anchor → even-game parity vs the modern `rank_9d` reference:** + +```bash +# build the rank_9d (modern 9d) reference from the preaz_9d example template +sed 's/^humanSLProfile = preaz_9d/humanSLProfile = rank_9d/' \ + configs/gtp_human9d_search_example.cfg > baseline_rank9d_400.cfg + +# preaz_9d @ 400v, fixed-λ grind to 50% parity (even game) +katago tunehuman \ + -model lionffen_b24c64_3x3_v3_12300.bin.gz \ + -human-model b18c384nbt-humanv0.bin.gz \ + -baseline-config baseline_rank9d_400.cfg \ + -profile preaz_9d -target-elo 0 -elo-tol 8 \ + -search-visits 400 -max-visits-cap 400 -pikl-floor 0.045 -x-lo 2.0 -x-hi 3.0 \ + -komi 6.5 -cand-color auto \ + -games-per-round 10 -num-game-threads 10 \ + -resume-file gtp_human9d.samples -output-config gtp_human9d.cfg +``` + +**Each weaker rank → 1 KGS rank below the prior rung, via the komi-0.5 handicap.** Example, 8d +(candidate `preaz_8d` as Black vs the tuned `gtp_human9d.cfg` as White), tuning λ to 50%: + +```bash +# fixed-λ grind at the chosen λ (here 0.0865), handicap match, target 50% +katago tunehuman \ + -model lionffen_b24c64_3x3_v3_12300.bin.gz \ + -human-model b18c384nbt-humanv0.bin.gz \ + -baseline-config gtp_human9d.cfg \ + -profile preaz_8d -target-elo 0 -elo-tol 8 \ + -search-visits 400 -max-visits-cap 400 -pikl-floor 0.0865 -x-lo 2.0 -x-hi 3.0 \ + -komi 0.5 -cand-color black \ + -games-per-round 5 -num-game-threads 5 \ + -resume-file gtp_human8d.samples -output-config gtp_human8d.cfg +``` + +To find the right λ first, sweep a bracket (e.g. `pikl-floor 0.02 pikl-max 0.10`, `-x-lo 1 -x-hi 2`) +to locate the ~50% crossing, then fixed-λ-grind that value to ~80+ games. Re-run any command to +resume from its checkpoint. The next rung (7d) chains off `gtp_human8d.cfg` the same way. + +## Results + +Winrates are direct candidate-vs-baseline results with a **95%** Wilson-score CI. 
The 9d anchor +target is even-game 50% parity; every rung below is the komi-0.5 handicap 50% (= exactly 1 rank). + +> **Tuning (Japanese rules, automated) — COMPLETE.** All **28 rungs** below the 9d anchor +> (8d…1d, 1k…20k) are tuned and locked; with the anchor that is the full **9d→20k** ladder (29 configs). +> Rungs are tuned by a sequential root-finder (`tune_decide.py` + `ladder_step.sh`): for each rank it +> pools all (λ, winrate, games) data, weighted-isotonic-fits the 50% crossing, grinds at that λ until +> the **95% CI ⊂ [40%, 60%]**, writes `gtp_human<rank>.cfg`, builds the next baseline, and chains to the +> next rung — resuming per-round to survive the environment's process-kill cap. Every rung landed at +> **46–52%** with its 95% CI inside [40, 60]. Backend: MLX (Apple-Silicon GPU+ANE); tuned λ are +> backend-independent. (Earlier area-scoring numbers were superseded by Japanese.) + +### Findings + +- **λ progression** (strength dial vs rank): rises smoothly through the dan rungs (9d **0.045** → + 1d **0.509**), is noisy-but-roughly-flat through the mid-kyu (1k–6k ≈ **0.47–0.51**), then **climbs + steeply** through the deep kyu: 7k 0.534, 10k 0.590, 14k 0.616, 17k 0.741, 18k 0.782, 19k 0.898, + **20k 1.223**. λ is *not* globally monotone (each rung is calibrated independently to its own + baseline), but the deep-kyu trend is a clear, accelerating rise. +- **Deep-kyu rungs (7k+) are "flat-strong plateau → steep cliff."** Their winrate sits well above 50% + across a wide λ band, then drops through 50% over a narrow λ window. These rungs cost the most games + (≈ 500–1000 each) and needed the root-finder's noise handling (concentrate-near-50% vs grind-the- + crossing, drop misleading snaps, a total-games safety cap, and occasional manual concentration of the + best-sampled point). 
+- **20k needs near-pure-human play (λ ≈ 1.22).** The human-SL net's `preaz_19k` and `preaz_20k` + profiles are **less than 1 KGS rank apart** (rank conditioning compresses at the weakest end), so the + komi-0.5 handicap nearly outweighs the tiny profile gap; only at λ>1 (almost no search) does + preaz_20k+handicap come down to even vs the tuned 19k. It still pins cleanly at λ1.2227 = 50.0%. +- **No rung hit stop-condition #2.** Every rank reached a 95% CI ⊂ [40, 60]; none was left as a + best-effort. The Reproduction section above shows the per-rung command; the table below lists every locked value. + +| Config | Profile | Baseline (White) | Spacing target | Measured | maxVisits | piklLambda | +|--------|---------|------------------|----------------|----------|----------:|-----------:| +| `gtp_human9d.cfg` | preaz_9d | rank_9d @ 400v λ0.08 | even-game parity (50%) | 49.0% = −7 ELO [39%, 59%], 100 g (Japanese) ✅ | 400 | **0.045** | +| `gtp_human8d.cfg` | preaz_8d | gtp_human9d.cfg | 1 KGS rank (komi-0.5 = 50%) | 47.0% [40.5%, 53.6%], 219 g ✅ | 400 | **0.0868** | +| `gtp_human7d.cfg` | preaz_7d | gtp_human8d.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.6% [40.6%, 56.7%], 144 g ✅ | 400 | **0.1267** | +| `gtp_human6d.cfg` | preaz_6d | gtp_human7d.cfg | 1 KGS rank (komi-0.5 = 50%) | 52.0% [44.1%, 59.8%], 152 g ✅ | 400 | **0.1983** | +| `gtp_human5d.cfg` | preaz_5d | gtp_human6d.cfg | 1 KGS rank (komi-0.5 = 50%) | 51.2% [43.6%, 58.8%], 164 g ✅ | 400 | **0.28064** | +| `gtp_human4d.cfg` | preaz_4d | gtp_human5d.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [43.9%, 56.1%], 256 g ✅ | 400 | **0.373** | +| `gtp_human3d.cfg` | preaz_3d | gtp_human4d.cfg | 1 KGS rank (komi-0.5 = 50%) | 51.5% [43.1%, 59.7%], 136 g ✅ | 400 | **0.45556** | +| `gtp_human2d.cfg` | preaz_2d | gtp_human3d.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [41.9%, 58.1%], 144 g ✅ | 400 | **0.51330** | +| `gtp_human1d.cfg` | preaz_1d | gtp_human2d.cfg | 1 KGS rank (komi-0.5 = 50%) | 49.1% [42.5%, 55.7%], 216 g ✅ | 400 | **0.50930** | 
+| `gtp_human1k.cfg` | preaz_1k | gtp_human1d.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.7% [42.5%, 58.9%], 140 g ✅ | 400 | **0.48988** | +| `gtp_human2k.cfg` | preaz_2k | gtp_human1k.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.2% [40.8%, 55.7%], 168 g ✅ | 400 | **0.46755** | +| `gtp_human3k.cfg` | preaz_3k | gtp_human2k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [41.5%, 58.5%], 128 g ✅ | 400 | **0.49173** | +| `gtp_human4k.cfg` | preaz_4k | gtp_human3k.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.1% [40.5%, 55.8%], 160 g ✅ | 400 | **0.47130** | +| `gtp_human5k.cfg` | preaz_5k | gtp_human4k.cfg | 1 KGS rank (komi-0.5 = 50%) | 51.2% [43.6%, 58.9%], 160 g ✅ | 400 | **0.50720** | +| `gtp_human6k.cfg` | preaz_6k | gtp_human5k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.8% [42.0%, 59.6%], 120 g ✅ | 400 | **0.48925** | +| `gtp_human7k.cfg` | preaz_7k | gtp_human6k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.9% [41.8%, 60.0%], 112 g ✅ | 400 | **0.53370** | +| `gtp_human8k.cfg` | preaz_8k | gtp_human7k.cfg | 1 KGS rank (komi-0.5 = 50%) | 49.1% [40.2%, 58.1%], 116 g ✅ | 400 | **0.50640** | +| `gtp_human9k.cfg` | preaz_9k | gtp_human8k.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.0% [41.3%, 54.9%], 204 g ✅ | 400 | **0.53880** | +| `gtp_human10k.cfg` | preaz_10k | gtp_human9k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [42.0%, 58.0%], 148 g ✅ | 400 | **0.59036** | +| `gtp_human11k.cfg` | preaz_11k | gtp_human10k.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.1% [40.5%, 55.8%], 160 g ✅ | 400 | **0.56458** | +| `gtp_human12k.cfg` | preaz_12k | gtp_human11k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.8% [42.2%, 59.3%], 128 g ✅ | 400 | **0.54297** | +| `gtp_human13k.cfg` | preaz_13k | gtp_human12k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.8% [42.1%, 59.4%], 124 g ✅ | 400 | **0.58977** | +| `gtp_human14k.cfg` | preaz_14k | gtp_human13k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [41.3%, 58.7%], 124 g ✅ | 400 | **0.61625** | +| `gtp_human15k.cfg` | preaz_15k | gtp_human14k.cfg | 1 KGS rank (komi-0.5 = 50%) | 49.1% [40.2%, 58.1%], 
116 g ✅ | 400 | **0.61839** | +| `gtp_human16k.cfg` | preaz_16k | gtp_human15k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [42.1%, 57.9%], 152 g ✅ | 400 | **0.67050** | +| `gtp_human17k.cfg` | preaz_17k | gtp_human16k.cfg | 1 KGS rank (komi-0.5 = 50%) | 48.3% [40.9%, 55.7%], 172 g ✅ | 400 | **0.74130** | +| `gtp_human18k.cfg` | preaz_18k | gtp_human17k.cfg | 1 KGS rank (komi-0.5 = 50%) | 46.3% [40.4%, 52.2%], 268 g ✅ (steep-λ-cliff rung) | 400 | **0.78210** | +| `gtp_human19k.cfg` | preaz_19k | gtp_human18k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [41.0%, 59.0%], 116 g ✅ | 400 | **0.89820** | +| `gtp_human20k.cfg` | preaz_20k | gtp_human19k.cfg | 1 KGS rank (komi-0.5 = 50%) | 50.0% [40.6%, 59.4%], 104 g ✅ | 400 | **1.22270** | + +_(The table is complete: every rank from 9d to 20k is listed, each 1 KGS rank below the prior via the handicap method.)_ + +### 8d rung — what was measured (2026-06) + +- **8d = 1 KGS rank below 9d at λ0.0865.** `preaz_8d@400v λ0.0865` (Black) vs `gtp_human9d.cfg` + (White, komi 0.5) = **60/120 = 50.0%, 95% CI [41.2%, 58.8%]** (⊂ [40%,60%]) → exactly 1 rank confirmed. +- **preaz_8d needs the SAME 400 visits as the 9d anchor** — at 200v it is far too weak (visits are + a weak lever near the strong anchor). The rung is reached by **raising λ** (9d's 0.045 → 0.0865, + i.e. more human / weaker move-selection). +- **1 stone ≈ 150–200 ELO here, not 100.** The KGS-correct 8d (λ0.0865) is meaningfully weaker than + the earlier −100-ELO attempt (λ0.0575, now superseded), because the komi-0.5 handicap is a large + advantage for these bots. +- **The handicap (50%-winrate) calibration is well-behaved** — bounded target, gentle slope near + 50% — so it pins cleanly, unlike the even-game ELO target which hit an unpinnable λ-cliff at 7d. 
+ +### 9d anchor — what was measured (2026-06) + +- **piklLambda is the dominant strength lever; visits are nearly inert at high λ.** Versus the strong + `rank_9d@400v` reference, raising the candidate's visits at λ0.08 barely moved the result (it loses + ~−190 ELO at 400v and never reaches parity within [400, 1600] visits) — at high λ the extra search + is spent exploring human-policy (weaker) moves. **Lowering λ is what reaches parity.** +- **Parity sits at λ ≈ 0.045** (not 0.08): `preaz_9d@400v λ0.045` = **201/383 = 52.5% = +17 ELO, + 95% CI [−18, +52]** vs `rank_9d@400v λ0.08` — statistically at parity, stable across the whole + 94→383-game grind (the point estimate never left [−7, +23] ELO). +- **The λ→ELO response is shallow near parity (~23 ELO per 0.01 λ)** because preaz_9d's winrate + *saturates* (ceiling ~67%, not →100%) against the strong reference — `humanSLRootExploreProb + = 0.8` caps how much search can sharpen play. So a logistic auto-fit is biased high near parity; + the anchor was pinned with a direct fixed-λ grind, and λ ∈ [0.04, 0.05] all sit within ~±25 ELO of + parity (finer precision than ±0.01 λ is unnecessary, and would cost ~30 h of 400v games). + +### Cost & practical notes + +- A rung confirmed to ~±10% winrate (≈1 rank) takes ~80–200 games at 400v (~hours at half + resources). The full 20k→9d ladder is a large compute project; lower (weaker) ranks may chain off + cheaper, lower-visit baselines once away from the strong 9d/8d anchors. +- Run **one** GPU job at a time (~5 game-threads) — concurrent `katago` processes trigger + memory-pressure (jetsam) kills. Keep run artifacts in a persistent dir, not `/tmp`. +- Always report a tuned winrate/ELO **with its 95% CI and sample count** — small (5–30 game) + samples are deceptive on these steep curves. + +--- +_Generated by the `tunehuman` workflow. 
Configs and this doc are local artifacts (not the +upstream KataGo examples)._ diff --git a/ladder_step.sh b/ladder_step.sh new file mode 100755 index 0000000000..1324588d16 --- /dev/null +++ b/ladder_step.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# +# ladder_step.sh — one autonomous step of the Human-SL rank-ladder λ calibration. +# +# Uses tune_decide.py (the decision brain: pools all (λ,winrate,games) data for the current +# rank, isotonic-fits the 50% crossing, decides GRIND/LOCK/STOP) to drive the ladder without +# human noise-chasing. Each invocation does exactly one of: +# GRIND -> run ONE ~23-min tunehuman chunk at the recommended λ (komi-0.5 Japanese handicap) +# LOCK -> winrate CI ⊂ [40,60]: write gtp_human.cfg, build next ANE baseline, advance +# STOP -> best-effort (λ>1e8 or >500g at best λ): write config anyway, advance, flag it +# +# Invoke repeatedly (the /loop re-invokes on each chunk completion). State persists in +# ladder_state.txt + the per-λ jpn_ane_L*.samples checkpoints, so it survives the +# process-kill cap. Run THIS in the background; it foregrounds one chunk then exits. 
+set -u +ROOT=/Users/chinchangyang/Code/KataGo-MLX +TUNE=$HOME/.katago_tune +CONFIGS=$ROOT/cpp/configs +STATE=$TUNE/ladder_state.txt +LOCKLOG=$TUNE/ladder_locks.txt +TIMEOUT=${TIMEOUT:-1400} + +# rank chain 9d..1d then 1k..20k (functions, not assoc arrays — macOS bash 3.2 lacks `declare -A`) +stronger() { case "$1" in + 8d)echo 9d;;7d)echo 8d;;6d)echo 7d;;5d)echo 6d;;4d)echo 5d;;3d)echo 4d;;2d)echo 3d;;1d)echo 2d;; + 1k)echo 1d;; *k) echo "$(( ${1%k} - 1 ))k";; *)echo "";; esac; } +weaker() { case "$1" in + 9d)echo 8d;;8d)echo 7d;;7d)echo 6d;;6d)echo 5d;;5d)echo 4d;;4d)echo 3d;;3d)echo 2d;;2d)echo 1d;;1d)echo 1k;; + *k) n=${1%k}; if [ "$n" -ge 20 ]; then echo DONE; else echo "$((n+1))k"; fi;; *)echo "";; esac; } + +RANK=$(cat "$STATE" 2>/dev/null || echo 8d) +if [ "$RANK" = DONE ]; then echo "LADDER COMPLETE — all rungs 8d..1d, 1k..20k done."; exit 0; fi +STR=$(stronger "$RANK") +[ -z "$STR" ] && { echo "ERROR: unknown rank '$RANK'"; exit 2; } +PROFILE=preaz_$RANK +BASELINE=$TUNE/tunebase_human${STR}_ane.cfg +[ -f "$BASELINE" ] || { echo "ERROR: baseline $BASELINE missing (need tuned ${STR} first)"; exit 2; } + +DEC=$(python3 "$TUNE/tune_decide.py" "$TUNE/jpn${RANK}_ane_L*.samples") +echo "[$(date '+%H:%M:%S')] rank=$RANK baseline=$STR -> $DEC" +ACTION=$(printf '%s' "$DEC" | sed -n 's/.*ACTION=\([A-Z]*\).*/\1/p') +LAMBDA=$(printf '%s' "$DEC" | sed -n 's/.*LAMBDA=\([0-9.]*\).*/\1/p') + +case "$ACTION" in + GRIND) + if [ -z "$LAMBDA" ] || [ "$LAMBDA" = NA ]; then + # fresh rank, no data: seed λ by EXTRAPOLATING the last λ-step (the steps grow for weaker + # ranks). seed = stronger λ + (stronger λ - grandparent λ)*1.15; fall back to +0.04. 
+ SLAM=$(grep -E '^humanSLChosenMovePiklLambda' "$CONFIGS/gtp_human${STR}.cfg" | awk '{print $3}') + GP=$(stronger "$STR") + GLAM=$(grep -E '^humanSLChosenMovePiklLambda' "$CONFIGS/gtp_human${GP}.cfg" 2>/dev/null | awk '{print $3}') + LAMBDA=$(python3 -c "s=float('${SLAM:-0.05}'); g='${GLAM}'; seed=s+(s-float(g))*1.15 if g else s+0.04; print(f'{max(0.001, seed):.5f}')") # extrapolate the last λ-step (x1.15) past the stronger rung; +0.04 when no grandparent; clamp at 0.001 + echo " seeding fresh rank $RANK at λ=$LAMBDA (extrapolated from ${STR}=${SLAM}, ${GP}=${GLAM:-NA})" + fi + LAMTAG=$(printf '%s' "$LAMBDA" | sed 's/^0\.//; s/0*$//') # 0.08680 -> 0868, 0.08677 -> 08677 + for p in $(ps aux | grep "[k]atago tunehuman" | awk '{print $2}'); do kill -9 "$p" 2>/dev/null; done + sleep 1 + BASELINE_CFG=$BASELINE CAND_PROFILE=$PROFILE PIKL=$LAMBDA V_LO=400 V_HI=400 \ + KOMI=0.5 CAND_COLOR=black TARGET_ELO=0 ELO_TOL=8 \ + GAMES_PER_ROUND=4 GAME_THREADS=4 TIMEOUT=$TIMEOUT \ + TAG=jpn${RANK}_ane_L${LAMTAG} \ + RESUME=$TUNE/jpn${RANK}_ane_L${LAMTAG}.samples \ + LOG=$TUNE/jpn${RANK}_ane_L${LAMTAG}.log \ + OUT=$TUNE/jpn${RANK}_ane_L${LAMTAG}.out.cfg \ + "$ROOT/tune_maxvisits.sh" + ;; + LOCK|STOP) + WR=$(printf '%s' "$DEC" | sed -n 's/.*WR=\([0-9.]*\).*/\1/p') + CI=$(printf '%s' "$DEC" | sed -n 's/.*CI=\([0-9.,]*\).*/\1/p') + N=$(printf '%s' "$DEC" | sed -n 's/.*N=\([0-9]*\).*/\1/p') + DST=$CONFIGS/gtp_human${RANK}.cfg + SRC=$DST; [ -f "$DST" ] || SRC=$CONFIGS/gtp_human${STR}.cfg # update-in-place if it exists + CALC="# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_${RANK} (Black) vs gtp_human${STR}.cfg (White) = ${WR}% [${CI}] over ${N} games. λ=${LAMBDA}. ${ACTION}." 
+ sed -e "s/^humanSLProfile *=.*/humanSLProfile = ${PROFILE}/" \ + -e "s/^humanSLChosenMovePiklLambda *=.*/humanSLChosenMovePiklLambda = ${LAMBDA}/" \ + "$SRC" > "$DST.tmp" + { echo "$CALC"; cat "$DST.tmp"; } > "$DST"; rm -f "$DST.tmp" + echo "WROTE $DST λ=${LAMBDA} ${ACTION} ${WR}% [${CI}] ${N}g" + echo "$(date '+%F %T') ${RANK} λ=${LAMBDA} ${WR}% CI[${CI}] ${N}g ${ACTION}" >> "$LOCKLOG" + # build the ANE tuning baseline for the next (weaker) rung + NB=$TUNE/tunebase_human${RANK}_ane.cfg + sed -e 's/^nnCacheSizePowerOfTwo *=.*/nnCacheSizePowerOfTwo = 18/' \ + -e 's/^nnMutexPoolSizePowerOfTwo *=.*/nnMutexPoolSizePowerOfTwo = 12/' "$DST" > "$NB" + { echo ""; echo "# ANE-mux tuning baseline (GPU thread0 + ANE thread1); cache lowered — no play effect."; + echo "numNNServerThreadsPerModel = 2"; echo "deviceToUseThread0 = 0"; echo "deviceToUseThread1 = 100"; } >> "$NB" + NEXT=$(weaker "$RANK") + echo "$NEXT" > "$STATE" + echo "ADVANCED $RANK -> $NEXT (next baseline: $NB)" + ;; + *) echo "ERROR: could not parse action from: $DEC"; exit 3 ;; +esac diff --git a/tune_decide.py b/tune_decide.py new file mode 100644 index 0000000000..0c8134d50e --- /dev/null +++ b/tune_decide.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +tune_decide.py — decision brain for the Human-SL rank-ladder λ calibration. + +Pools ALL (λ, wins, games) data for one rank (across every λ ever tried), enforces a +monotonic winrate-vs-λ curve (weighted isotonic / PAVA — robust to noise & non-monotone +fluctuations), estimates the 50%-winrate crossing λ*, and decides the next action: + + ACTION=LOCK LAMBDA=<λ> WR=<%> CI= N= # some λ's 95% CI ⊂ [40,60] + ACTION=GRIND LAMBDA=<λ> # accumulate games at λ* (grid 1e-4) + ACTION=STOP LAMBDA=<λ> WR=<%> CI= N= # >MAXGAMES, no lock: best-effort + +Winrate DECREASES with λ (higher λ = more human = weaker candidate). Candidate is the +weaker rank as Black with the komi-0.5 handicap; target = 50%. + +Usage: tune_decide.py e.g. 
tune_decide.py '~/.katago_tune/jpn8d_ane_L*.samples' +""" +import sys, glob, os, re, math + +CI_LO, CI_HI = 0.40, 0.60 # target band for the 95% CI +GRID = 1e-4 # λ rounding grid (so games pool at grid points) +MAXGAMES = 500 # per-rank budget before stop-condition #2 +Z = 1.96 + +def wilson(w, n): + if n == 0: return (0.0, 1.0) + p = w / n + den = 1 + Z*Z/n + cen = (p + Z*Z/(2*n)) / den + mar = (Z/den) * math.sqrt(p*(1-p)/n + Z*Z/(4*n*n)) + return (cen - mar, cen + mar) + +def parse(path): + """Return (lambda, wins, games) for one fixed-λ samples file.""" + lam = None; w = 0; g = 0 + with open(path) as f: + for i, line in enumerate(f): + if i == 0: + m = re.search(r'piklFloor=([0-9.]+)', line) + if m: lam = float(m.group(1)) + continue + if line.startswith('#'): continue + parts = line.split() + if len(parts) >= 3: + try: + w += float(parts[1]); g += int(parts[2]) + except ValueError: + pass + return (lam, int(w), g) + +def pava_decreasing(pts): + """Weighted pool-adjacent-violators for a NON-INCREASING fit. + pts: list of (lambda, wins, games) sorted by lambda ascending. 
+ Returns list of blocks: (lambda_min, lambda_max, pooled_winrate, games).""" + blocks = [[lam, lam, w, g] for (lam, w, g) in pts] # [lmin,lmax,wins,games] + i = 0 + while i < len(blocks) - 1: + r_i = blocks[i][2] / blocks[i][3] + r_j = blocks[i+1][2] / blocks[i+1][3] + if r_i < r_j: # violation of non-increasing: pool + blocks[i][1] = blocks[i+1][1] + blocks[i][2] += blocks[i+1][2] + blocks[i][3] += blocks[i+1][3] + del blocks[i+1] + if i > 0: i -= 1 + else: + i += 1 + return [(b[0], b[1], b[2]/b[3], b[3]) for b in blocks] + +def main(): + pattern = os.path.expanduser(sys.argv[1]) + files = sorted(glob.glob(pattern)) + # aggregate by λ (multiple files at same λ pool together) + agg = {} + for f in files: + lam, w, g = parse(f) + if lam is None or g == 0: continue + a = agg.setdefault(round(lam, 6), [0, 0]) + a[0] += w; a[1] += g + pts = sorted((lam, w, g) for lam, (w, g) in agg.items()) + total = sum(g for _, _, g in pts) + if not pts: + print("ACTION=GRIND LAMBDA=NA NOTE=no-data"); return + + # ---- LOCK check: any λ whose 95% CI ⊂ [40,60]? pick the best-centered ---- + lockable = [] + for lam, w, g in pts: + lo, hi = wilson(w, g) + if lo >= CI_LO and hi <= CI_HI: + lockable.append((abs(w/g - 0.5), lam, w/g, lo, hi, g)) + if lockable: + lockable.sort() + _, lam, p, lo, hi, g = lockable[0] + print(f"ACTION=LOCK LAMBDA={lam:.5f} WR={100*p:.1f} CI={100*lo:.1f},{100*hi:.1f} N={g}") + return + + # ---- decide next λ ---- + fit = pava_decreasing(pts) + lams = [lam for lam, _, _ in pts] + lo_lam, hi_lam = min(lams), max(lams) + bracketed = fit[0][2] >= 0.5 >= fit[-1][2] + best = min(pts, key=lambda t: abs(t[1]/t[2]-0.5)) # λ whose winrate is closest to 50% + blo, bhi = wilson(best[1], best[2]) + bp = best[1]/best[2] + + # stop-condition #2: best λ ground past budget, OR total games across all λ exceeds a hard cap + # (a genuinely noisy/cliff rung that spreads games without any single λ pinning), OR λ blew up. 
+ if best[2] > MAXGAMES or total > 1300 or best[0] > 1e8: + print(f"ACTION=STOP LAMBDA={best[0]:.5f} WR={100*bp:.1f} " + f"CI={100*blo:.1f},{100*bhi:.1f} N={best[2]} NOTE=best-{best[2]}g-total{total}g-cannot-pin") + return + + # BRACKETED (points on both sides of 50%): grind the INTERPOLATED crossing — the most central + # λ — rather than concentrating a slightly-off best point (which would need far more games to + # lock). The isotonic fit tames noise; the crossing estimate refines each chunk. + if bracketed: + # Concentrate a WELL-SAMPLED λ ONLY when it's genuinely near 50% (±3%, >=50 games) -> it locks + # fast on its own. Do NOT concentrate an EDGE point (e.g. 54% or 46%) even under heavy sampling: + # that just spins without ever tightening into [40,60]. With no near-50% point, fall through and + # grind the interpolated crossing (which targets 50% and, via the snap below, still concentrates). + central = sorted((abs(w/g-0.5), l, g) for (l, w, g) in pts if g >= 50 and abs(w/g-0.5) <= 0.03) + if central: + print(f"ACTION=GRIND LAMBDA={round(central[0][1],5):.5f} NOTE=concentrate-central-{central[0][2]}g") + return + lam_star = None + for k in range(len(fit)-1): + a_lam, b_lam = fit[k][1], fit[k+1][0] + a_wr, b_wr = fit[k][2], fit[k+1][2] + if a_wr >= 0.5 >= b_wr and a_wr != b_wr: + frac = (a_wr - 0.5) / (a_wr - b_wr) + lam_star = a_lam + frac * (b_lam - a_lam); break + if lam_star is None: + lam_star = 0.5*(lo_lam+hi_lam) + # Grind the interpolated crossing directly (targets 50%). Consolidation near 50% is handled by + # the central-concentrate check above (±3%); snapping the crossing onto a spatially-near but + # slightly-off λ (e.g. 54%) only stalls — it grinds the wrong side and never tightens to lock. 
+ lam_grid = round(round(lam_star/GRID)*GRID, 5) + print(f"ACTION=GRIND LAMBDA={lam_grid:.5f} NOTE=bracketed-cross~{lam_star:.5f}-total{total}g") + return + + # NOT bracketed: concentrate the best λ ONLY if it's VERY central (CI includes 50% AND within + # ±3%) — a point 3-5% off is an edge point that locks slowly; better to EXPAND and find the real + # 50% crossing (where the lock comes fast) than to grind ~340 games at a persistently-55% λ. + if blo <= 0.5 <= bhi and abs(bp - 0.5) <= 0.03: + print(f"ACTION=GRIND LAMBDA={best[0]:.5f} NOTE=concentrate-{best[2]}g-CI[{100*blo:.0f},{100*bhi:.0f}]") + return + # ...otherwise EXPAND from the extreme λ toward 50% to find a bracket. + if bp < 0.5: # all too weak -> stronger; expand below strongest λ + ext = pts[0]; ewr = ext[1]/ext[2] + step = min(0.05, max(0.01, abs(ewr-0.5)*0.30 + 0.006)) + nxt = round(max(GRID, ext[0] - step), 5) + print(f"ACTION=GRIND LAMBDA={nxt:.5f} NOTE=all-too-weak({100*bp:.0f}%)-expand-stronger") + else: # all too strong -> weaker; expand above weakest λ + ext = pts[-1]; ewr = ext[1]/ext[2] + step = min(0.05, max(0.01, abs(ewr-0.5)*0.30 + 0.006)) + nxt = round(ext[0] + step, 5) + print(f"ACTION=GRIND LAMBDA={nxt:.5f} NOTE=all-too-strong({100*bp:.0f}%)-expand-weaker") + +if __name__ == "__main__": + main() diff --git a/tune_lambda.sh b/tune_lambda.sh new file mode 100755 index 0000000000..1d9da030dc --- /dev/null +++ b/tune_lambda.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# +# tune_lambda.sh — find the piklLambda that makes a Human-SL candidate reach a target ELO vs a fixed +# baseline, AT A FIXED maxVisits, using `katago tunehuman`'s strength-dial segment B (x in [1,2]). +# +# Segment B holds maxVisits = -search-visits and log-interpolates piklLambda from -pikl-max (at x=1, +# most human / weakest) down to -pikl-floor (at x=2, strongest). The calibration searches x to hit the +# target winrate, so the converged x* -> the parity-piklLambda at that visit count. 
+#
+# Use: sweep this over several CAND_VISITS to chart "parity-lambda vs visits" and see whether more
+# visits lets the candidate stay at parity with a HIGHER (more human-like) lambda.
+#
+# Default job: parity-lambda of preaz_9d @ 200v vs rank_9d @ 50v, lambda search range [0.02, 0.08].
+#
+set -u
+
+ROOT=${ROOT:-/Users/chinchangyang/Code/KataGo-MLX/cpp}  # overridable like every other knob
+KATAGO=${KATAGO:-$ROOT/build_mlx/katago}
+MODEL=${MODEL:-$ROOT/models/lionffen_b24c64_3x3_v3_12300.bin.gz}
+HUMAN=${HUMAN:-$ROOT/models/b18c384nbt-humanv0.bin.gz}
+EXAMPLE=${EXAMPLE:-$ROOT/configs/gtp_human9d_search_example.cfg}
+
+BASE_PROFILE=${BASE_PROFILE:-rank_9d}  # baseline (reference) profile
+BASE_VISITS=${BASE_VISITS:-50}  # baseline maxVisits (rank_9d@50v ~= rank_9d@400v by visit-inertness)
+CAND_PROFILE=${CAND_PROFILE:-preaz_9d}  # candidate profile
+CAND_VISITS=${CAND_VISITS:-200}  # FIXED candidate maxVisits (segment-B searchVisits)
+TARGET_ELO=${TARGET_ELO:-0}
+ELO_TOL=${ELO_TOL:-25}
+PIKL_FLOOR=${PIKL_FLOOR:-0.02}  # strongest lambda end (x=2)
+PIKL_MAX=${PIKL_MAX:-0.08}  # most-human lambda end (x=1)
+GAMES_PER_ROUND=${GAMES_PER_ROUND:-12}
+GAME_THREADS=${GAME_THREADS:-8}
+TIMEOUT=${TIMEOUT:-1300}
+KOMI=${KOMI:-7.5}  # 0.5 for a KGS 1-rank handicap match
+CAND_COLOR=${CAND_COLOR:-auto}  # auto|black|white; black+KOMI=0.5 => weaker candidate gets the 1-rank handicap
+
+TAG=${TAG:-lam_${CAND_PROFILE}_${CAND_VISITS}v_vs_${BASE_PROFILE}${BASE_VISITS}v}
+BASELINE_CFG=${BASELINE_CFG:-$HOME/.katago_tune/base_${BASE_PROFILE}_${BASE_VISITS}v.cfg}
+RESUME=${RESUME:-$HOME/.katago_tune/${TAG}.samples}
+OUT=${OUT:-$HOME/.katago_tune/${TAG}.cfg}
+LOG=${LOG:-$HOME/.katago_tune/${TAG}.log}
+
+mkdir -p "$HOME/.katago_tune"
+if [ ! -f "$BASELINE_CFG" ]; then
+  sed -e "s/^humanSLProfile *= *preaz_9d.*/humanSLProfile = ${BASE_PROFILE}/" \
+      -e "s/^maxVisits *= *[0-9]*.*/maxVisits = ${BASE_VISITS}/" \
+      "$EXAMPLE" > "$BASELINE_CFG" || { rm -f "$BASELINE_CFG"; echo "ERROR: could not build baseline"; exit 2; }  # rm: the redirect already created the file; leaving it would make the next run skip this build
+  echo "Built baseline $BASELINE_CFG ($BASE_PROFILE @ ${BASE_VISITS}v)"
+fi
+
+echo "=== chunk: parity-lambda of $CAND_PROFILE @ ${CAND_VISITS}v -> ${TARGET_ELO} ELO vs ${BASE_PROFILE}@${BASE_VISITS}v (lambda in [$PIKL_FLOOR,$PIKL_MAX], tol +/-${ELO_TOL}) ==="
+timeout "$TIMEOUT" "$KATAGO" tunehuman \
+    -model "$MODEL" -human-model "$HUMAN" \
+    -baseline-config "$BASELINE_CFG" \
+    -profile "$CAND_PROFILE" -target-elo "$TARGET_ELO" -elo-tol "$ELO_TOL" \
+    -search-visits "$CAND_VISITS" -max-visits-cap "$CAND_VISITS" \
+    -pikl-floor "$PIKL_FLOOR" -pikl-max "$PIKL_MAX" \
+    -komi "$KOMI" -cand-color "$CAND_COLOR" \
+    -x-lo 1.0 -x-hi 2.0 \
+    -games-per-round "$GAMES_PER_ROUND" -max-rounds 400 \
+    -num-game-threads "$GAME_THREADS" \
+    -resume-file "$RESUME" -output-config "$OUT" \
+    -seed "lam-${TAG}" >> "$LOG" 2>&1
+
+GAMES=$(grep -vcE '^#' "$RESUME" 2>/dev/null); GAMES=${GAMES:-0}  # grep -c prints 0 itself on no match; default only covers a missing file
+AGG=$(awk 'NR>1{w+=$2;g+=$3} END{if(g>0&&w>0&&w<g){wr=w/g; e=400*log(wr/(1-wr))/log(10);
+  s=sqrt(wr*(1-wr)/g)*400/log(10)/(wr*(1-wr));
+  printf "aggregate %d games: %+.0f ELO +/-%.0f",g,e,s} else if(g>0) printf "aggregate %d games: %g wins (ELO unbounded)",g,w}' "$RESUME" 2>/dev/null)  # a 0% or 100% total has no finite ELO
+LAST=$(grep -E "Round [0-9]+:" "$LOG" 2>/dev/null | tail -1)
+echo "rounds=$GAMES $AGG"
+echo "$LAST"
+
+if grep -q "converged=yes" "$LOG" 2>/dev/null && [ -f "$OUT" ]; then
+  echo "CONVERGED -> $OUT"
+  grep -E '^# (achieved|dial)' "$OUT"
+  exit 0
+else
+  echo "NOT-CONVERGED — re-run to resume (checkpoint: $RESUME)"
+  exit 1
+fi
diff --git a/tune_maxvisits.sh b/tune_maxvisits.sh
new file mode 100755
index 0000000000..a27e19095c
--- /dev/null
+++ b/tune_maxvisits.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+#
+# tune_maxvisits.sh — tune a human-SL candidate's maxVisits to a target ELO vs a fixed baseline,
+# within a 1-sigma CI (default ±25 ELO), using `katago tunehuman`.
+#
+# Default job: tune preaz_9d's maxVisits to reach 0 ELO against rank_9d @ 8 visits.
+#
+# WHY a script: this environment kills any long process after ~30-45 min, and binomial game noise
+# means ±25 ELO needs ~150-200 games near the crossing (several minutes/chunk). So the calibration
+# CHECKPOINTS every round to a samples file and RESUMES: just run this script repeatedly until it
+# prints "CONVERGED". Each invocation is time-bounded (TIMEOUT, below the kill cap) so it exits
+# cleanly, checkpoints, and reports status; the next run picks up where it left off.
+#
+# Mechanism: the candidate maxVisits is swept via the strength dial's segment C (x in [2,3]), which
+# ramps maxVisits from -search-visits (low) to -max-visits-cap (high) at a FIXED piklLambda
+# (=-pikl-floor). All other params (profile, humanSLRootExploreProb, temperature, ...) come from the
+# baseline config, so the candidate == "preaz_9d at the probe settings" with only maxVisits varying.
+#
+set -u
+
+# ---- knobs (override via env) -------------------------------------------------------------------
+ROOT=${ROOT:-/Users/chinchangyang/Code/KataGo-MLX/cpp}  # overridable like every other knob
+KATAGO=${KATAGO:-$ROOT/build_mlx/katago}
+MODEL=${MODEL:-$ROOT/models/lionffen_b24c64_3x3_v3_12300.bin.gz}
+HUMAN=${HUMAN:-$ROOT/models/b18c384nbt-humanv0.bin.gz}
+EXAMPLE=${EXAMPLE:-$ROOT/configs/gtp_human9d_search_example.cfg}  # template (ships preaz_9d)
+
+BASE_PROFILE=${BASE_PROFILE:-rank_9d}  # baseline (reference) human-SL profile
+BASE_VISITS=${BASE_VISITS:-8}  # baseline maxVisits (rank_9d @ 8v)
+CAND_PROFILE=${CAND_PROFILE:-preaz_9d}  # candidate profile to tune
+TARGET_ELO=${TARGET_ELO:-0}  # candidate - baseline ELO target
+ELO_TOL=${ELO_TOL:-25}  # stop when 1-sigma CI half-width <= this
+PIKL=${PIKL:-0.08}  # FIXED candidate piklLambda (segment C floor)
+V_LO=${V_LO:-8}  # low end of candidate maxVisits sweep
+V_HI=${V_HI:-64}  # high end of candidate maxVisits sweep
+GAMES_PER_ROUND=${GAMES_PER_ROUND:-12}
+GAME_THREADS=${GAME_THREADS:-10}
+TIMEOUT=${TIMEOUT:-1400}  # seconds per invocation (< the ~30-min process cap)
+KOMI=${KOMI:-7.5}
+CAND_COLOR=${CAND_COLOR:-auto}
+
+TAG=${TAG:-${CAND_PROFILE}_vs_${BASE_PROFILE}${BASE_VISITS}v}
+BASELINE_CFG=${BASELINE_CFG:-/tmp/baseline_${BASE_PROFILE}_${BASE_VISITS}v.cfg}
+RESUME=${RESUME:-/tmp/tune_${TAG}.samples}
+OUT=${OUT:-/tmp/${CAND_PROFILE}_tuned_${TAG}.cfg}
+LOG=${LOG:-/tmp/tune_${TAG}.log}
+
+# ---- build the baseline config once (from the repo example) -------------------------------------
+if [ ! -f "$BASELINE_CFG" ]; then
+  sed -e "s/^humanSLProfile *= *preaz_9d.*/humanSLProfile = ${BASE_PROFILE}/" \
+      -e "s/^maxVisits *= *[0-9]*.*/maxVisits = ${BASE_VISITS}/" \
+      "$EXAMPLE" > "$BASELINE_CFG" || { rm -f "$BASELINE_CFG"; echo "ERROR: could not build baseline"; exit 2; }  # rm: the redirect already created the file; leaving it would make the next run skip this build
+  echo "Built baseline $BASELINE_CFG ($BASE_PROFILE @ ${BASE_VISITS}v)"
+fi
+
+# ---- one resumable, time-bounded calibration chunk ----------------------------------------------
+echo "=== chunk: tuning $CAND_PROFILE maxVisits in [$V_LO,$V_HI] -> ${TARGET_ELO} ELO vs ${BASE_PROFILE}@${BASE_VISITS}v (tol ±${ELO_TOL}) ==="
+timeout "$TIMEOUT" "$KATAGO" tunehuman \
+    -model "$MODEL" -human-model "$HUMAN" \
+    -baseline-config "$BASELINE_CFG" \
+    -profile "$CAND_PROFILE" -target-elo "$TARGET_ELO" -elo-tol "$ELO_TOL" \
+    -search-visits "$V_LO" -max-visits-cap "$V_HI" -pikl-floor "$PIKL" \
+    -komi "$KOMI" -cand-color "$CAND_COLOR" \
+    -x-lo 2.0 -x-hi 3.0 \
+    -games-per-round "$GAMES_PER_ROUND" -max-rounds 400 \
+    -num-game-threads "$GAME_THREADS" \
+    -resume-file "$RESUME" -output-config "$OUT" \
+    -seed "tune-${TAG}" >> "$LOG" 2>&1
+
+# ---- report status ------------------------------------------------------------------------------
+GAMES=$(grep -vcE '^#' "$RESUME" 2>/dev/null); GAMES=${GAMES:-0}  # rounds checkpointed; grep -c prints 0 itself on no match, default only covers a missing file
+LAST=$(grep -E "Round [0-9]+:" "$LOG" 2>/dev/null | tail -1)
+# direct aggregate winrate -> ELO (cross-check of the logistic fit); a 0% or 100% total has no finite ELO, so raw wins are printed instead
+AGG=$(awk 'NR>1{w+=$2;g+=$3} END{if(g>0&&w>0&&w<g){wr=w/g; e=400*log(wr/(1-wr))/log(10);
+  s=sqrt(wr*(1-wr)/g)*400/log(10)/(wr*(1-wr));
+  printf "aggregate %d games: %+.0f ELO ±%.0f",g,e,s} else if(g>0) printf "aggregate %d games: %g wins (ELO unbounded)",g,w}' "$RESUME" 2>/dev/null)
+echo "rounds=$GAMES $AGG"
+echo "$LAST"
+
+if grep -q "converged=yes" "$LOG" 2>/dev/null && [ -f "$OUT" ]; then
+  echo "CONVERGED -> $OUT"
+  grep -E '^# (achieved|dial)' "$OUT"
+  exit 0
+else
+  echo "NOT-CONVERGED — re-run this script to resume (checkpoint: $RESUME)"
+  exit 1
+fi