Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ add_executable(katago
program/playutils.cpp
program/playsettings.cpp
program/play.cpp
program/humansltuner.cpp
program/selfplaymanager.cpp
${GIT_HEADER_FILE_ALWAYS_UPDATED}
tests/testboardarea.cpp
Expand All @@ -333,6 +334,7 @@ add_executable(katago
tests/testrules.cpp
tests/testscore.cpp
tests/testsgf.cpp
tests/testhumansltuner.cpp
tests/testsymmetries.cpp
tests/testnninputs.cpp
tests/testownership.cpp
Expand Down Expand Up @@ -365,6 +367,7 @@ add_executable(katago
command/selfplay.cpp
command/startposes.cpp
command/tune.cpp
command/tunehuman.cpp
command/writetrainingdata.cpp
main.cpp
)
Expand Down
2 changes: 2 additions & 0 deletions cpp/command/runtests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ int MainCmds::runtests(const vector<string>& args) {
Tests::runSymmetryDifferenceTests();
Tests::runBoardReplayTest();

Tests::runHumanSLTunerTests();

ScoreValue::freeTables();

Tests::runInlineConfigTests();
Expand Down
575 changes: 575 additions & 0 deletions cpp/command/tunehuman.cpp

Large diffs are not rendered by default.

74 changes: 74 additions & 0 deletions cpp/configs/gtp_human10k.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_10k (Black) vs gtp_human9k.cfg (White) = 50.0% [42.0,58.0] over 148 games. λ=0.59036. LOCK.
# gtp_human10k.cfg — 10k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md.
# Tuned so preaz_10k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human9k.cfg (White)
# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits.
#
# Run: ./katago gtp -config gtp_human10k.cfg -model <katago_model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz
logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false

rules = japanese

allowResignation = true
resignThreshold = -0.98
resignConsecTurns = 10
resignMinScoreDifference = 20
resignMinMovesPerBoardArea = 0.40

# This ladder runs at a fixed 400 visits per move; playing strength is controlled by humanSLChosenMovePiklLambda (below).
# To adjust strength, change humanSLChosenMovePiklLambda and leave maxVisits at 400.
maxVisits = 400

numSearchThreads = 8
lagBuffer = 1.0

delayMoveScale = 2.0
delayMoveMax = 10.0

# Imitate a human amateur at this rung's KGS rank (the Human-SL profile is set by humanSLProfile below).
humanSLProfile = preaz_10k
humanSLChosenMoveProp = 1.0
humanSLChosenMoveIgnorePass = true

# Strength dial: a higher humanSLChosenMovePiklLambda makes play more human-like and weaker;
# a lower value trusts KataGo search more and makes play stronger.
# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file.
humanSLChosenMovePiklLambda = 0.59036

# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda
humanSLRootExploreProbWeightless = 0.8
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0

humanSLCpuctExploration = 0.50
humanSLCpuctPermanent = 2.0

chosenMoveTemperatureEarly = 0.70
chosenMoveTemperature = 0.25
chosenMoveTemperatureHalflife = 30
chosenMoveTemperatureOnlyBelowProb = 1.0
chosenMoveSubtract = 0
chosenMovePrune = 0

nnCacheSizePowerOfTwo = 20
nnMutexPoolSizePowerOfTwo = 14

ignorePreRootHistory = false
analysisIgnorePreRootHistory = false

rootNumSymmetriesToSample = 2
useLcbForSelection = false

winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.5
dynamicScoreUtilityFactor = 0.5

useUncertainty = false
subtreeValueBiasFactor = 0.0
useNoisePruning = false
74 changes: 74 additions & 0 deletions cpp/configs/gtp_human11k.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_11k (Black) vs gtp_human10k.cfg (White) = 48.1% [40.5,55.8] over 160 games. λ=0.56458. LOCK.
# gtp_human11k.cfg — 11k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md.
# Tuned so preaz_11k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human10k.cfg (White)
# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits.
#
# Run: ./katago gtp -config gtp_human11k.cfg -model <katago_model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz
logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false

rules = japanese

allowResignation = true
resignThreshold = -0.98
resignConsecTurns = 10
resignMinScoreDifference = 20
resignMinMovesPerBoardArea = 0.40

# This ladder runs at a fixed 400 visits per move; playing strength is controlled by humanSLChosenMovePiklLambda (below).
# To adjust strength, change humanSLChosenMovePiklLambda and leave maxVisits at 400.
maxVisits = 400

numSearchThreads = 8
lagBuffer = 1.0

delayMoveScale = 2.0
delayMoveMax = 10.0

# Imitate a human amateur at this rung's KGS rank (the Human-SL profile is set by humanSLProfile below).
humanSLProfile = preaz_11k
humanSLChosenMoveProp = 1.0
humanSLChosenMoveIgnorePass = true

# Strength dial: a higher humanSLChosenMovePiklLambda makes play more human-like and weaker;
# a lower value trusts KataGo search more and makes play stronger.
# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file.
humanSLChosenMovePiklLambda = 0.56458

# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda
humanSLRootExploreProbWeightless = 0.8
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0

humanSLCpuctExploration = 0.50
humanSLCpuctPermanent = 2.0

chosenMoveTemperatureEarly = 0.70
chosenMoveTemperature = 0.25
chosenMoveTemperatureHalflife = 30
chosenMoveTemperatureOnlyBelowProb = 1.0
chosenMoveSubtract = 0
chosenMovePrune = 0

nnCacheSizePowerOfTwo = 20
nnMutexPoolSizePowerOfTwo = 14

ignorePreRootHistory = false
analysisIgnorePreRootHistory = false

rootNumSymmetriesToSample = 2
useLcbForSelection = false

winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.5
dynamicScoreUtilityFactor = 0.5

useUncertainty = false
subtreeValueBiasFactor = 0.0
useNoisePruning = false
74 changes: 74 additions & 0 deletions cpp/configs/gtp_human12k.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_12k (Black) vs gtp_human11k.cfg (White) = 50.8% [42.2,59.3] over 128 games. λ=0.54297. LOCK.
# gtp_human12k.cfg — 12k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md.
# Tuned so preaz_12k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human11k.cfg (White)
# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits.
#
# Run: ./katago gtp -config gtp_human12k.cfg -model <katago_model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz
logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false

rules = japanese

allowResignation = true
resignThreshold = -0.98
resignConsecTurns = 10
resignMinScoreDifference = 20
resignMinMovesPerBoardArea = 0.40

# This ladder runs at a fixed 400 visits per move; playing strength is controlled by humanSLChosenMovePiklLambda (below).
# To adjust strength, change humanSLChosenMovePiklLambda and leave maxVisits at 400.
maxVisits = 400

numSearchThreads = 8
lagBuffer = 1.0

delayMoveScale = 2.0
delayMoveMax = 10.0

# Imitate a human amateur at this rung's KGS rank (the Human-SL profile is set by humanSLProfile below).
humanSLProfile = preaz_12k
humanSLChosenMoveProp = 1.0
humanSLChosenMoveIgnorePass = true

# Strength dial: a higher humanSLChosenMovePiklLambda makes play more human-like and weaker;
# a lower value trusts KataGo search more and makes play stronger.
# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file.
humanSLChosenMovePiklLambda = 0.54297

# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda
humanSLRootExploreProbWeightless = 0.8
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0

humanSLCpuctExploration = 0.50
humanSLCpuctPermanent = 2.0

chosenMoveTemperatureEarly = 0.70
chosenMoveTemperature = 0.25
chosenMoveTemperatureHalflife = 30
chosenMoveTemperatureOnlyBelowProb = 1.0
chosenMoveSubtract = 0
chosenMovePrune = 0

nnCacheSizePowerOfTwo = 20
nnMutexPoolSizePowerOfTwo = 14

ignorePreRootHistory = false
analysisIgnorePreRootHistory = false

rootNumSymmetriesToSample = 2
useLcbForSelection = false

winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.5
dynamicScoreUtilityFactor = 0.5

useUncertainty = false
subtreeValueBiasFactor = 0.0
useNoisePruning = false
74 changes: 74 additions & 0 deletions cpp/configs/gtp_human13k.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CALIBRATED (Japanese, komi-0.5 handicap, ANE): preaz_13k (Black) vs gtp_human12k.cfg (White) = 50.8% [42.1,59.4] over 124 games. λ=0.58977. LOCK.
# gtp_human13k.cfg — 13k rung of the Human-SL KGS-rank ladder. See docs/HumanSL_Rank_Ladder.md.
# Tuned so preaz_13k (Black, komi 0.5) is an even game (50%) vs the prior rung gtp_human12k.cfg (White)
# = exactly 1 KGS rank (1 stone) weaker. Strength dial = humanSLChosenMovePiklLambda (below); 400 visits.
#
# Run: ./katago gtp -config gtp_human13k.cfg -model <katago_model>.bin.gz -human-model b18c384nbt-humanv0.bin.gz
logDir = gtp_logs
logAllGTPCommunication = true
logSearchInfo = true
logSearchInfoForChosenMove = false
logToStderr = false

rules = japanese

allowResignation = true
resignThreshold = -0.98
resignConsecTurns = 10
resignMinScoreDifference = 20
resignMinMovesPerBoardArea = 0.40

# This ladder runs at a fixed 400 visits per move; playing strength is controlled by humanSLChosenMovePiklLambda (below).
# To adjust strength, change humanSLChosenMovePiklLambda and leave maxVisits at 400.
maxVisits = 400

numSearchThreads = 8
lagBuffer = 1.0

delayMoveScale = 2.0
delayMoveMax = 10.0

# Imitate a human amateur at this rung's KGS rank (the Human-SL profile is set by humanSLProfile below).
humanSLProfile = preaz_13k
humanSLChosenMoveProp = 1.0
humanSLChosenMoveIgnorePass = true

# Strength dial: a higher humanSLChosenMovePiklLambda makes play more human-like and weaker;
# a lower value trusts KataGo search more and makes play stronger.
# The calibrated value for this rung is set below; see the CALIBRATED line at the top of this file.
humanSLChosenMovePiklLambda = 0.58977

# Spend 80% of visits to explore humanSL moves so they get evaluations for humanSLChosenMovePiklLambda
humanSLRootExploreProbWeightless = 0.8
humanSLRootExploreProbWeightful = 0.0
humanSLPlaExploreProbWeightless = 0.0
humanSLPlaExploreProbWeightful = 0.0
humanSLOppExploreProbWeightless = 0.0
humanSLOppExploreProbWeightful = 0.0

humanSLCpuctExploration = 0.50
humanSLCpuctPermanent = 2.0

chosenMoveTemperatureEarly = 0.70
chosenMoveTemperature = 0.25
chosenMoveTemperatureHalflife = 30
chosenMoveTemperatureOnlyBelowProb = 1.0
chosenMoveSubtract = 0
chosenMovePrune = 0

nnCacheSizePowerOfTwo = 20
nnMutexPoolSizePowerOfTwo = 14

ignorePreRootHistory = false
analysisIgnorePreRootHistory = false

rootNumSymmetriesToSample = 2
useLcbForSelection = false

winLossUtilityFactor = 1.0
staticScoreUtilityFactor = 0.5
dynamicScoreUtilityFactor = 0.5

useUncertainty = false
subtreeValueBiasFactor = 0.0
useNoisePruning = false
Loading
Loading