tt: 修复开启置换表后与未开启置换表的版本对战时,执黑胜率低 3%、执白胜率低 6% 的问题

同时将 alpha-beta 递归调用的传参形式由
search(depth - 1 + epsilon, alpha, beta, node->children[i]); 改为
-search(depth - 1 + epsilon, -beta, -alpha, node->children[i]); 的形式
(仅当走子后行棋方发生变化时才取反并交换窗口;行棋方不变时仍按原形式调用)。

简化必败以及明显劣势的判断方式。只要判断 root 的值,不需要看其孩子。

并顺带:
删除 MIN_MAX_ONLY 宏相关代码;
删除 SORT_CONSIDER_PRUNED、CLEAR_PRUNED_FLAG_BEFORE_SEARCH 宏相关代码;

新的 TT 算法和修改前算法对战 6000 盘。
执黑 22% : 10%, 执白 86% : 74%, 新算法均有 12% 的优势。
速度提升 1.5%, 可以认为无变化。

和未开 TT 的版本对战 1700 盘,结果均为 11% : 83%,误差不超过 1%,故可认为新 TT 已经不会
造成明显劣化。目前开启 TT 的速度约为未开启 TT 的 4.6 倍。

自对弈棋谱会变化。自对弈时长 12s+, 最后一步是 -(1,8), 白方胜。
This commit is contained in:
Calcitem 2020-04-30 23:52:08 +08:00
parent 1f62f4469f
commit 7b566b1e71
4 changed files with 74 additions and 180 deletions

View File

@ -62,8 +62,6 @@
//#define TIME_STAT
//#define CYCLE_STAT
//#define MIN_MAX_ONLY
//#define EVALUATE_ENABLE
#ifdef EVALUATE_ENABLE
@ -82,7 +80,6 @@
//#define IDS_DEBUG
//#define IDS_ADD_VALUE
//#define CLEAR_PRUNED_FLAG_BEFORE_SEARCH
//#define DEEPER_IF_ONLY_ONE_LEGAL_MOVE
#define TRANSPOSITION_TABLE_ENABLE
@ -109,8 +106,6 @@
//#define DONOT_DELETE_TREE
//#define SORT_CONSIDER_PRUNED
//#define MESSAGEBOX_ENABLE
#ifdef DEBUG_MODE
@ -153,4 +148,4 @@
#define likely(expr) (__builtin_expect(!!(expr), 1))
#define unlikely(expr) (__builtin_expect(!!(expr), 0))
#endif // CONFIG_H
#endif // CONFIG_H

View File

@ -140,6 +140,10 @@ value_t Evaluation::getValue(StateInfo *st, Position *position, Node *node)
break;
}
if (st->position->sideToMove == PLAYER_WHITE) {
value = -value;
}
// 赋值返回
node->value = value;
return value;

View File

@ -207,10 +207,6 @@ Node *Node::addChild(
#ifdef ALPHABETA_AI
newNode->value = VALUE_ZERO;
newNode->rating = RATING_ZERO;
#ifdef SORT_CONSIDER_PRUNED
newNode->pruned = false;
#endif
#endif // ALPHABETA_AI
newNode->childrenSize = 0; // Important
@ -395,9 +391,7 @@ int AIAlgorithm::nodeCompare(const Node *first, const Node *second)
return 0;
}
int ret = (gSideToMove == PLAYER_BLACK ? 1 : -1);
return (first->value < second->value ? ret : -ret);
return (first->value < second->value ? 1 : -1);
}
return (first->rating < second->rating ? 1 : -1);
@ -411,9 +405,9 @@ void AIAlgorithm::sortMoves(Node *node)
//#define DEBUG_SORT
#ifdef DEBUG_SORT
for (int i = 0; i < node->childrenSize; i++) {
for (int moveIndex = 0; moveIndex < node->childrenSize; moveIndex++) {
loggerDebug("* [%d] %p: %d = %d %d (%d)\n",
i, &(node->children[i]), node->children[i]->move, node->children[i]->value, node->children[i]->rating, !node->children[i]->pruned);
moveIndex, &(node->children[moveIndex]), node->children[moveIndex]->move, node->children[moveIndex]->value, node->children[moveIndex]->rating, !node->children[moveIndex]->pruned);
}
loggerDebug("\n");
#endif
@ -454,14 +448,14 @@ void AIAlgorithm::sortMoves(Node *node)
#ifdef DEBUG_SORT
if (st->position.sideToMove == PLAYER_BLACK) {
for (int i = 0; i < node->childrenSize; i++) {
for (int moveIndex = 0; moveIndex < node->childrenSize; moveIndex++) {
loggerDebug("+ [%d] %p: %d = %d %d (%d)\n",
i, &(node->children[i]), node->children[i]->move, node->children[i]->value, node->children[i]->rating, !node->children[i]->pruned);
moveIndex, &(node->children[moveIndex]), node->children[moveIndex]->move, node->children[moveIndex]->value, node->children[moveIndex]->rating, !node->children[moveIndex]->pruned);
}
} else {
for (int i = 0; i < node->childrenSize; i++) {
for (int moveIndex = 0; moveIndex < node->childrenSize; moveIndex++) {
loggerDebug("- [%d] %p: %d = %d %d (%d)\n",
i, &(node->children[i]), node->children[i]->move, node->children[i]->value, node->children[i]->rating, !node->children[i]->pruned);
moveIndex, &(node->children[moveIndex]), node->children[moveIndex]->move, node->children[moveIndex]->value, node->children[moveIndex]->rating, !node->children[moveIndex]->pruned);
}
}
loggerDebug("\n----------------------------------------\n");
@ -592,26 +586,22 @@ int AIAlgorithm::search(depth_t depth)
loggerDebug("%d(%d) ", value, value - lastValue);
#ifdef IDS_DEBUG
loggerDebug(": --------------- depth = %d/%d ---------------\n", i, d);
loggerDebug(": --------------- depth = %d/%d ---------------\n", moveIndex, d);
int k = 0;
int cs = root->childrenSize;
for (int j = 0; j < cs; j++) {
if (root->children[j]->value == root->value
#ifdef SORT_CONSIDER_PRUNED
&& !root->children[j]->pruned
#endif
) {
for (int i = 0; i < cs; i++) {
if (root->children[i]->value == root->value) {
loggerDebug("[%.2d] %d\t%s\t%d\t%d *\n", k,
root->children[j]->move,
moveToCommand(root->children[j]->move),
root->children[j]->value,
root->children[j]->rating);
root->children[i]->move,
moveToCommand(root->children[i]->move),
root->children[i]->value,
root->children[i]->rating);
} else {
loggerDebug("[%.2d] %d\t%s\t%d\t%d\n", k,
root->children[j]->move,
moveToCommand(root->children[j]->move),
root->children[j]->value,
root->children[j]->rating);
root->children[i]->move,
moveToCommand(root->children[i]->move),
root->children[i]->value,
root->children[i]->rating);
}
k++;
@ -663,6 +653,8 @@ int AIAlgorithm::search(depth_t depth)
#endif // IDS_WINDOW
}
originDepth = d;
value = search(d, alpha, beta, root);
#ifdef TIME_STAT
@ -682,9 +674,6 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
// 评价值
value_t value;
// 当前节点的 MinMax 值,最终赋值给节点 value与 alpha 和 Beta 不同
value_t minMax;
// 临时增加的深度,克服水平线效应用
depth_t epsilon = 0;
@ -749,12 +738,6 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
#endif
node->value = probeVal;
#ifdef SORT_CONSIDER_PRUNED
if (type != TT::hashfEXACT && type != TT::hashfEMPTY) {
node->pruned = true; // TODO: 是否有用?
}
#endif
#if 0
// TODO: 有必要针对深度微调 value?
if (position->turn == PLAYER_BLACK)
@ -842,6 +825,7 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
);
if (node == root && moveSize == 1) {
best = moves[0];
return node->value;
}
}
@ -849,18 +833,8 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
// 排序子节点树
sortMoves(node);
// 根据演算模型执行 MiniMax 检索,对先手,搜索 Max, 对后手,搜索 Min
minMax = st->position->sideToMove == PLAYER_BLACK ? -VALUE_INFINITE : VALUE_INFINITE;
assert(node->childrenSize != 0);
#ifdef CLEAR_PRUNED_FLAG_BEFORE_SEARCH
#ifdef SORT_CONSIDER_PRUNED
node->pruned = false;
#endif // SORT_CONSIDER_PRUNED
#endif // CLEAR_PRUNED_FLAG_BEFORE_SEARCH
int nchild = node->childrenSize;
#ifdef TRANSPOSITION_TABLE_ENABLE
@ -881,8 +855,10 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
for (int i = 0; i < nchild; i++) {
// 棋局入栈保存,以便后续撤销着法
stashPosition();
doMove(node->children[i]->move);
player_t before = st->position->sideToMove;
move_t m = node->children[i]->move;
doMove(m);
player_t after = st->position->sideToMove;
if (gameOptions.getDepthExtension() == true && nchild == 1) {
epsilon = 1;
@ -891,74 +867,37 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
}
// 递归 Alpha-Beta 剪枝
value = search(depth - 1 + epsilon, alpha, beta, node->children[i]);
if (after != before) {
value = -search(depth - 1 + epsilon, -beta, -alpha, node->children[i]);
} else {
value = search(depth - 1 + epsilon, alpha, beta, node->children[i]);
}
undoMove();
switch (st->position->sideToMove) {
case PLAYER_BLACK:
// 为走棋一方的层, 局面对走棋的一方来说是以 α 为评价
// 取最大值
minMax = std::max(value, minMax);
// α 为走棋一方搜索到的最好值,任何比它小的值对当前结点的走棋方都没有意义
// 如果某个着法的结果小于或等于 α,那么它就是很差的着法,因此可以抛弃
if (value > alpha) {
if (value >= beta) {
#ifdef TRANSPOSITION_TABLE_ENABLE
hashf = TT::hashfEXACT;
hashf = TT::hashfBETA;
#endif
alpha = value;
}
node->value = beta;
goto out;
}
break;
case PLAYER_WHITE:
// 为走棋方的对手一方的层, 局面对对手一方来说是以 β 为评价
// 取最小值
minMax = std::min(value, minMax);
// β 表示对手目前的劣势,这是对手所能承受的最坏结果
// β 值越大,表示对手劣势越明显
// 在对手看来,他总是会找到一个对策不比 β 更坏的
// 如果当前结点返回 β 或比 β 更好的值,作为父结点的对方就绝对不会选择这种策略,
// 如果搜索过程中返回 β 或比 β 更好的值,那就够好的了,走棋的一方就没有机会使用这种策略了。
// 如果某个着法的结果大于或等于 β,那么整个结点就作废了,因为对手不希望走到这个局面,而它有别的着法可以避免到达这个局面。
// 因此如果我们找到的评价大于或等于β,就证明了这个结点是不会发生的,因此剩下的合理着法没有必要再搜索。
// TODO: 本意是要删掉这句,忘了删,结果反而棋力没有明显问题,待查
// 如果删掉这句,启用下面这段代码,则三有时不会堵并且计算效率较低
// 有了这句之后hashf 不可能等于 hashfBETA
beta = std::min(value, beta);
#if 0
if (value < beta) {
if (value > alpha) {
#ifdef TRANSPOSITION_TABLE_ENABLE
hashf = hashfBETA;
hashf = TT::hashfEXACT;
#endif
beta = value;
alpha = value;
if (depth == originDepth) {
best = m;
}
#endif
break;
default:
break;
}
#ifndef MIN_MAX_ONLY
// 如果某个着法的结果大于 α 但小于β,那么这个着法就是走棋一方可以考虑走的
// 否则剪枝返回
if (alpha >= beta) {
#ifdef SORT_CONSIDER_PRUNED
node->pruned = true;
#endif
break;
}
#endif /* !MIN_MAX_ONLY */
}
node->value = minMax;
node->value = alpha;
out:
#ifdef DEBUG_AB_TREE
node->alpha = alpha;
@ -978,15 +917,12 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
}
#endif // DONOT_DELETE_TREE
if (gameOptions.getIDSEnabled()) {
#ifdef IDS_ADD_VALUE
if (st->position->sideToMove == PLAYER_BLACK) {
node->children[0]->value += 1;
node->value += 1;
} else {
node->children[0]->value -= 1;
node->value -= 1;
}
node->children[0]->value += 1;
node->value += 1;
#endif /* IDS_ADD_VALUE */
}
@ -997,7 +933,7 @@ value_t AIAlgorithm::search(depth_t depth, value_t alpha, value_t beta, Node *no
hashf,
hash
#ifdef BEST_MOVE_ENABLE
, node->children[0]->move
, best
#endif // BEST_MOVE_ENABLE
);
#endif /* TRANSPOSITION_TABLE_ENABLE */
@ -1030,8 +966,7 @@ void AIAlgorithm::undoMove()
#ifdef ALPHABETA_AI
const char* AIAlgorithm::bestMove()
{
vector<Node*> bestMoves;
size_t bestMovesSize = 0;
char charChoose = '*';
if (!root->childrenSize) {
return "error!";
@ -1039,21 +974,22 @@ const char* AIAlgorithm::bestMove()
Board::printBoard();
int i = 0;
int moveIndex = 0;
int cs = root->childrenSize;
for (int j = 0; j < cs; j++) {
if (root->children[j]->value == root->value
#ifdef SORT_CONSIDER_PRUNED
&& !root->children[j]->pruned
#endif
) {
loggerDebug("[%.2d] %d\t%s\t%d\t%d *\n", i, root->children[j]->move, moveToCommand(root->children[j]->move), root->children[j]->value, root->children[j]->rating);
} else {
loggerDebug("[%.2d] %d\t%s\t%d\t%d\n", i, root->children[j]->move, moveToCommand(root->children[j]->move), root->children[j]->value, root->children[j]->rating);
for (int i = 0; i < cs; i++) {
if (root->children[i]->move != best) {
charChoose = ' ';
}
i++;
loggerDebug("[%.2d] %d\t%s\t%d\t%d %c\n", moveIndex,
root->children[i]->move,
moveToCommand(root->children[i]->move),
root->children[i]->value,
root->children[i]->rating,
charChoose);
moveIndex++;
}
player_t side = state->position->sideToMove;
@ -1061,17 +997,7 @@ const char* AIAlgorithm::bestMove()
#ifdef ENDGAME_LEARNING
// 检查是否明显劣势
if (gameOptions.getLearnEndgameEnabled()) {
bool isMostWeak = true; // 是否明显劣势
for (int j = 0; j < root->childrenSize; j++) {
if ((side == PLAYER_BLACK && root->children[j]->value > -VALUE_STRONG) ||
(side == PLAYER_WHITE && root->children[j]->value < VALUE_STRONG)) {
isMostWeak = false;
break;
}
}
if (isMostWeak) {
if (root->value <= -VALUE_STRONG) {
Endgame endgame;
endgame.type = state->position->sideToMove == PLAYER_BLACK ?
ENDGAME_PLAYER_WHITE_WIN : ENDGAME_PLAYER_BLACK_WIN;
@ -1083,40 +1009,13 @@ const char* AIAlgorithm::bestMove()
// 检查是否必败
if (gameOptions.getGiveUpIfMostLose() == true) {
bool isMostLose = true; // 是否必败
for (int j = 0; j < root->childrenSize; j++) {
if ((side == PLAYER_BLACK && root->children[j]->value > -VALUE_WIN) ||
(side == PLAYER_WHITE && root->children[j]->value < VALUE_WIN)) {
isMostLose = false;
break;
}
}
// 自动认输
if (isMostLose) {
if (root->value <= -VALUE_WIN) {
sprintf(cmdline, "Player%d give up!", state->position->sideId);
return cmdline;
}
}
int nchild = root->childrenSize;
for (int j = 0; j < nchild; j++) {
if (root->children[j]->value == root->value) {
bestMoves.push_back(root->children[j]);
}
}
bestMovesSize = bestMoves.size();
if (bestMovesSize == 0) {
loggerDebug("Not any child value is equal to root value\n");
for (int j = 0; j < root->childrenSize; j++) {
bestMoves.push_back(root->children[j]);
}
}
loggerDebug("Evaluated: %llu / %llu = %llu%%\n", evaluatedNodeCount, nodeCount, evaluatedNodeCount * 100 / nodeCount);
memmgr.memmgr_print_stats();
@ -1134,11 +1033,7 @@ const char* AIAlgorithm::bestMove()
#endif // TRANSPOSITION_TABLE_DEBUG
#endif // TRANSPOSITION_TABLE_ENABLE
if (bestMoves.empty()) {
return nullptr;
}
return moveToCommand(bestMoves[0]->move);
return moveToCommand(best);
}
#endif // ALPHABETA_AI
@ -1200,4 +1095,4 @@ void AIAlgorithm::loadEndgameFileToHashMap()
const QString filename = "endgame.txt";
endgameHashMap.load(filename);
}
#endif // ENDGAME_LEARNING
#endif // ENDGAME_LEARNING

View File

@ -116,9 +116,6 @@ public:
int childrenSize { 0 };
#ifdef ALPHABETA_AI
#ifdef SORT_CONSIDER_PRUNED
bool pruned { false };
#endif
value_t value { VALUE_UNKNOWN };
rating_t rating { RATING_ZERO };
#endif // ALPHABETA_AI
@ -300,6 +297,9 @@ private:
// 标识,用于跳出剪枝算法,立即返回
bool requiredQuit {false};
move_t best { MOVE_NONE };
depth_t originDepth { 0 };
private:
// 命令行
char cmdline[64] {};