{"id":53435,"date":"2024-11-08T18:43:22","date_gmt":"2024-11-08T10:43:22","guid":{"rendered":"https:\/\/swarma.org\/?p=53435"},"modified":"2024-11-08T18:43:22","modified_gmt":"2024-11-08T10:43:22","slug":"%e5%a4%a7%e6%a8%a1%e5%9e%8b2-0%e8%af%bb%e4%b9%a6%e4%bc%9a%ef%bc%9a%e8%9e%8d%e5%90%88%e5%ad%a6%e4%b9%a0%e4%b8%8e%e6%8e%a8%e7%90%86%e7%9a%84%e5%a4%a7%e6%a8%a1%e5%9e%8b%e6%96%b0%e8%8c%83%e5%bc%8f","status":"publish","type":"post","link":"https:\/\/swarma.org\/?p=53435","title":{"rendered":"\u5927\u6a21\u578b2.0\u8bfb\u4e66\u4f1a\uff1a\u878d\u5408\u5b66\u4e60\u4e0e\u63a8\u7406\u7684\u5927\u6a21\u578b\u65b0\u8303\u5f0f\uff01"},"content":{"rendered":"<div class='wxsyncmain'>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;margin-bottom: 0px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(63, 63, 63);font-family: PingFangSC-light;font-size: 15px;background-color: rgb(255, 255, 255);visibility: visible;\" data-mpa-powered-by=\"yiban.io\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;display: inline-block;width: 661px;vertical-align: top;background-color: rgb(246, 246, 246);visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;visibility: visible;\">\n<p style=\"text-align: center;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"385\" data-backw=\"578\" data-imgfileid=\"100217441\" data-ratio=\"0.6666666666666666\" data-s=\"300,640\"  data-type=\"jpeg\" data-w=\"1080\" style=\"width: 100%;height: auto !important;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-a40d0a82b8ac2507eb4d899b00eb4175.jpeg\"  \/><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;margin-top: 10px;outline: 0px;letter-spacing: 0.544px;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;width: 661px;visibility: visible;\">\n<section 
style=\"-webkit-tap-highlight-color: transparent;padding-right: 3px;outline: 0px;float: left;line-height: 1;visibility: visible;\">\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;outline: 0px;text-align: left;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;padding-left: 10px;outline: 0px;display: inline-block;width: auto;vertical-align: top;min-width: 10%;height: auto;border-left: 3px solid rgb(33, 166, 210);border-bottom-left-radius: 0px;line-height: 0;visibility: visible;\">\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;outline: 0px;transform: translate3d(-13px, 0px, 0px);visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;display: inline-block;width: 25px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px;border-color: rgb(33, 166, 210) rgb(33, 166, 210) rgb(15, 76, 129);visibility: visible;\"><br style=\"-webkit-tap-highlight-color: transparent;outline: 0px;visibility: visible;\"  \/><\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;outline: 0px;transform: translate3d(-1px, 0px, 0px);visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;text-align: justify;color: rgb(33, 166, 210);font-size: 16px;line-height: 1.5;visibility: visible;\">\n<p style=\"-webkit-tap-highlight-color: transparent;outline: 0px;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;visibility: visible;\">\u5bfc\u8bed<\/strong><\/p>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;outline: 0px;transform: translate3d(-13px, 0px, 0px) rotateX(180deg);visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;display: 
inline-block;width: 24px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px;border-color: rgb(33, 166, 210) rgb(33, 166, 210) rgb(15, 76, 129);visibility: visible;\"><br style=\"-webkit-tap-highlight-color: transparent;outline: 0px;visibility: visible;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"-webkit-tap-highlight-color: transparent;padding-right: 4px;padding-left: 4px;outline: 0px;clear: right;box-shadow: rgb(0, 0, 0) 0px 0px 0px;visibility: visible;min-height: 4.5em !important;\">\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;margin-top: 5px;margin-bottom: 5px;outline: 0px;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;padding-right: 8px;padding-left: 8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;visibility: visible;\">\n<p style=\"-webkit-tap-highlight-color: transparent;outline: 0px;clear: none;line-height: 2em;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;text-indent: 0em;letter-spacing: 0.544px;font-size: 15px;visibility: visible;\"><span style=\"-webkit-tap-highlight-color: transparent;outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(0, 0, 0);visibility: visible;\"><strong><span style=\"color: rgb(63, 63, 63);letter-spacing: 0.544px;text-decoration: none solid rgb(0, 0, 0);\">o1\u6a21\u578b\u4ee3\u8868\u5927\u8bed\u8a00\u6a21\u578b\u878d\u5408\u5b66\u4e60\u4e0e\u63a8\u7406\u7684\u65b0\u8303\u5f0f\u3002\u96c6\u667a\u4ff1\u4e50\u90e8\u8054\u5408\u5317\u4eac\u5e08\u8303\u5927\u5b66\u7cfb\u7edf\u79d1\u5b66\u5b66\u9662\u6559\u6388\u5f20\u6c5f\u3001Google 
DeepMind\u7814\u7a76\u79d1\u5b66\u5bb6\u51af\u7199\u680b\u3001\u963f\u91cc\u5df4\u5df4\u5f3a\u5316\u5b66\u4e60\u7814\u7a76\u5458\u738b\u7ef4\u57d9\u548c\u4e2d\u79d1\u9662\u4fe1\u5de5\u6240\u5f20\u6770\u5171\u540c\u53d1\u8d77\u300c\u5927\u6a21\u578bII\uff1a\u878d\u5408\u5b66\u4e60\u4e0e\u63a8\u7406\u7684\u5927\u6a21\u578b\u65b0\u8303\u5f0f\u300d\u8bfb\u4e66\u4f1a\uff0c\u672c\u6b21\u8bfb\u4e66\u4f1a\u5c06\u5173\u6ce8\u5927\u6a21\u578b\u63a8\u7406\u8303\u5f0f\u7684\u6f14\u8fdb\u3001\u57fa\u4e8e\u641c\u7d22\u4e0e\u8499\u7279\u5361\u6d1b\u6811\u7684\u63a8\u7406\u4f18\u5316\u3001\u57fa\u4e8e\u5f3a\u5316\u5b66\u4e60\u7684\u5927\u6a21\u578b\u4f18\u5316\u3001\u601d\u7ef4\u94fe\u65b9\u6cd5\u4e0e\u5185\u5316\u673a\u5236\u3001\u81ea\u6211\u6539\u8fdb\u4e0e\u63a8\u7406\u9a8c\u8bc1\u3002\u5e0c\u671b\u901a\u8fc7\u8bfb\u4e66\u4f1a\u63a2\u7d22o1\u5177\u4f53\u5b9e\u73b0\u7684\u6280\u672f\u8def\u5f84\uff0c\u5e2e\u52a9\u6211\u4eec\u66f4\u597d\u5730\u7406\u89e3\u673a\u5668\u63a8\u7406\u548c\u4eba\u5de5\u667a\u80fd\u7684\u672c\u8d28\u3002<\/span><\/strong><\/span><\/strong><\/p>\n<p style=\"-webkit-tap-highlight-color: transparent;outline: 0px;clear: none;line-height: 2em;visibility: visible;\"><strong style=\"letter-spacing: 0.544px;-webkit-tap-highlight-color: transparent;outline: 0px;text-indent: 0em;font-size: 15px;visibility: visible;\"><span style=\"-webkit-tap-highlight-color: transparent;outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(0, 0, 0);visibility: visible;\"><strong><span style=\"text-decoration-style: solid;text-decoration-color: rgb(0, 0, 0);letter-spacing: 0.578px;\"><br  \/><\/span><\/strong><\/span><\/strong><\/p>\n<p style=\"-webkit-tap-highlight-color: transparent;outline: 0px;clear: none;line-height: 2em;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;text-indent: 0em;letter-spacing: 0.544px;font-size: 15px;visibility: visible;\"><\/strong><strong 
style=\"letter-spacing: 0.544px;-webkit-tap-highlight-color: transparent;outline: 0px;text-indent: 0em;font-size: 15px;visibility: visible;\"><span style=\"-webkit-tap-highlight-color: transparent;outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(0, 0, 0);visibility: visible;\"><strong><span style=\"text-decoration-style: solid;text-decoration-color: rgb(0, 0, 0);letter-spacing: 0.578px;\">\u4ece2024\u5e7411\u670830\u65e5\u5f00\u59cb\uff0c\u9884\u8ba1\u6bcf\u5468\u516d\u8fdb\u884c\u4e00\u6b21\uff0c\u6301\u7eed\u65f6\u95f4\u9884\u8ba1 6-8 \u5468\u5de6\u53f3\u3002\u6b22\u8fce\u611f\u5174\u8da3\u7684\u670b\u53cb\u62a5\u540d\u53c2\u52a0\uff0c\u6fc0\u53d1\u66f4\u591a\u7684\u601d\u7ef4\u706b\u82b1\uff01<\/span><\/strong><\/span><\/strong><\/p>\n<\/section>\n<section style=\"-webkit-tap-highlight-color: transparent;padding-right: 8px;padding-left: 8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;letter-spacing: 0.544px;caret-color: var(--weui-BRAND);visibility: visible;\"><\/strong><\/strong><\/section>\n<section style=\"-webkit-tap-highlight-color: transparent;padding-right: 8px;padding-left: 8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;letter-spacing: 0.544px;caret-color: var(--weui-BRAND);visibility: visible;\"><\/strong><\/strong><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;margin-top: 10px;outline: 0px;visibility: visible;\">\n<section 
style=\"-webkit-tap-highlight-color: transparent;outline: 0px;width: 661px;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;clear: both;line-height: 0;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;line-height: 0;width: 0px;visibility: visible;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align: top;visibility: visible;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"-webkit-tap-highlight-color: transparent;margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(63, 63, 63);font-family: PingFangSC-light;font-size: 15px;background-color: rgb(255, 255, 255);text-align: center;visibility: visible;\">\n<section style=\"-webkit-tap-highlight-color: transparent;outline: 0px;vertical-align: middle;display: inline-block;line-height: 0;visibility: visible;\"><img class=\"rich_pages wxw-img\" data-fileid=\"100098753\" data-imgfileid=\"100217431\" data-ratio=\"0.07314814814814814\"  data-type=\"png\" data-w=\"1080\" style=\"-webkit-tap-highlight-color: transparent;outline: 0px;vertical-align: middle;visibility: visible !important;width: 677px !important;height: auto !important;\" src=\"\"  \/><\/section>\n<\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/section>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 
10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">\u80cc\u666f\u4ecb\u7ecd<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans 
GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">2024\u5e747\u6708\uff0cGoogle 
DeepMind\u53d1\u5e03\u7684AlphaProof\u5c55\u793a\u4e86AI\u5728\u6570\u5b66\u63a8\u7406\u9886\u57df\u7684\u91cd\u8981\u7a81\u7834\uff0c\u8be5\u6a21\u578b\u80fd\u591f\u5728\u56fd\u9645\u5965\u6797\u5339\u514b\u6570\u5b66\u7ade\u8d5b(IMO)\u4e2d\u8fbe\u5230\u94f6\u724c\u6c34\u5e73\uff1b\u4e0d\u4e45\uff0c\u57282024\u5e749\u6708\uff0cOpenAI\u53d1\u5e03\u4e86\u5177\u6709\u91cc\u7a0b\u7891\u610f\u4e49\u7684o1\u6a21\u578b\uff0c\u5b83\u4eec\u6807\u5fd7\u7740\u5927\u8bed\u8a00\u6a21\u578b\u6b63\u5f0f\u8fdb\u5165\u201c\u6df1\u5ea6\u601d\u8003\u201d\u65f6\u4ee3\u3002\u4e0d\u540c\u4e8e\u4f20\u7edf\u7684\u5feb\u901f\u54cd\u5e94\u6a21\u5f0f\uff0co1\u901a\u8fc7\u5f3a\u5316\u5b66\u4e60\u5185\u5316\u4e86\u601d\u7ef4\u94fe\u63a8\u7406\u80fd\u529b\uff0c\u80fd\u591f\u5728\u56de\u7b54\u95ee\u9898\u65f6\u8fdb\u884c\u6df1\u5165\u7684\u63a8\u7406\u548c\u9a8c\u8bc1\u3002\u5728\u6570\u5b66\u3001\u7f16\u7a0b\u7b49\u9700\u8981\u590d\u6742\u63a8\u7406\u7684\u4efb\u52a1\u4e2d\uff0co1\u8868\u73b0\u51fa\u4e86\u60ca\u4eba\u7684\u80fd\u529b\u63d0\u5347\u2014\u2014\u5728\u56fd\u9645\u5965\u6797\u5339\u514b\u4fe1\u606f\u5b66\u7ade\u8d5b(IOI)\u4e2d\u8fbe\u5230\u91d1\u724c\u6c34\u5e73\uff0c\u5728\u7f8e\u56fd\u6570\u5b66\u9080\u8bf7\u8d5b(AIME)\u4e2d\u53d6\u5f9783%\u7684\u5f97\u5206\u7387\u3002\u8fd9\u4e00\u7a81\u7834\u6027\u8fdb\u5c55\u4e0d\u4ec5\u4ee3\u8868\u4e86AI\u5411\u7740\u66f4\u9ad8\u667a\u80fd\u5f62\u6001\u7684\u6f14\u8fdb\uff0c\u66f4\u4e3a\u6211\u4eec\u7406\u89e3\u673a\u5668\u63a8\u7406\u3001\u4eba\u5de5\u667a\u80fd\u7684\u672c\u8d28\u63d0\u4f9b\u4e86\u5168\u65b0\u89c6\u89d2\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\">\u4ece\u6280\u672f\u5c42\u9762\u770b\uff0co1\u7684\u6210\u529f\u63ed\u793a\u4e86\u4e00\u4e2a\u91cd\u8981\u8d8b\u52bf\uff1a\u5927\u6a21\u578b\u7684\u80fd\u529b\u63d0\u5347\u4e0d\u518d\u5c40\u9650\u4e8e\u7b80\u5355\u5730\u6269\u5927\u53c2\u6570\u89c4\u6a21\uff0c\u800c\u662f\u8f6c\u5411\u4e86\u5bf9\u63a8\u7406\u8fc7\u7a0b\u7684\u6df1\u5ea6\u4f18\u5316\u3002\u8fd9\u79cd\u8303\u5f0f\u8f6c\u53d8\u6d89\u53ca\u591a\u4e2a\u524d\u6cbf\u6280\u672f\u9886\u57df\u7684\u521b\u65b0\uff0c\u5305\u62ec\u601d\u7ef4\u94fe\u7684\u5185\u5316\u673a\u5236\u3001\u57fa\u4e8e\u641c\u7d22\u7684\u63a8\u7406\u4f18\u5316\u3001\u5f3a\u5316\u5b66\u4e60\u5728\u63a8\u7406\u80fd\u529b\u57f9\u517b\u4e2d\u7684\u5e94\u7528\u7b49\u3002\u8fd9\u4e9b\u6280\u672f\u8fdb\u5c55\u4e0d\u4ec5\u63a8\u52a8\u4e86AI\u9886\u57df\u7684\u53d1\u5c55\uff0c\u4e5f\u4e3a\u8ba4\u77e5\u79d1\u5b66\u3001\u8ba1\u7b97\u673a\u79d1\u5b66\u7b49\u591a\u4e2a\u5b66\u79d1\u5e26\u6765\u4e86\u6df1\u523b\u542f\u793a\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: 
middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">\u6846\u67b6\u4ecb\u7ecd<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: 
left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\">&nbsp;<\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><\/h1>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 16px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u672c\u7cfb\u5217\u8bfb\u4e66\u4f1a\u65e8\u5728\u6df1\u5165\u63a2\u8ba8\u5927\u6a21\u578b\u63a8\u7406\u65b0\u8303\u5f0f\u80cc\u540e\u7684\u6838\u5fc3\u6280\u672f\u548c\u57fa\u672c\u539f\u7406\u3002\u6211\u4eec\u5c06\u91cd\u70b9\u5173\u6ce8\u4ee5\u4e0b\u5173\u952e\u95ee\u9898\uff1a<\/span><\/p>\n<ol start=\"1\" class=\"list-paddingleft-1\" style=\"margin-left: 8px;margin-right: 8px;\">\n<li style=\"font-size: 15px;color: rgb(63, 63, 63);\">\n<p style=\"line-height: 1.75em;margin-bottom: 16px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u63a8\u7406\u8303\u5f0f\u7684\u6f14\u8fdb<\/strong>\uff1a\u5927\u6a21\u578b\u5982\u4f55\u4ece\u7b80\u5355\u7684\u6a21\u5f0f\u5339\u914d\u8d70\u5411\u6df1\u5ea6\u63a8\u7406\uff1fSystem 1\uff08\u5feb\u601d\u8003\uff09\u548cSystem 2\uff08\u6162\u601d\u8003\uff09\u7684\u6574\u5408\u673a\u5236\u662f\u4ec0\u4e48\uff1f<\/span><\/p>\n<\/li>\n<li style=\"font-size: 15px;color: rgb(63, 63, 63);\">\n<p style=\"line-height: 
1.75em;margin-bottom: 16px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u57fa\u4e8e\u641c\u7d22\u4e0e\u8499\u7279\u5361\u6d1b\u6811\u7684\u63a8\u7406\u4f18\u5316<\/strong>\uff1a\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22(MCTS)\u7b49\u65b9\u6cd5\u5982\u4f55\u63d0\u5347\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\uff1f\u63a8\u7406\u8fc7\u7a0b\u7684\u53ef\u9760\u6027\u5982\u4f55\u4fdd\u8bc1\uff1f<\/span><\/p>\n<\/li>\n<li style=\"font-size: 15px;color: rgb(63, 63, 63);\">\n<p style=\"line-height: 1.75em;margin-bottom: 16px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u57fa\u4e8e\u5f3a\u5316\u5b66\u4e60\u7684\u5927\u6a21\u578b\u4f18\u5316<\/strong>\uff1a\u5f3a\u5316\u5b66\u4e60\u5982\u4f55\u57f9\u517b\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\uff1f\u81ea\u6211\u5bf9\u5f08(self-play)\u7b49\u673a\u5236\u7684\u4f5c\u7528\u662f\u4ec0\u4e48\uff1f<\/span><\/p>\n<\/li>\n<li style=\"font-size: 15px;color: rgb(63, 63, 63);\">\n<p style=\"line-height: 1.75em;margin-bottom: 16px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u601d\u7ef4\u94fe\u65b9\u6cd5\u4e0e\u5185\u5316\u673a\u5236<\/strong>\uff1a\u5982\u4f55\u5c06\u5916\u90e8\u63d0\u793a\u7684\u601d\u7ef4\u94fe\u8f6c\u5316\u4e3a\u6a21\u578b\u7684\u5185\u5728\u80fd\u529b\uff1f\u8fd9\u4e00\u8fc7\u7a0b\u4e0e\u4eba\u7c7b\u8ba4\u77e5\u5b66\u4e60\u6709\u4f55\u5f02\u540c\uff1f<\/span><\/p>\n<\/li>\n<li style=\"font-size: 15px;color: rgb(63, 63, 63);\">\n<section style=\"margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u81ea\u6211\u6539\u8fdb\u4e0e\u63a8\u7406\u9a8c\u8bc1<\/strong>\uff1a\u6a21\u578b\u5982\u4f55\u901a\u8fc7\u81ea\u6211\u9a8c\u8bc1\u548c\u8fed\u4ee3\u4f18\u5316\u63d0\u5347\u63a8\u7406\u80fd\u529b\uff1f\u8fd9\u79cd\u80fd\u529b\u7684\u8fb9\u754c\u5728\u54ea\u91cc\uff1f<\/span><\/section>\n<\/li>\n<\/ol>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span 
style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u4e9b\u95ee\u9898\u4e0d\u4ec5\u5173\u7cfb\u5230AI\u6280\u672f\u7684\u53d1\u5c55\u65b9\u5411\uff0c\u4e5f\u6d89\u53ca\u5230\u5bf9\u667a\u80fd\u672c\u8d28\u7684\u6df1\u5165\u7406\u89e3\u3002\u6211\u4eec\u671f\u671b\u901a\u8fc7\u8fd9\u4e2a\u8bfb\u4e66\u4f1a\uff0c\u6c47\u805a\u6765\u81ea\u673a\u5668\u5b66\u4e60\u3001\u8ba4\u77e5\u79d1\u5b66\u3001\u6570\u5b66\u7b49\u4e0d\u540c\u9886\u57df\u7684\u89c2\u70b9\uff0c\u5171\u540c\u63a2\u7d22\u5927\u6a21\u578b\u63a8\u7406\u80fd\u529b\u63d0\u5347\u7684\u5185\u5728\u673a\u5236\uff0c\u4e3a\u4eba\u5de5\u667a\u80fd\u7684\u4e0b\u4e00\u4e2a\u53d1\u5c55\u9636\u6bb5\u8d21\u732e\u6d1e\u89c1\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"318\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"298\" data-height=\"1084\" data-imgfileid=\"100217432\" data-ratio=\"0.5666666666666667\"  data-type=\"png\" data-w=\"1080\" data-width=\"2042\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-b3b6c848844d052d96df430f781b619e.png\"  \/><\/section>\n<section style=\"margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<p style=\"margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/p>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: 
rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 
0.578px;\">\u53d1\u8d77\u4eba\u4ecb\u7ecd<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><br  \/><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"316\" data-backw=\"562\" data-imgfileid=\"100217469\" data-ratio=\"0.562962962962963\"  data-type=\"png\" data-w=\"1080\" style=\"width: 100%;height: auto;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-d04aa322c0a572d9fefef9e6c38e59ce.png\"  \/><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u5f20\u6c5f<\/strong>\uff0c<span style=\"color: rgb(63, 63, 63);font-family: &quot;PingFang SC&quot;, system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px;text-size-adjust: auto;background-color: rgb(255, 255, 
255);\">\u5317\u4eac\u5e08\u8303\u5927\u5b66\u7cfb\u7edf\u79d1\u5b66\u5b66\u9662\u6559\u6388\uff0c\u96c6\u667a\u4ff1\u4e50\u90e8\u3001\u96c6\u667a\u5b66\u56ed\u521b\u59cb\u4eba\uff0c\u96c6\u667a\u79d1\u5b66\u7814\u7a76\u4e2d\u5fc3\u7406\u4e8b\u957f\uff0c\u66fe\u4efb\u817e\u8baf\u7814\u7a76\u9662\u3001\u534e\u4e3a\u6218\u7565\u7814\u7a76\u9662\u7b49\u7279\u8058\u987e\u95ee\u3002\u4e3b\u8981\u7814\u7a76\u9886\u57df\u5305\u62ec\u56e0\u679c\u6d8c\u73b0\u3001\u590d\u6742\u7cfb\u7edf\u5206\u6790\u4e0e\u5efa\u6a21\u3001\u89c4\u6a21\u7406\u8bba\u7b49\u3002<\/span><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"321\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"321\" data-height=\"600\" data-imgfileid=\"100217435\" data-ratio=\"0.5714285714285714\"  data-type=\"png\" data-w=\"1050\" data-width=\"1050\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-d0885f3a3968fc5d2515af37116f1137.png\"  \/><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u51af\u7199\u680b<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\uff0c\u4f26\u6566\u5927\u5b66\u5b66\u9662\u8ba1\u7b97\u673a\u7cfb\u535a\u58eb\uff0c\u672c\u79d1\u6bd5\u4e1a\u4e8e\u6e05\u534e\u5927\u5b66\u81ea\u52a8\u5316\u7cfb\u3002\u5373\u5c06\u52a0\u5165 Google DeepMind 
\u62c5\u4efb\u7814\u7a76\u79d1\u5b66\u5bb6\u3002\u5176\u4e3b\u8981\u7814\u7a76\u65b9\u5411\u6db5\u76d6\u8bed\u8a00\u6a21\u578b\u3001\u5355\u667a\u80fd\u4f53\uff0c\u591a\u667a\u80fd\u4f53\uff0c\u4ee5\u53ca\u5143\u5f3a\u5316\u5b66\u4e60\u3002\u81f4\u529b\u4e8e\u901a\u8fc7\u5f3a\u5316\u5b66\u4e60\u63a8\u52a8\u4e0b\u4e00\u4ee3\u8bed\u8a00\u6a21\u578b\u7684\u53d1\u5c55\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: left;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/waterhorse1.github.io\/<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"316\" data-backw=\"562\" data-height=\"1080\" data-imgfileid=\"100217437\" data-ratio=\"0.562962962962963\"  data-type=\"png\" data-w=\"1080\" data-width=\"1920\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-9babd733168c2391fc94fd24bcaf727a.png\"  \/><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u738b\u7ef4\u57d9<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\uff0c\u5173\u6ce8\u5f3a\u5316\u5b66\u4e60\u524d\u6cbf\u6280\u672f\u7684\u63a2\u7d22\u4e0e\u5e94\u7528\uff0c\u7814\u7a76\u9886\u57df\u4e3a\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u3001\u6df1\u5ea6\u5f3a\u5316\u5b66\u4e60\u3001\u57fa\u4e8e\u4eba\u7c7b\u53cd\u9988\u7684\u5f3a\u5316\u5b66\u4e60\u3002\u5f53\u524d\u4ece\u4e8bRL for LLM\u76f8\u5173\u7684\u5de5\u4f5c\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: 
left;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4e2a\u4eba\u4e3b\u9875\uff1ahttp:\/\/wwxfromtju.github.io\/<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: left;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"316\" data-backw=\"562\" data-height=\"1080\" data-imgfileid=\"100217438\" data-ratio=\"0.562962962962963\"  data-type=\"png\" data-w=\"1080\" data-width=\"1920\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-59da62b05556c414507ad1869901c237.png\"  \/><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u5f20\u6770<\/strong>\uff0c\u4e2d\u79d1\u9662\u4fe1\u5de5\u6240\u56db\u5e74\u7ea7\u535a\u58eb\u751f\uff0c\u5b89\u8fdcAI\u4f19\u4f34\uff0c\u4e0a\u6d77\u4eba\u5de5\u667a\u80fd\u5b9e\u9a8c\u5ba4\u5b9e\u4e60\u751f\u3002\u5177\u6709\u4eba\u5de5\u667a\u80fd\u548c\u7f51\u7edc\u5b89\u5168\u4ea4\u53c9\u80cc\u666f\uff0c\u5173\u6ce8\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\u3002\u7814\u7a76\u65b9\u5411\u4e3a\u53ef\u4fe1AI\u3001\u53ef\u89e3\u91ca\u6027\u3002<\/span><\/section>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong><br  \/><\/strong><\/span><\/h2>\n<section style=\"margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong><br  \/><\/strong><\/span><\/section>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft 
YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 
0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">\u62a5\u540d\u53c2\u4e0e\u8bfb\u4e66\u4f1a<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(33, 166, 210);\"><strong>\u8fd0\u884c\u6a21\u5f0f<\/strong><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong>\u4ece2024\u5e7411\u670830\u65e5\u5f00\u59cb<\/strong>\uff0c\u6bcf\u5468\u516d20:00-22:00\uff0c\u6301\u7eed\u65f6\u95f4\u9884\u8ba1 6-8 
\u5468\u5de6\u53f3\uff0c\u6309\u8bfb\u4e66\u4f1a\u6846\u67b6\u8bbe\u8ba1\uff0c\u6bcf\u5468\u8fdb\u884c\u7ebf\u4e0a\u4f1a\u8bae\uff0c\u4e0e\u4e3b\u8bb2\u4eba\u7b49\u793e\u533a\u6210\u5458\u5f53\u9762\u4ea4\u6d41\uff0c\u4f1a\u540e\u53ef\u4ee5\u83b7\u5f97\u89c6\u9891\u56de\u653e\u6301\u7eed\u5b66\u4e60\u3002<\/span><\/p>\n<h4 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong><br  \/><\/strong><\/span><\/h4>\n<h4 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(33, 166, 210);\"><strong>\u62a5\u540d\u65b9\u5f0f<\/strong><\/span><\/h4>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7b2c\u4e00\u6b65\uff1a\u626b\u7801\u586b\u5199\u62a5\u540d\u4fe1\u606f\u3002<\/span><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: center;margin-bottom: 0px;\"><img class=\"rich_pages wxw-img\" data-height=\"400\" data-imgfileid=\"100217436\" data-ratio=\"1\"  data-type=\"png\" data-w=\"400\" data-width=\"400\" style=\"width: 173px;height: auto !important;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-1fb53b1af2a35924193bcd8817a01f3b.png\"  \/><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: center;margin-bottom: 8px;\"><span style=\"font-size: 13px;color: rgb(136, 136, 136);\">\u626b\u7801\u62a5\u540d\uff08\u53ef\u5f00\u53d1\u7968\uff09<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7b2c\u4e8c\u6b65\uff1a\u586b\u5199\u4fe1\u606f\u540e\uff0c\u4ed8\u8d39\u62a5\u540d\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span 
style=\"font-size: 15px;color: rgb(63, 63, 63);\"><em>\u5982\u9700\u7528\u652f\u4ed8\u5b9d\u652f\u4ed8\uff0c\u8bf7\u5728PC\u7aef\u8fdb\u5165\u8bfb\u4e66\u4f1a\u9875\u9762\u62a5\u540d\u652f\u4ed8\uff1a<\/em><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7b2c\u4e09\u6b65\uff1a\u6dfb\u52a0\u8fd0\u8425\u8d1f\u8d23\u4eba\u5fae\u4fe1\uff0c\u83b7\u53d6\u6240\u6709\u63a8\u8350\u8bba\u6587\u8d44\u6e90\u5305\uff0c\u62c9\u5165\u5bf9\u5e94\u4e3b\u9898\u7684\u8bfb\u4e66\u4f1a\u793e\u533a\uff08\u5fae\u4fe1\u7fa4\uff09\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">PS\uff1a\u4e3a\u786e\u4fdd\u4e13\u4e1a\u6027\u548c\u8ba8\u8bba\u7684\u805a\u7126\uff0c\u672c\u8bfb\u4e66\u4f1a\u8c22\u7edd\u8131\u79bb\u8bfb\u4e66\u4f1a\u4e3b\u9898\u548c\u590d\u6742\u79d1\u5b66\u95ee\u9898\u672c\u8eab\u7684\u7a7a\u6cdb\u7684\u54f2\u5b66\u548c\u601d\u8fa8\u5f0f\u8ba8\u8bba\uff1b<strong>\u5982\u679c\u51fa\u73b0\u8ba8\u8bba\u5185\u5bb9\u4e0d\u7b26\u5408\u8981\u6c42\u3001\u7ecf\u63d0\u9192\u65e0\u6548\u8005\uff0c\u4f1a\u88ab\u79fb\u9664\u7fa4\u804a\u5e76\u5bf9\u672a\u53c2\u4e0e\u90e8\u5206\u9000\u8d39\u3002<\/strong><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><strong><br  \/><\/strong><\/span><\/section>\n<h3 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">\u52a0\u5165\u793e\u533a\u540e\u53ef\u4ee5\u83b7\u5f97\u7684\u8d44\u6e90\uff1a<\/span><\/strong><\/span><\/h3>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\">\u5b8c\u6574\u6743\u9650\uff0c\u5305\u62ec<strong>\u7ebf\u4e0a\u95ee\u7b54\u3001\u5f55\u64ad\u56de\u770b\u3001\u8d44\u6599\u5171\u4eab\u3001\u793e\u7fa4\u4ea4\u6d41\u3001\u4fe1\u606f\u540c\u6b65\u3001\u5171\u521b\u4efb\u52a1\u83b7\u53d6\u79ef\u5206\u7b49<\/strong>\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img\" data-backh=\"316\" data-backw=\"562\" data-height=\"540\" data-imgfileid=\"100217439\" data-ratio=\"0.5625\"  data-type=\"png\" data-w=\"960\" data-width=\"960\" style=\"width: 100%;height: auto !important;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-3a0991e270c32a409ad70611ff47f2b8.png\"  \/><\/section>\n<h3 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">\u53c2\u4e0e\u5171\u521b\u4efb\u52a1\u83b7\u53d6\u79ef\u5206\uff0c\u5171\u5efa\u5b66\u672f\u793e\u533a\uff1a<\/span><\/strong><\/span><\/h3>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\">\u8bfb\u4e66\u4f1a\u91c7\u7528\u5171\u5b66\u5171\u7814\u673a\u5236\uff0c\u6210\u5458\u901a\u8fc7\u5185\u5bb9\u5171\u521b\u83b7\u79ef\u5206\uff08<strong>\u5b57\u5e55\u4fee\u6539\u3001\u8bfb\u4e66\u4f1a\u7b14\u8bb0\u3001\u8bba\u6587\u901f\u9012\u3001\u516c\u4f17\u53f7\u6587\u7ae0\u3001\u96c6\u667a\u767e\u79d1\u3001\u8bba\u6587\u89e3\u8bfb\u7b49\u5171\u521b\u4efb\u52a1<\/strong>\uff09\uff0c\u79ef\u5206\u7b26\u5408\u6761\u4ef6\u5373\u53ef\u9000\u8d39\u3002\u53d1\u8d77\u4eba\u548c\u4e3b\u8bb2\u4eba\u540c\u6837\u9075\u5faa\u6b64\u673a\u5236\uff0c\u65e0\u989d\u5916\u91d1\u94b1\u6fc0\u52b1\u3002<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img\" data-backh=\"316\" data-backw=\"562\" data-height=\"1584\" data-imgfileid=\"100217440\" data-ratio=\"0.562962962962963\"  data-type=\"png\" data-w=\"1080\" data-width=\"2816\" style=\"width: 100%;height: auto !important;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-0887cabee2bf1f198759de969988da43.png\"  \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">PS\uff1a\u5177\u4f53\u53c2\u4e0e\u65b9\u5f0f\u53ef\u4ee5\u52a0\u5165\u8bfb\u4e66\u4f1a\u540e\u67e5\u770b\u5bf9\u5e94\u7684\u5171\u521b\u4efb\u52a1\u5217\u8868\uff0c\u9886\u53d6\u4efb\u52a1\uff0c\u4e0e\u8fd0\u8425\u8d1f\u8d23\u4eba\u6c9f\u901a\u8be6\u60c5\uff0c\u4e0a\u8ff0\u89c4\u5219\u7684\u6700\u7ec8\u89e3\u91ca\u6743\u5f52\u96c6\u667a\u4ff1\u4e50\u90e8\u6240\u6709\u3002<\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/h1>\n<section style=\"margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, 
&quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, 
&quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">\u53c2\u8003\u6587\u732e\u5217\u8868<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">Wang, J. (2024). 
&#8220;A Tutorial on LLM Reasoning: Relevant methods behind ChatGPT o1&#8221;.<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u6982\u89c8\u6027\u8bba\u6587\uff0c\u6df1\u5165\u5256\u6790\u4e86ChatGPT o1\u80cc\u540e\u7684\u6838\u5fc3\u6280\u672f\u539f\u7406\uff0c\u7279\u522b\u662f\u5176\u63a8\u7406\u9a8c\u8bc1\u673a\u5236\uff0c\u901a\u8fc7\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b\u7684\u5f62\u5f0f\u5316\u63cf\u8ff0\u4f7f\u590d\u6742\u7684\u6280\u672f\u66f4\u6613\u7406\u89e3\u3002<\/span><\/p>\n<\/blockquote>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\"><br  \/><\/span><\/strong><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">1. \u6d8c\u73b0\u80fd\u529b\u4e0eScaling Law\u7814\u7a76<\/span><\/strong><\/span><\/h2>\n<p style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"316\" data-backw=\"562\" data-imgfileid=\"100217445\" data-ratio=\"0.562962962962963\" data-s=\"300,640\"  data-type=\"png\" data-w=\"1080\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-e6af5a64930c3ed1c5a7923e1c093930.png\"  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[1] Wei J, Tay Y, Bommasani R, et al. 
Emergent Abilities of Large Language Models[J]. Transactions on Machine Learning Research, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2206.07682<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u7bc7\u7cfb\u7edf\u6027\u63a2\u8ba8\u5927\u6a21\u578b\u6d8c\u73b0\u80fd\u529b\u7684\u5f00\u521b\u6027\u5de5\u4f5c\u3002\u901a\u8fc7\u5b9e\u8bc1\u7814\u7a76\u63ed\u793a\u4e86\u6a21\u578b\u89c4\u6a21\u4e0e\u80fd\u529b\u6d8c\u73b0\u7684\u5173\u7cfb\uff0c\u5e76\u63d0\u51fa\u4e86\u8bc4\u4f30\u6d8c\u73b0\u80fd\u529b\u7684\u7406\u8bba\u6846\u67b6\uff0c\u4e3a\u540e\u7eed\u7814\u7a76\u5960\u5b9a\u57fa\u7840\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[2] Schaeffer R, Miranda B, Koyejo S. 
Are Emergent Abilities of Large Language Models a Mirage?[C]\/\/Advances in Neural Information Processing Systems, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2304.15004<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4ee5\u4e25\u8c28\u7684\u5b9e\u9a8c\u8bbe\u8ba1\u8d28\u7591\u6d8c\u73b0\u80fd\u529b\u7684\u771f\u5b9e\u6027\uff0c\u63d0\u51fa\u6d8c\u73b0\u73b0\u8c61\u53ef\u80fd\u53ea\u662f\u8bc4\u4f30\u65b9\u6cd5\u7684\u504f\u5dee\u3002\u8be5\u5de5\u4f5c\u4fc3\u4f7f\u5b66\u754c\u91cd\u65b0\u601d\u8003\u8bc4\u4f30\u65b9\u6cd5\u7684\u5408\u7406\u6027\uff0c\u63a8\u52a8\u4e86\u66f4\u4e25\u683c\u7684\u5b9e\u9a8c\u8303\u5f0f\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[3] Lubana E S, Kawaguchi K, Dick R P, et al. A percolation model of emergence: Analyzing transformers trained on a formal language[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2408.12578v2<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u501f\u9274\u7edf\u8ba1\u7269\u7406\u4e2d\u7684\u6e17\u900f\u7406\u8bba\u89e3\u91ca\u6a21\u578b\u80fd\u529b\u6d8c\u73b0\u673a\u5236\u3002\u901a\u8fc7\u5efa\u7acb\u6570\u5b66\u6a21\u578b\u9610\u660e\u4e86\u8868\u5f81\u5b66\u4e60\u4e2d\u7684\u76f8\u53d8\u73b0\u8c61\uff0c\u4e3a\u7406\u89e3\u6d8c\u73b0\u673a\u5236\u63d0\u4f9b\u4e86\u5168\u65b0\u89c6\u89d2\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[4] Du Z, Zeng A, Dong Y, Tang J. Understanding emergent abilities of language models from the loss perspective[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2403.15796<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4ece\u4f18\u5316\u76ee\u6807\u548c\u635f\u5931\u51fd\u6570\u89c6\u89d2\u5206\u6790\u6d8c\u73b0\u80fd\u529b\u3002\u63ed\u793a\u4e86\u9884\u8bad\u7ec3\u635f\u5931\u4e0e\u4e0b\u6e38\u4efb\u52a1\u6027\u80fd\u4e4b\u95f4\u7684\u5185\u5728\u8054\u7cfb\uff0c\u4e3a\u6a21\u578b\u8bbe\u8ba1\u548c\u8bad\u7ec3\u63d0\u4f9b\u4e86\u7406\u8bba\u6307\u5bfc\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[5] Chen H, Yang X, Zhu J, Wang W. Quantifying emergence in large language models[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2405.12617<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u4e00\u5957\u91cf\u5316\u8bc4\u4f30\u6d8c\u73b0\u80fd\u529b\u7684\u65b9\u6cd5\u4f53\u7cfb\u3002\u901a\u8fc7\u5f15\u5165\u4fe1\u606f\u8bba\u548c\u7edf\u8ba1\u5b66\u65b9\u6cd5\uff0c\u5b9e\u73b0\u4e86\u5bf9\u6d8c\u73b0\u73b0\u8c61\u7684\u5b9a\u91cf\u523b\u753b\uff0c\u4f7f\u6d8c\u73b0\u7814\u7a76\u66f4\u52a0\u79d1\u5b66\u5316\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[6] Kaplan J, McCandlish S, Henighan T, et al. Scaling laws for neural language models[J]. 
arXiv preprint, 2020.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2001.08361<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u63ed\u793a\u4e86\u6a21\u578b\u6027\u80fd\u4e0e\u8ba1\u7b97\u8d44\u6e90\u4e4b\u95f4\u7684\u5e42\u5f8b\u5173\u7cfb\u3002\u901a\u8fc7\u5927\u91cf\u5b9e\u9a8c\u5efa\u7acb\u7684scaling law\u7406\u8bba\uff0c\u6210\u4e3a\u6307\u5bfc\u5927\u6a21\u578b\u8bad\u7ec3\u7684\u91cd\u8981\u7406\u8bba\u57fa\u7840\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[7] Wu Y, Sun Z, Li S, et al. Inference scaling laws: An empirical analysis of compute-optimal inference for LLM problem-solving[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2408.00724<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u7cfb\u7edf\u7814\u7a76\u4e86\u63a8\u7406\u9636\u6bb5\u7684scaling law\u3002\u53d1\u73b0\u63a8\u7406\u8ba1\u7b97\u4e0e\u6a21\u578b\u6027\u80fd\u4e4b\u95f4\u5b58\u5728\u72ec\u7279\u7684scaling\u89c4\u5f8b\uff0c\u4e3a\u63a8\u7406\u9636\u6bb5\u7684\u8ba1\u7b97\u8d44\u6e90\u5206\u914d\u63d0\u4f9b\u4e86\u7406\u8bba\u4f9d\u636e\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[8] Chen M, Liu Y, Zhang W. Towards a universal scaling law of LLM training and inference[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/openreview.net\/pdf?id=0O69Ng9LFT<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u7edf\u4e00\u7684\u8bad\u7ec3\u548c\u63a8\u7406scaling\u7406\u8bba\u6846\u67b6\u3002\u901a\u8fc7\u5efa\u7acb\u7edf\u4e00\u7684\u6570\u5b66\u6a21\u578b\uff0c\u63ed\u793a\u4e86\u8bad\u7ec3\u548c\u63a8\u7406\u9636\u6bb5scaling law\u7684\u5185\u5728\u8054\u7cfb\uff0c\u5177\u6709\u91cd\u8981\u7684\u7406\u8bba\u4ef7\u503c\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[9] Michaud E J, Liu Z, Girit U, Tegmark M. The quantization model of neural scaling[J]. 
arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/www.researchgate.net\/publication\/369476862_The_Quantization_Model_of_Neural_Scaling<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4ece\u91cf\u5b50\u5316\u89d2\u5ea6\u89e3\u91cascaling law\u73b0\u8c61\u3002\u521b\u65b0\u6027\u5730\u63d0\u51fa\u6a21\u578b\u80fd\u529b\u53ef\u5206\u89e3\u4e3a\u79bb\u6563\u6280\u80fd\u5355\u5143\uff08quanta\uff09\u7684\u5047\u8bbe\uff0c\u4e3a\u7406\u89e3\u795e\u7ecf\u7f51\u7edc\u7684scaling\u7279\u6027\u63d0\u4f9b\u4e86\u65b0\u7684\u7406\u8bba\u5de5\u5177\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[10] Bordelon B, Atanasov A, Pehlevan C. A dynamical model of neural scaling laws[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2402.01092<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5efa\u7acb\u4e86\u52a8\u529b\u5b66\u89c6\u89d2\u4e0b\u7684scaling law\u6a21\u578b\u3002\u901a\u8fc7\u5f15\u5165\u975e\u7ebf\u6027\u52a8\u529b\u5b66\u7406\u8bba\uff0c\u6df1\u5165\u63ed\u793a\u4e86\u6a21\u578b\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u7684\u52a8\u6001\u7279\u6027\uff0c\u4e30\u5bcc\u4e86scaling law\u7684\u7406\u8bba\u5185\u6db5\u3002<\/span><\/p>\n<\/blockquote>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\"><br  \/><\/span><\/strong><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">2. \u57fa\u4e8e\u641c\u7d22\u4e0e\u8499\u7279\u5361\u6d1b\u6811\u7684\u63a8\u7406\u4f18\u5316<\/span><\/strong><\/span><\/h2>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"316\" data-backw=\"562\" data-imgfileid=\"100217446\" data-ratio=\"0.562962962962963\" data-s=\"300,640\"  data-type=\"png\" data-w=\"1080\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-a136d3b561cd5ac4a3ef5f203e39fd74.png\"  \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[11] Li Y, Wei F, Zhao J, et al. 
RAIN: Your language models can align themselves without finetuning[J]. arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2309.07124<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u57fa\u4e8e\u641c\u7d22\u7684\u81ea\u5bf9\u9f50\u65b9\u6cd5\uff0c\u65e0\u9700\u5fae\u8c03\u5373\u53ef\u63d0\u5347\u6a21\u578b\u6027\u80fd\u3002\u901a\u8fc7\u667a\u80fd\u641c\u7d22\u7b56\u7565\u4f18\u5316\u8f93\u51fa,\u5728\u4fdd\u6301\u6a21\u578b\u53c2\u6570\u4e0d\u53d8\u7684\u60c5\u51b5\u4e0b\u5b9e\u73b0\u5bf9\u9f50\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[12] Feng X, Wan Z, Wen M, et al. AlphaZero-like tree-search can guide large language model decoding and training[J]. arXiv preprint, 2024. 
\u4ee3\u7801\u94fe\u63a5\uff1ahttps:\/\/github.com\/waterhorse1\/LLM_Tree_Search<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2309.17179<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u5c06AlphaZero\u7684MCTS\u65b9\u6cd5\u6210\u529f\u8fc1\u79fb\u5230\u8bed\u8a00\u6a21\u578b\u9886\u57df\u3002\u901a\u8fc7\u6811\u641c\u7d22\u5f15\u5bfc\u89e3\u7801\u548c\u8bad\u7ec3\u8fc7\u7a0b\uff0c\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\u548c\u51b3\u7b56\u8d28\u91cf\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[13] Yao S, Yu D, Zhao J, et al. 
Tree of thoughts: Deliberate problem solving with large language models[C]\/\/Advances in Neural Information Processing Systems, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2305.10601<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u601d\u7ef4\u6811\u6846\u67b6\uff0c\u5c06\u63a8\u7406\u8fc7\u7a0b\u5efa\u6a21\u4e3a\u6811\u7ed3\u6784\u3002\u901a\u8fc7\u7cfb\u7edf\u6027\u63a2\u7d22\u548c\u8bc4\u4f30\u4e0d\u540c\u63a8\u7406\u8def\u5f84\uff0c\u4e3a\u590d\u6742\u95ee\u9898\u6c42\u89e3\u63d0\u4f9b\u4e86\u65b0\u8303\u5f0f\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[14] Silver D, Hubert T, Schrittwieser J, et al. Mastering chess and shogi by self-play with a general reinforcement learning algorithm[J]. 
arXiv preprint, 2017.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/1712.01815<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">AlphaZero\u7684\u7ecf\u5178\u8bba\u6587\uff0c\u5960\u5b9a\u4e86MCTS\u4e0e\u6df1\u5ea6\u5b66\u4e60\u7ed3\u5408\u7684\u57fa\u7840\u3002\u5176\u6838\u5fc3\u601d\u60f3\u5bf9\u5927\u8bed\u8a00\u6a21\u578b\u7684\u63a8\u7406\u4f18\u5316\u5177\u6709\u91cd\u8981\u7684\u542f\u53d1\u610f\u4e49\u3002<\/span><\/p>\n<\/blockquote>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;font-family: mp-quote, -apple-system-font, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.034em;\">[15] Hao S, Gu Y, Ma H, et al. Reasoning with Language Model is Planning with World Model[J]. 
arXiv preprint, 2023.<\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2305.14992<\/span><\/section>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51faRAP\u6846\u67b6\uff0c\u7ed3\u5408\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u7b97\u6cd5\u8fdb\u884c\u7b56\u7565\u6027\u63a2\u7d22\uff0c\u5b9e\u73b0\u5728\u63a8\u7406\u7a7a\u95f4\u4e2d\u7684\u9ad8\u6548\u641c\u7d22\u548c\u89c4\u5212\uff0c\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u5728\u6570\u5b66\u63a8\u7406\u548c\u903b\u8f91\u63a8\u7406\u7b49\u4efb\u52a1\u4e0a\u7684\u8868\u73b0\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">[16] Zhu X, Wang J, Zhang L, et al. 
Solving Math Word Problems via Cooperative Reasoning induced Language Models[C]\/\/Annual Meeting of the Association for Computational Linguistics, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">https:\/\/arxiv.org\/abs\/2210.16257<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: justify;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">\u53d7\u4eba\u7c7b\u53cc\u7cfb\u7edf\u63a8\u7406\u6846\u67b6\u7684\u542f\u53d1\uff0c\u63d0\u51fa\u4e86CoRe\u6846\u67b6\uff0c\u901a\u8fc7\u6a21\u62df\u4eba\u7c7b\u7684\u5373\u65f6\u53cd\u5e94\u7cfb\u7edf\uff08\u751f\u6210\u5668\uff09\u548c\u6df1\u5ea6\u601d\u8003\u7cfb\u7edf\uff08\u9a8c\u8bc1\u5668\uff09\u7684\u534f\u4f5c\u63a8\u7406\u65b9\u5f0f\uff0c\u663e\u8457\u63d0\u5347\u4e86\u9884\u8bad\u7ec3\u8bed\u8a00\u6a21\u578b\u5728\u6570\u5b66\u5e94\u7528\u9898\u89e3\u51b3\u4e2d\u7684\u6027\u80fd\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">[17] Zhang D, Zhoubian S, Yue Y, et al. ReST-MCTS*: LLM Self-Training via Process Reward Guided Tree Search[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">https:\/\/arxiv.org\/abs\/2406.03816<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: justify;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">\u63d0\u51fa\u4e86ReST-MCTS*\u65b9\u6cd5\uff0c\u901a\u8fc7\u5c06\u8fc7\u7a0b\u5956\u52b1\u6307\u5bfc\u4e0e\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u76f8\u7ed3\u5408\uff0c\u5229\u7528\u6700\u7ec8\u6b63\u786e\u7b54\u6848\u6765\u63a8\u65ad\u6bcf\u4e00\u6b65\u7684\u8fc7\u7a0b\u5956\u52b1\uff0c\u4ece\u800c\u6536\u96c6\u9ad8\u8d28\u91cf\u7684\u63a8\u7406\u8f68\u8ff9\u6765\u8bad\u7ec3\u7b56\u7565\u548c\u5956\u52b1\u6a21\u578b\uff0c\u4e0d\u518d\u9700\u8981\u4f20\u7edf\u65b9\u6cd5\u4e2d\u5bf9\u6bcf\u4e00\u6b65\u8fdb\u884c\u4eba\u5de5\u6807\u6ce8\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">[18] Hu Z, Liu C, Feng X, et al. Uncertainty of Thoughts: Uncertainty-Aware Planning Enhances Information Seeking in Large Language Models[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">https:\/\/arxiv.org\/abs\/2402.03271<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: justify;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">\u63d0\u51fa\u4e86UoT\u7b97\u6cd5\uff0c\u901a\u8fc7\u7ed3\u5408\u4e0d\u786e\u5b9a\u6027\u611f\u77e5\u6a21\u62df\u3001\u57fa\u4e8e\u4fe1\u606f\u589e\u76ca\u7684\u5956\u52b1\u673a\u5236\u548c\u5956\u52b1\u4f20\u64ad\u65b9\u6848\uff0c\u4f7f\u5927\u8bed\u8a00\u6a21\u578b\u80fd\u591f\u4e3b\u52a8\u63d0\u51fa\u6709\u6548\u95ee\u9898\u6765\u83b7\u53d6\u4fe1\u606f\uff0c\u663e\u8457\u63d0\u5347\u4e86\u5728\u533b\u7597\u8bca\u65ad\u3001\u6545\u969c\u6392\u67e5\u7b49\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">[19] Tian Y, Peng B, Song L, et al. Toward Self-Improvement of LLMs via Imagination, Searching, and Criticizing[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;text-align: left;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;\">https:\/\/arxiv.org\/abs\/2404.12253<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86AlphaLLM\u6846\u67b6\uff0c\u901a\u8fc7\u7ed3\u5408\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u4e0e\u4e09\u91cd\u8bc4\u5224\u6a21\u578b\uff0c\u5efa\u7acb\u4e86\u4e00\u4e2a\u65e0\u9700\u989d\u5916\u6807\u6ce8\u7684\u81ea\u6211\u63d0\u5347\u5faa\u73af\u7cfb\u7edf\uff0c\u4e3a\u5982\u4f55\u89e3\u51b3\u641c\u7d22\u7a7a\u95f4\u8fc7\u5927\u548c\u6a21\u578b\u53cd\u9988\u4e3b\u89c2\u6027\u63d0\u51fa\u65b0\u7684\u89c1\u89e3\u3002<\/span><\/p>\n<\/blockquote>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\"><br  \/><\/span><\/strong><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">3. 
\u57fa\u4e8e\u5f3a\u5316\u5b66\u4e60\u7684\u4f18\u5316<\/span><\/strong><\/span><\/h2>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"316\" data-backw=\"562\" data-imgfileid=\"100217447\" data-ratio=\"0.562962962962963\" data-s=\"300,640\"  data-type=\"png\" data-w=\"1080\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-bea40f2d434f3b74a89eb8bf623a9f30.png\"  \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[20] Zhang R, Xu Z, Ma C, et al. A survey on self-play methods in reinforcement learning[J]. arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2408.01072<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5168\u9762\u7efc\u8ff0\u4e86\u81ea\u5bf9\u5f08\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u7684\u53d1\u5c55\u5386\u7a0b\u3002\u7cfb\u7edf\u603b\u7ed3\u4e86\u6838\u5fc3\u6280\u672f\u548c\u5e94\u7528\u6210\u679c\uff0c\u4e3a\u5927\u6a21\u578b\u4f18\u5316\u63d0\u4f9b\u4e86\u91cd\u8981\u7684\u65b9\u6cd5\u8bba\u53c2\u8003\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[21] Kaufmann T, Weng P, Bengs V, et al. A survey of reinforcement learning from human feedback[J]. 
arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2312.14925<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7cfb\u7edf\u603b\u7ed3RLHF\u65b9\u6cd5\u7684\u6280\u672f\u53d1\u5c55\u3002\u6df1\u5165\u5206\u6790\u4e86\u4eba\u7c7b\u53cd\u9988\u5728\u6a21\u578b\u4f18\u5316\u4e2d\u7684\u4f5c\u7528\u673a\u5236\uff0c\u4e3a\u63d0\u5347\u6a21\u578b\u6027\u80fd\u6307\u660e\u4e86\u65b9\u5411\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[22] Dong Q, Dong L, Zhang X, et al. Self-boosting large language models with synthetic preference data[J]. arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2410.06961<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u521b\u65b0\u6027\u5730\u63d0\u51fa\u4f7f\u7528\u5408\u6210\u6570\u636e\u8fdb\u884c\u6a21\u578b\u81ea\u6211\u63d0\u5347\u3002\u901a\u8fc7\u81ea\u52a8\u751f\u6210\u9ad8\u8d28\u91cf\u7684\u504f\u597d\u6570\u636e\uff0c\u5b9e\u73b0\u4e86\u6a21\u578b\u80fd\u529b\u7684\u6301\u7eed\u589e\u5f3a\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[23] Ouyang L, Wu J, Jiang X, et al. 
Training language models to follow instructions with human feedback[C]\/\/Advances in Neural Information Processing Systems, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2203.02155<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">InstructGPT\u5f00\u521b\u6027\u5de5\u4f5c,\u5efa\u7acb\u4e86RLHF\u8303\u5f0f\u3002\u901a\u8fc7\u4eba\u7c7b\u53cd\u9988\u4f18\u5316\u6a21\u578b\u884c\u4e3a,\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u7684\u6307\u4ee4\u9075\u4ece\u80fd\u529b\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[24] Schulman J, Wolski F, Dhariwal P, et al. Proximal policy optimization algorithms[J]. arXiv preprint, 2017.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/1707.06347<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">PPO\u7b97\u6cd5\u7684\u5960\u57fa\u6027\u5de5\u4f5c\uff0c\u662fRLHF\u4e2d\u7684\u6838\u5fc3\u7ec4\u4ef6\u3002\u901a\u8fc7\u7ea6\u675f\u7b56\u7565\u66f4\u65b0\u6b65\u957f\uff0c\u5b9e\u73b0\u4e86\u7a33\u5b9a\u9ad8\u6548\u7684\u6a21\u578b\u4f18\u5316\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[25] Rafailov R, Sharma A, Mitchell E, et al. 
Direct preference optimization: Your language model is secretly a reward model[J]. arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2305.18290<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51faDPO\u65b9\u6cd5\uff0c\u5c06\u8bed\u8a00\u6a21\u578b\u89c6\u4e3a\u9690\u5f0f\u5956\u52b1\u6a21\u578b\u3002\u8fd9\u4e00\u6d1e\u89c1\u7b80\u5316\u4e86\u6a21\u578b\u4f18\u5316\u6d41\u7a0b\uff0c\u4e3aRLHF\u63d0\u4f9b\u4e86\u65b0\u7684\u7406\u8bba\u89c6\u89d2\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[26] Brown B, Juravsky J, Ehrlich R, et al. Large Language Monkeys: Scaling Inference Compute with Repeated Sampling[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2407.21787<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u521b\u65b0\u5730\u63a2\u7d22\u4e86\u63a8\u7406\u9636\u6bb5\u7684\u591a\u6b21\u91c7\u6837\u7b56\u7565\uff0c\u53d1\u73b0\u95ee\u9898\u89e3\u51b3\u7387\u4e0e\u91c7\u6837\u6b21\u6570\u5448\u5bf9\u6570\u7ebf\u6027\u5173\u7cfb\uff0c\u5e76\u8bc1\u660e\u4f7f\u7528\u4fbf\u5b9c\u6a21\u578b\u591a\u6b21\u91c7\u6837\u6bd4\u4f7f\u7528\u6602\u8d35\u6a21\u578b\u5355\u6b21\u91c7\u6837\u66f4\u5177\u6210\u672c\u6548\u76ca\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[27] Wu Y, Sun Z, Li S, et al. Inference Scaling Laws: An Empirical Analysis of Compute-Optimal Inference for Problem-Solving with Language Models[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2408.00724<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u7cfb\u7edf\u7814\u7a76\u4e86\u5927\u8bed\u8a00\u6a21\u578b\u5728\u63a8\u7406\u9636\u6bb5\u7684\u6700\u4f18\u914d\u7f6e\u7b56\u7565\uff0c\u63a2\u7d22\u4e86\u5982\u4f55\u5728\u6709\u9650\u8ba1\u7b97\u8d44\u6e90\u4e0b\u5e73\u8861\u63a8\u7406\u8ba1\u7b97\u91cf\u548c\u6027\u80fd\u63d0\u5347\u3002\u7814\u7a76\u53d1\u73b0\uff0c\u5c06\u5c0f\u578b\u6a21\u578b\u914d\u5408\u65b0\u578b\u6811\u641c\u7d22\u7b97\u6cd5\u4f7f\u7528\uff0c\u5f80\u5f80\u80fd\u8fbe\u5230\u6700\u4f73\u7684\u8ba1\u7b97\u6548\u7387\u5e73\u8861\u70b9\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[28] Bansal H, Hosseini A, Agarwal R, et al. Smaller, Weaker, Yet Better: Training LLM Reasoners via Compute-Optimal Sampling[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2408.16737<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u6311\u6218\u4e86\u4f7f\u7528\u5f3a\u5927\u6a21\u578b\u751f\u6210\u9ad8\u8d28\u91cf\u8bad\u7ec3\u6570\u636e\u7684\u4f20\u7edf\u89c2\u70b9\uff0c\u901a\u8fc7\u5b9e\u9a8c\u8bc1\u660e\u4f7f\u7528\u8f83\u5f31\u4f46\u8ba1\u7b97\u6210\u672c\u66f4\u4f4e\u7684\u6a21\u578b\u751f\u6210\u8bad\u7ec3\u6570\u636e\uff0c\u80fd\u5728\u56fa\u5b9a\u63a8\u7406\u9884\u7b97\u4e0b\u83b7\u5f97\u66f4\u597d\u7684\u6027\u80fd\u63d0\u5347\uff0c\u8fd9\u79cd\u65b9\u6cd5\u5728\u591a\u4e2a\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u90fd\u4f18\u4e8e\u4f7f\u7528\u5f3a\u5927\u6a21\u578b\u751f\u6210\u6570\u636e\u7684\u4f20\u7edf\u65b9\u6cd5\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[29] Snell C, Lee J, Xu K, et al. Scaling LLM Test-Time Compute Optimally can be More Effective than Scaling Model Parameters[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2408.03314<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u4e00\u79cd\u57fa\u4e8e\u4efb\u52a1\u96be\u5ea6\u7684\u81ea\u9002\u5e94\u8ba1\u7b97\u5206\u914d\u7b56\u7565\uff0c\u901a\u8fc7\u5bf9\u63a8\u7406\u65f6\u8ba1\u7b97\u8d44\u6e90\u7684\u667a\u80fd\u5206\u914d\uff0c\u5728\u56fa\u5b9a\u8ba1\u7b97\u91cf\u4e0b\u5c06\u6d4b\u8bd5\u65f6\u6027\u80fd\u63d0\u5347\u6548\u7387\u63d0\u9ad8\u4e864\u500d\u4ee5\u4e0a\uff0c\u751a\u81f3\u4f7f\u5c0f\u6a21\u578b\u5728\u67d0\u4e9b\u4efb\u52a1\u4e0a\u8d85\u8d8a\u4e8614\u500d\u5927\u7684\u6a21\u578b\u6027\u80fd\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[30] Kumar A, Zhuang V, Agarwal R, et al. Training Language Models to Self-Correct via Reinforcement Learning[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2409.12917<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u4e00\u79cd\u540d\u4e3aSCoRe\u7684\u591a\u8f6e\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\uff0c\u901a\u8fc7\u5b8c\u5168\u81ea\u751f\u6210\u7684\u6570\u636e\u548c\u7279\u6b8a\u7684\u6b63\u5219\u5316\u7b56\u7565\u6765\u8bad\u7ec3\u6a21\u578b\u7684\u81ea\u7ea0\u9519\u80fd\u529b\uff0c\u907f\u514d\u4e86\u4f20\u7edf\u65b9\u6cd5\u4e2d\u7684\u5206\u5e03\u4e0d\u5339\u914d\u548c\u884c\u4e3a\u5d29\u6e83\u95ee\u9898\uff0c\u5728MATH\u548cHumanEval\u57fa\u51c6\u6d4b\u8bd5\u4e0a\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u7684\u81ea\u7ea0\u9519\u6027\u80fd\u3002<\/span><\/p>\n<\/blockquote>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">4. 
\u601d\u7ef4\u94fe\u65b9\u6cd5\u4e0e\u5185\u5316\u673a\u5236<\/span><\/strong><\/span><\/h2>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"316\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"316\" data-imgfileid=\"100217448\" data-ratio=\"0.562962962962963\" data-s=\"300,640\"  data-type=\"png\" data-w=\"1080\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-d298c3ef8cf9e3bfe5f158a3b6fa9d40.png\"  \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[31] Wei J, Wang X, Schuurmans D, et al. Chain of thought prompting elicits reasoning in large language models[C]\/\/Advances in Neural Information Processing Systems, 2022: 35.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2201.11903<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u7cfb\u7edf\u6027\u63d0\u51fa\u601d\u7ef4\u94fe\u63d0\u793a\u65b9\u6cd5\uff0c\u5f00\u521b\u4e86\u663e\u5f0f\u63a8\u7406\u7684\u65b0\u8303\u5f0f\u3002\u901a\u8fc7\u63d0\u4f9b\u4e2d\u95f4\u63a8\u7406\u6b65\u9aa4\u7684\u793a\u4f8b\uff0c\u4f7f\u6a21\u578b\u80fd\u591f\u751f\u6210\u53ef\u89e3\u91ca\u7684\u63a8\u7406\u8fc7\u7a0b\uff0c\u5bf9\u540e\u7eed\u7814\u7a76\u4ea7\u751f\u6df1\u8fdc\u5f71\u54cd\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[32] Zhou D, 
Sch\u00e4rli N, Hou L, et al. Least-to-most prompting enables complex reasoning in large language models[C]\/\/International Conference on Learning Representations, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2205.10625<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u6e10\u8fdb\u5f0f\u601d\u7ef4\u94fe\u65b9\u6cd5\uff0c\u5c06\u590d\u6742\u95ee\u9898\u5206\u89e3\u4e3a\u7b80\u5355\u5b50\u95ee\u9898\u9010\u6b65\u6c42\u89e3\u3002\u8fd9\u79cd\u81ea\u4e0b\u800c\u4e0a\u7684\u63a8\u7406\u7b56\u7565\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u5904\u7406\u590d\u6742\u4efb\u52a1\u7684\u80fd\u529b\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[33] Wang X, Wei J, Schuurmans D, et al. Self-consistency improves chain of thought reasoning in language models[J]. 
arXiv preprint, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2203.11171<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u521b\u65b0\u6027\u5730\u5f15\u5165\u81ea\u4e00\u81f4\u6027\u673a\u5236\u63d0\u5347\u63a8\u7406\u53ef\u9760\u6027\u3002\u901a\u8fc7\u591a\u8def\u5f84\u63a8\u7406\u548c\u4e00\u81f4\u6027\u6295\u7968\uff0c\u663e\u8457\u6539\u5584\u4e86\u6a21\u578b\u63a8\u7406\u7ed3\u679c\u7684\u7a33\u5b9a\u6027\u548c\u51c6\u786e\u6027\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[34] Chen Q, Qin L, Wang J, et al. Unlocking the boundaries of thought: A reasoning granularity framework[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2410.05695<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u601d\u7ef4\u94fe\u7c92\u5ea6\u6846\u67b6\uff0c\u7cfb\u7edf\u7814\u7a76\u4e86\u63a8\u7406\u6b65\u9aa4\u7684\u7c92\u5ea6\u5bf9\u6a21\u578b\u6027\u80fd\u7684\u5f71\u54cd\u3002\u8be5\u5de5\u4f5c\u4e3a\u601d\u7ef4\u94fe\u65b9\u6cd5\u7684\u4f18\u5316\u63d0\u4f9b\u4e86\u7406\u8bba\u6307\u5bfc\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[35] Su D, Sukhbaatar S, Rabbat M, et al. Dualformer: Controllable fast and slow thinking by learning with randomized reasoning traces[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2410.09918<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5c06\u5fc3\u7406\u5b66\u4e2d\u7684\u5feb\u6162\u601d\u7ef4\u7406\u8bba\u5f15\u5165\u6a21\u578b\u67b6\u6784\u8bbe\u8ba1\u3002\u901a\u8fc7\u968f\u673a\u63a8\u7406\u8f68\u8ff9\u5b66\u4e60,\u5b9e\u73b0\u4e86\u5feb\u901f\u76f4\u89c9\u53cd\u5e94\u548c\u6df1\u5ea6\u63a8\u7406\u80fd\u529b\u7684\u7edf\u4e00\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[36] Nye M, Andreassen A J, Gur-Ari G, et al. Show your work: Scratchpads for intermediate computation with language models[J]. 
arXiv preprint, 2021.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2112.00114<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u521b\u65b0\u6027\u5730\u63d0\u51fa\u4e2d\u95f4\u8ba1\u7b97\u8349\u7a3f\u7684\u6982\u5ff5\uff0c\u4f7f\u6a21\u578b\u5177\u5907\u7c7b\u4f3c\u4eba\u7c7b\u7684\u6f14\u7b97\u80fd\u529b\u3002\u8fd9\u79cd\u65b9\u6cd5\u663e\u8457\u63d0\u5347\u4e86\u6570\u5b66\u548c\u7f16\u7a0b\u7b49\u9700\u8981\u6b65\u9aa4\u5206\u89e3\u7684\u4efb\u52a1\u8868\u73b0\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[37] Li M, Zhao Y, Yu B, et al. API-Bank: A Comprehensive benchmark for tool-augmented LLMs[J]. 
arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2304.08244<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u7cfb\u7edf\u7814\u7a76\u601d\u7ef4\u94fe\u5728\u5de5\u5177\u4f7f\u7528\u573a\u666f\u7684\u5e94\u7528\u3002\u901a\u8fc7\u6784\u5efa\u5168\u9762\u7684\u8bc4\u6d4b\u57fa\u51c6\uff0c\u4e3a\u589e\u5f3a\u6a21\u578b\u7684\u5de5\u5177\u4f7f\u7528\u80fd\u529b\u63d0\u4f9b\u4e86\u91cd\u8981\u53c2\u8003\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[38] Chu Z, Chen J, Chen Q, et al. A survey of chain of thought reasoning: Advances, frontiers and future[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/ar5iv.labs.arxiv.org\/html\/2309.15402<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5168\u9762\u7efc\u8ff0\u601d\u7ef4\u94fe\u6280\u672f\u7684\u53d1\u5c55\u5386\u7a0b\u548c\u524d\u6cbf\u8fdb\u5c55\u3002\u7cfb\u7edf\u68b3\u7406\u4e86\u5173\u952e\u6280\u672f\u3001\u5e94\u7528\u573a\u666f\u548c\u672a\u6765\u65b9\u5411\uff0c\u4e3a\u7814\u7a76\u8005\u63d0\u4f9b\u4e86\u5b9d\u8d35\u7684\u53c2\u8003\u8d44\u6599\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[39] Zhang X, Ding D. Supervised Chain of Thought[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2410.14198<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: justify;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4ece\u8868\u8fbe\u80fd\u529b\u7684\u7406\u8bba\u89d2\u5ea6\u8bc1\u660e\u4e86\u601d\u7ef4\u94fe(CoT)\u53ef\u4ee5\u4f7f\u5e38\u6570\u6df1\u5ea6\u7684Transformer\u6a21\u578b\u5177\u5907\u5904\u7406\u4e32\u884c\u8ba1\u7b97\u7684\u80fd\u529b\uff0c\u5e76\u901a\u8fc7\u7406\u8bba\u5206\u6790\u548c\u5b9e\u9a8c\u9a8c\u8bc1\u8868\u660e\u4f7f\u7528CoT\u7684\u5e38\u6570\u6df1\u5ea6Transformer\u53ef\u4ee5\u89e3\u51b3\u4efb\u4f55\u7531\u5927\u5c0f\u4e3aT\u7684\u5e03\u5c14\u7535\u8def\u53ef\u89e3\u7684\u95ee\u9898\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[40] Li Z, Liu H, Zhou D, et al. Chain of Thought Empowers Transformers to Solve Inherently Serial Problems[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2402.12875<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: justify;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u540c\u6837\u8bc1\u660e\u4e86\u601d\u7ef4\u94fe(CoT)\u53ef\u4ee5\u4f7f\u5e38\u6570\u6df1\u5ea6\u7684Transformer\u6a21\u578b\u5177\u5907\u5904\u7406\u4e32\u884c\u8ba1\u7b97\u7684\u80fd\u529b\uff0c\u7a81\u7834\u4e86\u5176\u5728\u4e0d\u4f7f\u7528CoT\u65f6\u53ea\u80fd\u89e3\u51b3AC^0\u7c7b\u95ee\u9898\u7684\u8868\u8fbe\u80fd\u529b\u4e0a\u9650\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[41] Deng Y, Choi Y, Shieber S. From Explicit CoT to Implicit CoT: Learning to Internalize CoT Step by Step[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2405.14838<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: justify;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e00\u79cd\u5c06\u663e\u5f0f\u601d\u7ef4\u94fe(CoT)\u5185\u5316\u4e3a\u9690\u5f0f\u63a8\u7406\u7684\u8bad\u7ec3\u65b9\u6cd5\uff0c\u901a\u8fc7\u9010\u6b65\u79fb\u9664\u4e2d\u95f4\u6b65\u9aa4\u5e76\u5fae\u8c03\u6a21\u578b\uff0c\u4f7f\u6a21\u578b\u5728\u4e0d\u8f93\u51fa\u4e2d\u95f4\u63a8\u7406\u6b65\u9aa4\u7684\u60c5\u51b5\u4e0b\u4fdd\u6301\u9ad8\u6027\u80fd\u8868\u73b0\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[42] Yang L, Yu Z, Zhang T, et al. Buffer of Thoughts: Thought-Augmented Reasoning with Large Language Models[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2406.04271<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: justify;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u601d\u7ef4\u7f13\u51b2\u533a(BoT)\u6846\u67b6\uff0c\u901a\u8fc7\u5b58\u50a8\u548c\u52a8\u6001\u66f4\u65b0\u9ad8\u5c42\u6b21\u601d\u7ef4\u6a21\u677f\uff0c\u5e76\u6839\u636e\u5177\u4f53\u95ee\u9898\u8fdb\u884c\u81ea\u9002\u5e94\u5b9e\u4f8b\u5316\uff0c\u663e\u8457\u63d0\u5347\u4e86\u5927\u8bed\u8a00\u6a21\u578b\u5728\u63a8\u7406\u4efb\u52a1\u4e2d\u7684\u51c6\u786e\u6027\u3001\u6548\u7387\u548c\u9c81\u68d2\u6027\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[43] Li J, Cao P, Chen Y, et al. Towards Faithful Chain-of-Thought: Large Language Models are Bridging Reasoners[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2405.18915<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: justify;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u901a\u8fc7\u6df1\u5165\u5206\u6790\u601d\u7ef4\u94fe\u6b65\u9aa4\u7684\u7c92\u5ea6\u548c\u63a8\u7406\u7ec4\u4ef6\u95f4\u7684\u56e0\u679c\u5173\u7cfb\uff0c\u63d0\u51fa\u4e86\u57fa\u4e8e\u63a8\u7406\u6865\u63a5\u7684\u65b9\u6cd5\uff0c\u901a\u8fc7\u5f52\u56e0\u6280\u672f\u548c\u8bed\u4e49\u4e00\u81f4\u6027\u7b5b\u9009\u6765\u7f13\u89e3\u5927\u8bed\u8a00\u6a21\u578b\u5728\u601d\u7ef4\u94fe\u63a8\u7406\u4e2d\u7684\u4e0d\u5fe0\u8bda\u95ee\u9898\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[44] Lyu Q, Havaldar S, Stein A, et al. Faithful Chain-of-Thought Reasoning[J]. 
arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2301.13379<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51faFaithful CoT\u6846\u67b6\uff0c\u901a\u8fc7\u5c06\u63a8\u7406\u8fc7\u7a0b\u5206\u4e3a\u81ea\u7136\u8bed\u8a00\u8f6c\u7b26\u53f7\u63a8\u7406\u94fe\u548c\u786e\u5b9a\u6027\u6c42\u89e3\u5668\u4e24\u4e2a\u9636\u6bb5\uff0c\u786e\u4fdd\u4e86\u601d\u7ef4\u94fe\u7684\u5fe0\u8bda\u6027\uff0c\u540c\u65f6\u5728\u591a\u4e2a\u57fa\u51c6\u6d4b\u8bd5\u4e2d\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u6027\u80fd\u3002<\/span><\/p>\n<\/blockquote>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\"><br  \/><\/span><\/strong><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"font-size: 15px;\">5. 
\u81ea\u6211\u6539\u8fdb\u4e0e\u63a8\u7406\u9a8c\u8bc1<\/span><\/strong><\/span><\/h2>\n<p style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 8px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"316\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"316\" data-imgfileid=\"100217449\" data-ratio=\"0.562962962962963\" data-s=\"300,640\"  data-type=\"png\" data-w=\"1080\" style=\"height: auto;width: 100%;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-d07ff59b9f57539cb60820f703f6ff22.png\"  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4f20\u7edf\u5927\u8bed\u8a00\u6a21\u578b\u53d7\u9650\u4e8e\u8bad\u7ec3\u6570\u636e\u7684\u8d28\u91cf\u4e0a\u9650\uff0c\u96be\u4ee5\u50cf\u4eba\u7c7b\u4e00\u6837\u901a\u8fc7\u8bd5\u9519\u548c\u53cd\u601d\u6301\u7eed\u8fdb\u5316\u3002\u81ea\u6211\u6539\u8fdb\u4e0e\u63a8\u7406\u9a8c\u8bc1\u65e8\u5728\u7a81\u7834\u8fd9\u4e00\u9650\u5236\uff0c\u901a\u8fc7\u6784\u5efa\u9a8c\u8bc1\u673a\u5236\u548c\u6539\u8fdb\u7b56\u7565\uff0c\u8ba9\u6a21\u578b\u5177\u5907\u81ea\u4e3b\u5b66\u4e60\u548c\u6301\u7eed\u63d0\u5347\u7684\u80fd\u529b\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[45]Tao Z, Lin T E, Chen X, et al. A survey on self-evolution of large language models[J]. 
arXiv preprint arXiv:2404.14387, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2404.14387<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u7bc7\u7cfb\u7edf\u6027\u603b\u7ed3\u5927\u6a21\u578b\u81ea\u6211\u8fdb\u5316\u65b9\u6cd5\u7684\u7efc\u8ff0\u6587\u7ae0\uff0c\u63d0\u51fa\u4e86\u7ecf\u9a8c\u83b7\u53d6\u3001\u7ecf\u9a8c\u63d0\u70bc\u3001\u66f4\u65b0\u548c\u8bc4\u4f30\u7684\u5b8c\u6574\u6982\u5ff5\u6846\u67b6\uff0c\u4e3a\u8be5\u9886\u57df\u7814\u7a76\u63d0\u4f9b\u4e86\u91cd\u8981\u6307\u5bfc\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[46]Zelikman E, Wu Y H, Mu J, et al. STaR: Self-taught reasoner bootstrapping reasoning with reasoning[C]\/\/Proc. the 36th International Conference on Neural Information Processing Systems. 
2024, 1126.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2203.14465<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5f00\u521b\u6027\u5730\u63d0\u51fa\u8ba9\u6a21\u578b\u4ece\u81ea\u8eab\u63a8\u7406\u8fc7\u7a0b\u4e2d\u5b66\u4e60\u7684\u65b9\u6cd5\uff0c\u4e3a\u5927\u6a21\u578b\u81ea\u6211\u6539\u8fdb\u9886\u57df\u5960\u5b9a\u4e86\u91cd\u8981\u57fa\u7840\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[47]Lightman H, Kosaraju V, Burda Y, et al. Let&#8217;s verify step by step[J]. arXiv preprint arXiv:2305.20050, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2305.20050<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">OpenAI\u63d0\u51fa\u7684\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b(PRM)\u6846\u67b6\uff0c\u901a\u8fc7\u7ec6\u7c92\u5ea6\u7684\u6b65\u9aa4\u9a8c\u8bc1\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u63a8\u7406\u80fd\u529b\uff0c\u5bf9ChatGPT o1\u7684\u53d1\u5c55\u4ea7\u751f\u76f4\u63a5\u5f71\u54cd\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[48] Huang J, Gu S S, Hou L, et al. Large language models can self-improve[J]. 
arXiv preprint, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2210.11610<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7cfb\u7edf\u63a2\u8ba8\u4e86\u5927\u6a21\u578b\u81ea\u6211\u6539\u8fdb\u7684\u53ef\u80fd\u6027\u4e0e\u65b9\u6cd5\u3002\u901a\u8fc7\u5b9e\u9a8c\u8bc1\u660e\u4e86\u6a21\u578b\u80fd\u591f\u901a\u8fc7\u81ea\u6211\u5b66\u4e60\u6301\u7eed\u63d0\u5347\u6027\u80fd\uff0c\u4e3a\u672a\u6765\u7814\u7a76\u6307\u660e\u4e86\u65b9\u5411\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[49]Peng K, Ding L, Zhong Q, et al. Token-level self-evolution training for sequence-to-sequence learning[C]\/\/Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers). 2023: 841-850.<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9996\u6b21\u5c06\u81ea\u6211\u8fdb\u5316\u6269\u5c55\u5230token\u7ea7\u522b\uff0c\u901a\u8fc7\u65b0\u7684\u6b63\u5219\u5316\u65b9\u6cd5\u663e\u8457\u63d0\u5347\u4e86\u5e8f\u5217\u751f\u6210\u4efb\u52a1\u7684\u6027\u80fd\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[50] Zhao J, Tong J, Mou Y, et al. Exploring the compositional deficiency of large language models in mathematical reasoning through trap problems[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2405.06680<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u901a\u8fc7\u8bbe\u8ba1\u9677\u9631\u95ee\u9898\u6df1\u5165\u5206\u6790\u6a21\u578b\u63a8\u7406\u7684\u5c40\u9650\u6027\u3002\u63ed\u793a\u4e86\u5927\u6a21\u578b\u5728\u6570\u5b66\u63a8\u7406\u4e2d\u7684\u7ec4\u5408\u6027\u7f3a\u9677\uff0c\u4e3a\u6539\u8fdb\u6a21\u578b\u63a8\u7406\u80fd\u529b\u63d0\u4f9b\u4e86\u91cd\u8981\u6d1e\u89c1\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[51] Sun Z, Shen Y, Zhou Q, et al. Principle-driven self-alignment of language models from scratch with minimal human supervision[J]. Advances in Neural Information Processing Systems, 2024, 36.<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u4e86\u4e00\u79cd\u57fa\u4e8e\u539f\u5219\u9a71\u52a8\u7684\u81ea\u5bf9\u9f50\u65b9\u6cd5\uff0c\u4ee5\u6700\u5c0f\u7684\u4eba\u5de5\u76d1\u7763\u5b9e\u73b0\u4e86\u6a21\u578b\u7684\u6709\u6548\u81ea\u5bf9\u9f50\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[52] Jiang X, Li F, Zhao H, et al. Long Term Memory: The Foundation of AI Self-Evolution[J]. 
arXiv preprint arXiv:2410.15665, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/pdf\/2410.15665<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8be6\u7ec6\u9610\u8ff0\u4e86\u957f\u671f\u8bb0\u5fc6\u5bf9 AI \u81ea\u6211\u8fdb\u5316\u7684\u91cd\u8981\u6027\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/mp.weixin.qq.com\/s\/BwIazafPjpQFtivIXTs5XA<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[53] Zelikman E, Wu Y, Mu J, et al. 
STaR: Bootstrapping reasoning with reasoning[C]\/\/Advances in Neural Information Processing Systems, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2203.14465<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5f00\u521b\u6027\u5730\u63d0\u51fa\u57fa\u4e8e\u63a8\u7406\u7684\u81ea\u6211\u6539\u8fdb\u65b9\u6cd5\u3002\u901a\u8fc7bootstrapping\u673a\u5236\u5b9e\u73b0\u6a21\u578b\u80fd\u529b\u7684\u8fed\u4ee3\u63d0\u5347\uff0c\u4e3a\u81ea\u6211\u6539\u8fdb\u7814\u7a76\u5f00\u8f9f\u4e86\u65b0\u65b9\u5411\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[54] Wang P, Li L, Shao Z, et al. Math-shepherd: Verify and reinforce LLMs step-by-step without human annotations[J]. 
arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2312.08935<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u65e0\u76d1\u7763\u7684\u6570\u5b66\u63a8\u7406\u9a8c\u8bc1\u65b9\u6cd5\u3002\u901a\u8fc7\u81ea\u52a8\u5316\u6b65\u9aa4\u9a8c\u8bc1\u548c\u5f3a\u5316\u5b66\u4e60\uff0c\u663e\u8457\u964d\u4f4e\u4e86\u5bf9\u4eba\u5de5\u6807\u6ce8\u7684\u4f9d\u8d56\uff0c\u63d0\u9ad8\u4e86\u6a21\u578b\u7684\u6570\u5b66\u63a8\u7406\u80fd\u529b\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[55] Lightman H, Kosaraju V, Burda Y, et al. Let&#8217;s verify step by step[J]. arXiv preprint, 2023.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2305.20050<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">OpenAI\u63d0\u51fa\u7684\u7cfb\u7edf\u6027\u9a8c\u8bc1\u6846\u67b6\u3002\u901a\u8fc7\u7ec6\u7c92\u5ea6\u7684\u6b65\u9aa4\u9a8c\u8bc1\u548c\u53cd\u9988\u673a\u5236\uff0c\u5927\u5e45\u63d0\u5347\u4e86\u6a21\u578b\u63a8\u7406\u7684\u53ef\u9760\u6027\u548c\u51c6\u786e\u6027\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[56] Li Y, Lin Z, Zhang S, et al. 
Making large language models better reasoners with step-aware verifier[J]. arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2206.02336<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u521b\u65b0\u6027\u5730\u63d0\u51fa\u6b65\u9aa4\u611f\u77e5\u9a8c\u8bc1\u5668\u3002\u901a\u8fc7\u6df1\u5165\u7406\u89e3\u63a8\u7406\u6b65\u9aa4\u95f4\u7684\u4f9d\u8d56\u5173\u7cfb\uff0c\u5b9e\u73b0\u4e86\u66f4\u7cbe\u51c6\u7684\u63a8\u7406\u9a8c\u8bc1\u548c\u4f18\u5316\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[57] Cobbe K, Kosaraju V, Bavarian M, et al. Training verifiers to solve math word problems[J]. 
arXiv preprint, 2021.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2110.14168<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9a8c\u8bc1\u5668\u7814\u7a76\u7684\u5f00\u521b\u6027\u5de5\u4f5c\u3002\u9996\u6b21\u7cfb\u7edf\u63a2\u8ba8\u4e86\u9a8c\u8bc1\u5668\u5728\u6570\u5b66\u95ee\u9898\u6c42\u89e3\u4e2d\u7684\u5e94\u7528\uff0c\u4e3a\u540e\u7eed\u7814\u7a76\u5960\u5b9a\u4e86\u57fa\u7840\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[58] Li X, Yu P, Zhou C, et al. Self-alignment with instruction backtranslation[J]. arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2308.06259<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51fa\u57fa\u4e8e\u6307\u4ee4\u53cd\u5411\u7ffb\u8bd1\u7684\u81ea\u5bf9\u9f50\u65b9\u6cd5\u3002\u901a\u8fc7\u53cc\u5411\u8f6c\u6362\u5b66\u4e60\u6a21\u578b\u610f\u56fe\uff0c\u5b9e\u73b0\u4e86\u66f4\u6709\u6548\u7684\u81ea\u6211\u6539\u8fdb\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;font-family: mp-quote, -apple-system-font, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans 
GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.034em;\">[59] Kumar A, Zhuang V, Agarwal R, et al. Training Language Models to Self-Correct via Reinforcement Learning[J]. arXiv preprint, 2024.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">https:\/\/arxiv.org\/abs\/2409.12917<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u63d0\u51faSCoRe\u65b9\u6cd5\uff0c\u901a\u8fc7\u591a\u8f6e\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u548c\u9002\u5f53\u7684\u6b63\u5219\u5316\u7b56\u7565\uff0c\u4f7f\u7528\u5b8c\u5168\u81ea\u751f\u6210\u7684\u6570\u636e\u6765\u8bad\u7ec3\u8bed\u8a00\u6a21\u578b\u7684\u81ea\u6211\u7ea0\u9519\u80fd\u529b\uff0c\u6709\u6548\u89e3\u51b3\u4e86\u76d1\u7763\u5fae\u8c03\u4e2d\u7684\u5206\u5e03\u4e0d\u5339\u914d\u548c\u884c\u4e3a\u5d29\u584c\u95ee\u9898\uff0c\u663e\u8457\u63d0\u5347\u4e86\u6a21\u578b\u7684\u81ea\u6211\u7ea0\u9519\u8868\u73b0\u3002<\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">[<\/span><span style=\"color: rgb(63, 63, 63);font-family: mp-quote, -apple-system-font, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.034em;\">60] Uesato J, Kushman N, Kumar R, et al. Solving math word problems with process- and outcome-based feedback[J]. 
arXiv preprint, 2022.<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;line-height: 1.75em;text-align: left;margin-bottom: 8px;\"><span style=\"color: rgb(63, 63, 63);font-size: 15px;font-family: mp-quote, -apple-system-font, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.034em;\">https:\/\/arxiv.org\/abs\/2211.14275<\/span><\/p>\n<blockquote data-type=\"quote_container\">\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u901a\u8fc7\u5bf9\u6bd4\u57fa\u4e8e\u8fc7\u7a0b\u548c\u57fa\u4e8e\u7ed3\u679c\u7684\u76d1\u7763\u65b9\u6cd5\u5728GSM8K\u6570\u636e\u96c6\u4e0a\u7684\u8868\u73b0\uff0c\u53d1\u73b0\u7eaf\u7cb9\u7684\u7ed3\u679c\u76d1\u7763\u80fd\u4ee5\u66f4\u5c11\u7684\u6807\u6ce8\u5b9e\u73b0\u76f8\u4f3c\u7684\u6700\u7ec8\u7b54\u6848\u51c6\u786e\u7387\uff0c\u4f46\u8981\u83b7\u5f97\u6b63\u786e\u7684\u63a8\u7406\u6b65\u9aa4\uff0c\u5219\u9700\u8981\u8fc7\u7a0b\u76d1\u7763\u6216\u6a21\u62df\u8fc7\u7a0b\u53cd\u9988\u7684\u5b66\u4e60\u578b\u5956\u52b1\u6a21\u578b\u7684\u76d1\u7763\u3002<\/span><\/section>\n<\/blockquote>\n<p style=\"text-align: center;margin-left: 8px;margin-right: 8px;line-height: 1.75em;margin-bottom: 0px;\"><br  \/><\/p>\n<p style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-backh=\"321\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"321\" data-imgfileid=\"100217454\" data-ratio=\"0.5712962962962963\" data-s=\"300,640\"  data-type=\"jpeg\" data-w=\"1080\" style=\"width: 100%;height: auto;\" src=\"\/wp-content\/uploads\/2024\/11\/wxsync-2024-11-8eb2800de08b33963b6236f34585b465.png\"  \/><\/p>\n<p style=\"text-align: center;margin-left: 
8px;margin-right: 8px;margin-bottom: 0px;\"><br  \/><\/p>\n<p style=\"text-align: justify;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u53cc\u5341\u4e00\u9650\u65f6\u4f18\u60e0\u53c2\u89c1\uff1a<\/span><\/p>\n<p style=\"text-align: justify;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247701026&amp;idx=1&amp;sn=84b9591159862c66adce5a0c169c9a1d&amp;chksm=e89888afdfef01b97d2e8e80976f1cb998a7d81d836b7585d1a04875898f8e2439e2a36ef5c6&amp;scene=21#wechat_redirect\" textvalue=\"11.11\u4e00\u5e74\u4e00\u6b21\u9650\u65f6\u7279\u60e0\uff01\u52a0\u5165\u96c6\u667a\u5b66\u56edVIP\uff0c\u4e00\u6b21\u6027\u89e3\u9501\u96c6\u667a\u5e73\u53f0\u6240\u6709\u5185\u5bb9\u8d44\u6e90\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" style=\"font-size: 15px;text-decoration: underline;\" data-linktype=\"2\" rel=\"noopener noreferrer\"><span style=\"font-size: 15px;\">11.11\u4e00\u5e74\u4e00\u6b21\u9650\u65f6\u7279\u60e0\uff01\u52a0\u5165\u96c6\u667a\u5b66\u56edVIP\uff0c\u4e00\u6b21\u6027\u89e3\u9501\u96c6\u667a\u5e73\u53f0\u6240\u6709\u5185\u5bb9\u8d44\u6e90<\/span><\/a><br  \/><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;\"><br  \/><\/section>\n<p style=\"text-align: justify;margin-bottom: 0px;line-height: 1.75em;text-indent: 0em;margin-left: 0px;margin-right: 0px;\"><span style=\"color: rgb(255, 255, 255);font-family: PingFangSC-light;font-size: 15px;font-weight: 700;letter-spacing: 0.544px;text-align: left;background-color: rgb(12, 130, 169);\">\u70b9\u51fb\u201c\u9605\u8bfb\u539f\u6587\u201d\uff0c<\/span><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;color: rgb(255, 255, 255);font-family: PingFangSC-light;font-size: 15px;letter-spacing: 0.544px;text-align: left;white-space: normal;background-color: rgb(255, 255, 
255);visibility: visible;\"><span style=\"-webkit-tap-highlight-color: transparent;outline: 0px;background-color: rgb(12, 130, 169);visibility: visible;\"><strong style=\"-webkit-tap-highlight-color: transparent;outline: 0px;letter-spacing: 0.544px;visibility: visible;\">\u62a5\u540d\u8bfb\u4e66\u4f1a<\/strong><\/span><\/strong><\/p>\n<p style=\"display: none;\"><mp-style-type data-value=\"3\"><\/mp-style-type><\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u5bfc\u8bed o1\u6a21\u578b\u4ee3\u8868\u5927\u8bed\u8a00\u6a21\u578b\u878d\u5408\u5b66\u4e60\u4e0e\u63a8\u7406\u7684\u65b0\u8303\u5f0f\u3002\u96c6\u667a\u4ff1\u4e50\u90e8\u8054\u5408\u5317\u4eac\u5e08\u8303\u5927\u5b66\u7cfb\u7edf\u79d1\u5b66\u5b66\u9662\u6559\u6388\u5f20\u6c5f\u3001Google DeepMind\u7814\u7a76\u79d1\u5b66\u5bb6\u51af\u7199\u680b\u3001\u963f\u91cc\u5df4\u5df4\u5f3a\u5316\u5b66\u4e60\u7814\u7a76\u5458\u738b\u7ef4\u57d9\u548c\u4e2d\u79d1\u9662\u4fe1\u5de5\u6240\u5f20\u6770\u5171\u540c\u53d1\u8d77\u300c\u5927\u6a21\u578bII\uff1a\u878d\u5408\u5b66\u4e60\u4e0e\u63a8\u7406\u7684\u5927\u6a21\u578b\u65b0\u8303\u5f0f 
\u300d\u8bfb\u4e66\u4f1a\uff0c\u672c\u6b21\u8bfb\u4e66\u4f1a\u5c06\u5173\u6ce8\u5927\u6a21\u578b\u63a8\u7406\u8303\u5f0f\u7684\u6f14\u8fdb\u3001\u57fa\u4e8e\u641c&#8230;<\/p>\n","protected":false},"author":0,"featured_media":53419,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[],"special":[],"_links":{"self":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/53435"}],"collection":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=53435"}],"version-history":[{"count":0,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/53435\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/media\/53419"}],"wp:attachment":[{"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=53435"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=53435"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=53435"},{"taxonomy":"special","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fspecial&post=53435"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}