{"id":58651,"date":"2025-04-16T20:02:14","date_gmt":"2025-04-16T12:02:14","guid":{"rendered":"https:\/\/swarma.org\/?p=58651"},"modified":"2025-04-16T20:02:14","modified_gmt":"2025-04-16T12:02:14","slug":"%e5%a4%a7%e8%af%ad%e8%a8%80%e6%a8%a1%e5%9e%8b%e5%a4%8d%e6%9d%82%e6%8e%a8%e7%90%86%e7%9a%84%e8%87%aa%e6%88%91%e8%bf%9b%e5%8c%96%e6%9c%ba%e5%88%b6%ef%bc%9a%e7%a0%94%e7%a9%b6%e7%bb%bc%e8%bf%b0%e4%b8%8e","status":"publish","type":"post","link":"https:\/\/swarma.org\/?p=58651","title":{"rendered":"\u5927\u8bed\u8a00\u6a21\u578b\u590d\u6742\u63a8\u7406\u7684\u81ea\u6211\u8fdb\u5316\u673a\u5236\uff1a\u7814\u7a76\u7efc\u8ff0\u4e0e\u524d\u6cbf\u5c55\u671b"},"content":{"rendered":"<div class='wxsyncmain'>\n<section data-mpa-powered-by=\"yiban.io\" data-pm-slice=\"0 0 []\">\n<section>\n<section style=\"text-align: right\">\n<section data-mpa-powered-by=\"yiban.io\" style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px;font-family: PingFangSC-light;font-size: 15px\" data-pm-slice=\"0 0 []\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: 661px;vertical-align: top\">\n<section style=\"margin: 10px 0px 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\">\n<section style=\"text-align: justify\"><span><img class=\"rich_pages wxw-img\" data-backh=\"324\" data-backw=\"578\" data-galleryid=\"615508642304712704\" data-gallerysupplier=\"100\" data-imgfileid=\"100227741\" data-ratio=\"0.5601851851851852\" data-s=\"300,640\" data-type=\"other\" data-w=\"1080\" style=\"width: 100%;height: auto !important\" src=\"\" \/><\/span><\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: 661px\">\n<section style=\"margin: 0px;padding: 0px 3px 0px 0px;max-width: 100%;float: left;line-height: 1\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;text-align: left\">\n<section style=\"margin: 0px;padding: 0px 0px 0px 10px;max-width: 100%;width: auto;vertical-align: top;min-width: 10%;height: auto;line-height: 
0\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: 25px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px\"><span><br \/><\/span><\/section>\n<\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;text-align: justify;font-size: 16px;line-height: 1.5\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span>\u6458\u8981<\/span><\/strong><\/p>\n<\/section>\n<\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: 24px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 0px;padding: 0px 4px;max-width: 100%;clear: right;min-height: 4.5em !important\">\n<section style=\"margin: 5px 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px 8px;max-width: 100%;font-size: 13px;line-height: 2;letter-spacing: 0.544px\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span>OpenAI\u7684O1\u53ca\u5176\u540e\u7eed\u7ade\u4e89\u8005\uff08\u5982DeepSeek R1\uff09\u7684\u53d1\u5e03\u663e\u8457\u63a8\u52a8\u4e86\u5927\u8bed\u8a00\u6a21\u578b\uff08Large Language 
Models\uff0cLLMs\uff09\u5728\u590d\u6742\u63a8\u7406\u65b9\u9762\u7684\u7814\u7a76\uff0c\u5f15\u53d1\u5b66\u672f\u754c\u4e0e\u5de5\u4e1a\u754c\u7684\u53cc\u91cd\u5173\u6ce8\u3002\u6b64\u9879\u8fdb\u5c55\u6fc0\u53d1\u4e86\u76f8\u5173\u6280\u672f\u6210\u679c\u7684\u590d\u73b0\u548c\u5728\u6b64\u57fa\u7840\u4e0a\u7684\u521b\u65b0\u3002\u4e3a\u7cfb\u7edf\u6784\u5efa\u8be5\u9886\u57df\u7684\u7814\u7a76\u6846\u67b6\uff0c\u672c\u6587\u4ece\u81ea\u6211\u8fdb\u5316\uff08self-evolution\uff09\u7684\u89d2\u5ea6\u7cfb\u7edf\u5730\u5206\u7c7b\u4e86\u73b0\u6709\u6280\u672f\u3002\u6211\u4eec\u7684\u8c03\u67e5\u53d1\u73b0\u5206\u4e3a\u4e09\u4e2a\u76f8\u4e92\u5173\u8054\u7684\u90e8\u5206\uff1a\u6570\u636e\u8fdb\u5316\uff08data evolution\uff09\u3001\u6a21\u578b\u8fdb\u5316\uff08model evolution\uff09\u548c\u81ea\u6211\u8fdb\u5316\uff08self-evolution\uff09\u3002<\/span><\/span><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span><br \/><\/span><\/span><\/strong><\/p>\n<ul style=\"list-style-type: disc\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span>\u6570\u636e\u8fdb\u5316\u90e8\u5206\u6539\u8fdb\u63a8\u7406\u8bad\u7ec3\u6570\u636e\uff0c\u8fd9\u5305\u62ec\u4efb\u52a1\u8fdb\u5316\u548c\u589e\u5f3a\u601d\u7ef4\u94fe\uff08Chain-of-Thought\uff0cCoT\uff09\u63a8\u7406\u7684\u63a8\u7406\u65f6\u95f4\u8ba1\u7b97\u3002<\/span><\/span><\/strong><\/p>\n<p style=\"margin: 
0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span><br \/><\/span><\/span><\/strong><\/p>\n<\/li>\n<li>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span>\u6a21\u578b\u8fdb\u5316\u90e8\u5206\u901a\u8fc7\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4f18\u5316\u6a21\u578b\u6a21\u5757\uff0c\u4ee5\u589e\u5f3a\u590d\u6742\u63a8\u7406\u80fd\u529b\u3002<\/span><\/span><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span><br \/><\/span><\/span><\/strong><\/p>\n<\/li>\n<li>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span>\u81ea\u6211\u8fdb\u5316\u90e8\u5206\u5219\u63a2\u8ba8\u5176\u8fdb\u5316\u7b56\u7565\u548c\u6a21\u5f0f\u3002\u5305\u62ec\u81ea\u6211\u8fdb\u5316\u7684\u89c4\u6a21\u6cd5\u5219\uff08scaling law\uff09\u4e0e\u5bf9 O1 \u7c7b\u7814\u7a76\u5de5\u4f5c\u7684\u5206\u6790\u3002<\/span><\/span><\/strong><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin: 
0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span><br \/><\/span><\/span><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span>\u901a\u8fc7\u7cfb\u7edf\u68b3\u7406\u76f8\u5173\u7814\u7a76\uff0c\u6211\u4eec\u603b\u7ed3\u4e86\u524d\u6cbf\u65b9\u6cd5\uff0c\u5e76\u63d0\u4f9b\u4e86\u5bf9\u672a\u6765\u7814\u7a76\u65b9\u5411\u7684\u524d\u77bb\u6027\u5c55\u671b\u3002\u672c\u6587\u65e8\u5728\u6fc0\u52b1LLM\u590d\u6742\u63a8\u7406\u793e\u533a\u8fdb\u4e00\u6b65\u7814\u7a76\uff0c\u5e76\u4fc3\u8fdb\u5bf9LLM\u63a8\u7406\u80fd\u529b\u63d0\u5347\u7684\u6df1\u5165\u63a2\u7d22\u3002<\/span><\/span><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 0.544px\"><span><br \/><\/span><\/span><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: none;min-height: 1em;line-height: 2em;text-align: justify\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 13px;letter-spacing: 
0.544px\"><span>\u5173\u952e\u8bcd\uff1a<\/span><span>\u5927\u8bed\u8a00\u6a21\u578b\u3001\u590d\u6742\u63a8\u7406\u3001\u81ea\u6211\u8fdb\u5316\u3001\u6570\u636e\u8fdb\u5316\u3001\u6a21\u578b\u8fdb\u5316\u3001\u4e13\u5bb6\u8fed\u4ee3\u3001\u63a8\u7406\u8ba1\u7b97\u3001\u5f3a\u5316\u5b66\u4e60<\/span><\/span><\/strong><\/p>\n<\/section>\n<section style=\"margin: 0px;padding: 0px 8px;max-width: 100%;font-size: 13px;line-height: 2;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><\/strong><\/strong><\/section>\n<\/section>\n<section style=\"margin: 5px 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px 8px;max-width: 100%;font-size: 13px;line-height: 2;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><\/strong><\/strong><\/section>\n<\/section>\n<section style=\"margin: 5px 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px 8px;max-width: 100%;font-size: 13px;line-height: 2;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><\/strong><\/strong><\/section>\n<section style=\"margin: 0px;padding: 0px 8px;max-width: 100%;font-size: 13px;line-height: 2;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><\/strong><\/strong><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 10px 0px 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: 661px\">\n<section style=\"margin: 
0px;padding: 0px;max-width: 100%;clear: both;line-height: 0\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 10px 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px;font-family: PingFangSC-light;font-size: 15px;text-align: center\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;vertical-align: middle;line-height: 0\"><img alt=\"\u56fe\u7247\" class=\"rich_pages wxw-img\" data-ratio=\"0.07314814814814814\" data-type=\"png\" data-w=\"1080\" style=\"margin: 0px;padding: 0px;max-width: 100%;vertical-align: middle;width: 677px !important;height: auto !important\" data-fileid=\"100098753\" data-imgfileid=\"100227057\" src=\"\" \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: right\"><span style=\", Arial, sans-serif;font-size: 13px;letter-spacing: 0.544px\" data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;section&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;text-align: right; margin-top: 10px; margin-bottom: 10px; letter-spacing: 0.544px; color: rgb(63, 63, 63); font-family: PingFangSC-light; font-size: 15px; background-color: rgb(255, 255, 255); -webkit-tap-highlight-color: transparent; outline: 0px; vertical-align: middle; display: inline-block; line-height: 0; visibility: visible;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\">Tao He, Hao Li, Jingchang Chen\u7b49<span style=\"font-weight: bold\">\u4e28\u4f5c\u8005<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: right\"><span style=\", Arial, sans-serif;font-size: 13px;letter-spacing: 0.544px\" data-pm-slice=\"1 1 
[&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px; margin-right: 8px; margin-bottom: 0px; line-height: 1.75em; text-align: right;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\">Azure<span style=\"font-weight: bold\">\u4e28\u8bd1\u8005<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: right\"><span style=\", Arial, sans-serif;font-size: 13px;letter-spacing: 0.544px\" data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px; margin-right: 8px; margin-bottom: 0px; line-height: 1.75em; text-align: right;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<blockquote>\n<p style=\"text-align: left\"><span style=\"font-size: 15px\"><span>\u8bba\u6587\u9898\u76ee\uff1aA Survey on Complex Reasoning of Large Language Models through the Lens of Self-Evolution<\/span><\/span><\/p>\n<p style=\"text-align: left\"><span style=\"font-size: 15px\"><span>\u8bba\u6587\u5730\u5740\uff1ahttps:\/\/www.researchgate.net\/publication\/389209259_A_Survey_on_Complex_Reasoning_of_Large_Language_Models_through_the_Lens_of_Self-Evolution<\/span><\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: left\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<blockquote>\n<p style=\"text-align: left\"><span style=\"font-size: 15px\" data-pm-slice=\"1 1 
[&quot;blockquote&quot;,{&quot;type&quot;:&quot;normal&quot;,&quot;editId&quot;:null,&quot;title&quot;:&quot;&quot;,&quot;url&quot;:&quot;&quot;,&quot;nickname&quot;:&quot;&quot;,&quot;authorName&quot;:&quot;&quot;,&quot;from&quot;:&quot;&quot;,&quot;style&quot;:null},&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;text-align: left;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span>\u4f5c\u8005 | Tao He, Hao Li, Jingchang Chen, Runxuan Liu, Yixin Cao, Lizi Liao, Zihao Zheng, Zheng Chu, Jiafeng Liang, Ming Liu, Bing Qin<\/span><\/span><\/p>\n<\/blockquote>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-bottom: 0px;letter-spacing: 0.544px;, Arial, sans-serif\">\n<section style=\"margin-right: 8px;margin-left: 8px;letter-spacing: 0.544px;line-height: 1.75em\">\n<section data-id=\"85410\" data-tools=\"135\u7f16\u8f91\u5668\" style=\"letter-spacing: 0.54px\">\n<section style=\"margin: 10px auto\">\n<section style=\"margin-right: 8px;margin-left: 8px;padding: 20px\">\n<section style=\"text-align: left\"><strong style=\"letter-spacing: 0.54px;font-size: 16px\"><span>\u76ee\u5f55<\/span><\/strong><\/section>\n<p><span style=\"font-size: 15px\" data-mpa-action-id=\"m9ctvxa8je6\" data-pm-slice=\"0 0 []\"><span style=\"font-weight: bold\">1. \u5f15\u8a00<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">2. \u9884\u5907\u77e5\u8bc6<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">3. \u6570\u636e\u6f14\u5316<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">4. 
\u6a21\u578b\u6f14\u5316<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">5. \u81ea\u6211\u8fdb\u5316<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">6. \u5728\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u5185\u91cd\u65b0\u89e3\u8bfb\u4ee3\u8868\u6027O1\u7c7b\u7814\u7a76<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">7. \u672a\u6765\u6311\u6218\u548c\u65b9\u5411<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">8. \u7ed3\u8bba<\/span><\/span><\/p>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;letter-spacing: 0.544px;text-align: center;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\", Arial, sans-serif;letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong 
style=\"text-align: left;letter-spacing: 0.544px\"><span style=\", Arial, sans-serif\"><strong style=\"letter-spacing: 0.578px\"><span>\u6458\u8981<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>OpenAI\u7684O1\u53ca\u5176\u540e\u7eed\u7ade\u4e89\u8005<\/span><span>\uff08\u5982DeepSeek R1\uff09<\/span><span>\u7684\u53d1\u5e03\u663e\u8457\u63a8\u52a8\u4e86\u5927\u8bed\u8a00\u6a21\u578b<\/span><span>\uff08Large Language Models\uff0cLLMs\uff09<\/span><span>\u5728\u590d\u6742\u63a8\u7406\u65b9\u9762\u7684\u7814\u7a76\uff0c\u5f15\u53d1\u5b66\u672f\u754c\u4e0e\u5de5\u4e1a\u754c\u7684\u53cc\u91cd\u5173\u6ce8\u3002\u6b64\u9879\u8fdb\u5c55\u6fc0\u53d1\u4e86\u76f8\u5173\u6280\u672f\u6210\u679c\u7684\u590d\u73b0\u548c\u5728\u6b64\u57fa\u7840\u4e0a\u7684\u521b\u65b0\u3002\u4e3a\u7cfb\u7edf\u6784\u5efa\u8be5\u9886\u57df\u7684\u7814\u7a76\u6846\u67b6\uff0c\u672c\u6587\u4ece<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316<\/span><\/span><\/strong><strong style=\"font-size: 15px\"><span><span>\uff08self-evolution\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u7684\u89d2\u5ea6\u7cfb\u7edf\u5730\u5206\u7c7b\u4e86\u73b0\u6709\u6280\u672f\u3002\u6211\u4eec\u7684\u8c03\u67e5\u53d1\u73b0\u5206\u4e3a\u4e09\u4e2a\u76f8\u4e92\u5173\u8054\u7684\u90e8\u5206\uff1a<\/span><\/span><strong style=\"font-size: 
15px\"><span><span>\u6570\u636e\u8fdb\u5316\uff08data evolution\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6a21\u578b\u8fdb\u5316\uff08model evolution\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316\uff08self-evolution\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6570\u636e\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u90e8\u5206\u6539\u8fdb\u63a8\u7406\u8bad\u7ec3\u6570\u636e\uff0c\u8fd9\u5305\u62ec\u4efb\u52a1\u8fdb\u5316\u548c\u589e\u5f3a\u601d\u7ef4\u94fe<\/span><span>\uff08Chain-of-Thought\uff0cCoT\uff09<\/span><span>\u63a8\u7406\u7684\u63a8\u7406\u65f6\u95f4\u8ba1\u7b97\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6a21\u578b\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u90e8\u5206\u901a\u8fc7\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u4f18\u5316\u6a21\u578b\u6a21\u5757\uff0c\u4ee5\u589e\u5f3a\u590d\u6742\u63a8\u7406\u80fd\u529b\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u90e8\u5206\u5219\u63a2\u8ba8\u5176\u8fdb\u5316\u7b56\u7565\u548c\u6a21\u5f0f\u3002\u5305\u62ec<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316\u7684\u89c4\u6a21\u6cd5\u5219<\/span><\/span><\/strong><strong style=\"font-size: 15px\"><span><span>\uff08scaling 
law\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4e0e\u5bf9 O1 \u7c7b\u7814\u7a76\u5de5\u4f5c\u7684\u5206\u6790\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u901a\u8fc7\u7cfb\u7edf\u68b3\u7406\u76f8\u5173\u7814\u7a76\uff0c\u6211\u4eec\u603b\u7ed3\u4e86\u524d\u6cbf\u65b9\u6cd5\uff0c\u5e76\u63d0\u4f9b\u4e86\u5bf9\u672a\u6765\u7814\u7a76\u65b9\u5411\u7684\u524d\u77bb\u6027\u5c55\u671b\u3002\u672c\u6587\u65e8\u5728\u6fc0\u52b1LLM\u590d\u6742\u63a8\u7406\u793e\u533a\u8fdb\u4e00\u6b65\u7814\u7a76\uff0c\u5e76\u4fc3\u8fdb\u5bf9LLM\u63a8\u7406\u80fd\u529b\u63d0\u5347\u7684\u6df1\u5165\u63a2\u7d22\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span><br \/><\/span><\/h1>\n<h3 style=\", Arial, sans-serif;letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: 
left;letter-spacing: 0.544px\"><span style=\", Arial, sans-serif\"><strong style=\"letter-spacing: 0.578px\"><span>1. \u5f15\u8a00<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd1\u5e74\u6765\uff0c\u5927\u8bed\u8a00\u6a21\u578b<\/span><span>\uff08LLMs\uff09<\/span><span>\u7684\u53d1\u5c55\u4ee4\u4eba\u77a9\u76ee\u3002\u5b83\u4eec\u4e0d\u4ec5\u5728\u9605\u8bfb\u7406\u89e3\u3001\u6545\u4e8b\u751f\u6210\u548c\u5bf9\u8bdd\u80fd\u529b\u7b49\u9886\u57df\u8d85\u51fa\u4e86\u9884\u671f\uff0c\u8fd8\u5728\u9700\u8981\u590d\u6742\u903b\u8f91\u63a8\u7406\u7684\u4efb\u52a1\u4e2d\u8868\u73b0\u51fa\u8272\uff0c\u5305\u62ec\u4ee3\u7801\u751f\u6210\u548c\u6570\u5b66\u95ee\u9898\u89e3\u51b3\u30022024\u5e74\u4e0b\u534a\u5e74\uff0cLLM\u7814\u7a76\u8fce\u6765\u4e86\u4e00\u4e2a\u5173\u952e\u65f6\u523b\uff0cOpenAI\u53d1\u5e03\u4e86O1<\/span><span>&nbsp;[OpenAI, 
2024a]<\/span><span>\uff0c\u8fd9\u6807\u5fd7\u7740\u590d\u6742\u63a8\u7406\u7814\u7a76\u7684\u4e00\u4e2a\u91cd\u8981\u91cc\u7a0b\u7891\u3002O1\u7cfb\u5217\u6a21\u578b\u80fd\u591f\u751f\u6210\u884d\u751f\u7684\u63a8\u7406\u8fc7\u7a0b\uff0c\u7075\u6d3b\u5730\u5206\u89e3\u95ee\u9898\uff0c\u5e76\u5728\u9762\u4e34\u6311\u6218\u65f6\u81ea\u4e3b\u6f84\u6e05\u3001\u53cd\u601d\u548c\u7ea0\u6b63\u6f5c\u5728\u9519\u8bef\uff0c\u4ee5\u53ca\u63a2\u7d22\u66ff\u4ee3\u89e3\u51b3\u65b9\u6848\u2014\u2014\u6a21\u62df\u4e86\u4eba\u7c7b\u601d\u7ef4\u7279\u6709\u7684\u7ec6\u81f4\u3001\u53cd\u601d\u6027\u63a8\u7406\u8fc7\u7a0b<\/span><span>&nbsp;[OpenAI, 2024b]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5de5\u4e1a\u754c\u548c\u5b66\u672f\u754c\u90fd\u81f4\u529b\u4e8e\u590d\u73b0O1\uff0c\u6380\u8d77\u4e86\u4e00\u80a1\u6280\u672f\u62a5\u544a\u7684\u70ed\u6f6e\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u5de5\u4e1a\u754c\uff0c\u4e00\u7cfb\u5217\u7c7b\u4f3c\u7684\u4ea7\u54c1\u6d8c\u73b0\uff0c\u4f8b\u5982DeepSeek R1&nbsp;<\/span><span>[DeepSeek-AI et al.,2025]\uff08\u7b80\u79f0R1\uff09<\/span><span>\u3001Kimi k1.5<\/span><span>&nbsp;[Team et al., 2025]<\/span><span>\u548cQwQ&nbsp;<\/span><span>[Team, 
2024b]<\/span><span>\uff0c\u5b83\u4eec\u90fd\u53d1\u5e03\u4e86\u81ea\u5df1\u7684\u6a21\u578b\u6216\u6280\u672f\u62a5\u544a\u3002\u8fd9\u4e9b\u4ea7\u54c1\u4e0d\u4ec5\u8fbe\u5230\u751a\u81f3\u8d85\u8d8a\u4e86O1\uff0c\u800c\u4e14\u5176\u5f00\u6e90\u8d21\u732e\u4e5f\u503c\u5f97\u79f0\u8d5e\u3002\u6b64\u5916\uff0c\u8fd9\u4e9b\u6280\u672f\u62a5\u544a\u4e2d\u5f3a\u8c03\u7684<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6269\u5c55\u5f3a\u5316\u5b66\u4e60\uff08Scaling Reinforcement Learning\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u7b49\u6280\u672f\uff0c\u8fdb\u4e00\u6b65\u62d3\u5c55\u4e86\u7814\u7a76\u7c7bO1\u5de5\u4f5c\u7684\u65b9\u5411\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u5b66\u672f\u754c\uff0c\u7814\u7a76\u8005\u4ece\u4e0d\u540c\u89d2\u5ea6\u8fdb\u884c\u4e86\u591a\u9879\u590d\u73b0\u7814\u7a76\u3002\u4f8b\u5982\uff0cO1 Journey<\/span><span>&nbsp;[Qin\u7b49\uff0c2024; Huang\u7b49\uff0c2024]&nbsp;<\/span><span>\u5e7f\u6cdb\u8ba8\u8bba\u4e86<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u601d\u7ef4\u94fe\u683c\u5f0f\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u84b8\u998f<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff0c\u4f46\u5bf9\u6301\u7eed\u4f18\u5316\u65b9\u6cd5\u7684\u89c1\u89e3\u6709\u9650\u3002\u4e0e\u6b64\u540c\u65f6\uff0cOpenR<\/span><span>&nbsp;[Wang\u7b49\uff0c2024e]<\/span><span>\u3001O1-Coder<\/span><span>&nbsp;[Zhang\u7b49\uff0c2024j]<\/span><span>\u7b49\u5de5\u4f5c\u4e3b\u8981\u901a\u8fc7\u5f3a\u5316\u5b66\u4e60\u7684\u89c6\u89d2\u7814\u7a76O1\uff0c\u4f46\u5ffd\u7565\u4e86\u5bf9\u53cd\u601d\u548c\u7ea0\u6b63\u63a8\u7406\u64cd\u4f5c\u7684\u8ba8\u8bba\u3002\u53e6\u4e00\u65b9\u9762\uff0cSlow Thinking\u7cfb\u5217\u5de5\u4f5c<\/span><span>[Jiang\u7b49\uff0c2024a; 
Min\u7b49\uff0c2024]<\/span><span>\u4e13\u6ce8\u4e8e<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u63a8\u7406\u65f6\u8ba1\u7b97<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff0c\u5c1d\u8bd5\u901a\u8fc7<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6811\u641c\u7d22\u6280\u672f<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u63d0\u5347\u63a8\u7406\u6027\u80fd\u3002\u6b64\u5916\uff0crStar-Math&nbsp;<\/span><span>[Guan\u7b49\uff0c2025]&nbsp;<\/span><span>\u901a\u8fc7\u4f7f\u7528<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316\u6846\u67b6<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8054\u5408\u8bad\u7ec3<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u63a8\u7406\u5668<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\uff08Process Reward Model, PRM\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff0c\u5b9e\u73b0\u4e86\u63a5\u8fd1O1\u7684\u6027\u80fd\uff0c\u7a81\u663e\u4e86\u8fed\u4ee3\u4f18\u5316\u5728\u63d0\u5347\u63a8\u7406\u80fd\u529b\u65b9\u9762\u7684\u6f5c\u529b\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5c3d\u7ba1\u8fd9\u4e9b\u6280\u672f\u62a5\u544a\u63d0\u4f9b\u4e86\u5b9d\u8d35\u7684\u89c1\u89e3\uff0c\u4f46\u5b83\u4eec\u5f80\u5f80\u4ec5\u805a\u7126\u4e8e\u7279\u5b9a\u6280\u672f\u9886\u57df\uff0c\u7f3a\u4e4f\u6574\u4f53\u6027\u7684\u6280\u672f\u67b6\u6784\u548c\u7edf\u4e00\u7684\u5206\u7c7b\u6807\u51c6\u3002\u56e0\u6b64\uff0c\u6211\u4eec\u9700\u8981\u5bf9\u8fd9\u4e9b\u65b9\u6cd5\u8fdb\u884c\u7cfb\u7edf\u6027\u7684\u9ad8\u5c42\u6b21\u7efc\u8ff0\u3002O1\u535a\u5ba2<\/span><span>&nbsp;[OpenAI, 2024a]&nbsp;<\/span><span>\u548c\u7cfb\u7edf\u5361\u7247<\/span><span>&nbsp;[OpenAI, 2024b]&nbsp;<\/span><span>\u63d0\u793aO1\u91c7\u7528\u4e86\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08RL\uff09<\/span><span>\u548c\u63a8\u7406\u65f6\u8ba1\u7b97\u3002\u8fd9\u4f7f\u6211\u4eec\u8054\u60f3\u5230\u53e6\u4e00\u4e2a\u6770\u51fa\u7684\u4eba\u5de5\u667a\u80fd\u2014\u2014AlphaGo Zero<\/span><span>&nbsp;[Silver\u7b49\uff0c2017]<\/span><span>\u3002AlphaGo Zero\u901a\u8fc7\u81ea\u6211\u5bf9\u5f08\u3001\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u548c\u7b56\u7565\u6a21\u578b\u7684\u8fed\u4ee3\u5b66\u4e60\u5b9e\u73b0\u4e86\u81ea\u6211\u8fdb\u5316<\/span><span>&nbsp;[Silver et al., 
2017]<\/span><span>&nbsp;\u3002\u8fd9\u4e00\u8fc7\u7a0b\u5728\u65e0\u4eba\u5e72\u9884\u7684\u60c5\u51b5\u4e0b\u63d0\u5347\u4e86\u5176\u6027\u80fd\uff0c\u542f\u53d1\u6211\u4eec\u91c7\u7528\u7c7b\u4f3c\u6280\u672f\u6216\u8bb8\u80fd\u5c06\u590d\u6742\u63a8\u7406\u80fd\u529b\u63d0\u5347\u81f3\u8d85\u8d8a\u4eba\u7c7b\u7684\u6c34\u5e73\u3002\u5728\u8fd9\u4e2a\u7c7b\u6bd4\u4e2d\uff0c\u8bad\u7ec3\u7b56\u7565\u6a21\u578b\u5bf9\u5e94\u4e8e\u63a8\u7406\u4f18\u5316\uff0c\u800cMCTS\u641c\u7d22\u5219\u5bf9\u5e94\u4e8e\u63a8\u7406\u65f6\u8ba1\u7b97\u3002\u81ea\u6211\u8fdb\u5316\u901a\u8fc7\u5faa\u73af\u8fd9\u4e24\u4e2a\u9636\u6bb5\uff0c\u5b9e\u73b0\u63a8\u7406\u80fd\u529b\u7684\u81ea\u4e3b\u63d0\u5347\u3002\u6b64\u5916\uff0c\u9ad8\u8d28\u91cf\u6570\u636e\u7684\u532e\u4e4f\u51f8\u663e\u4e86\u81ea\u52a8\u5316\u6570\u636e\u5408\u6210\u6846\u67b6\u7684\u8feb\u5207\u9700\u6c42&nbsp;<\/span><span>[Sutskever, 2024; Wang et al., 2024f]<\/span><span>\uff0c\u7531\u4e8e\u63a8\u7406\u4efb\u52a1\u5bf9\u903b\u8f91\u4e25\u8c28\u6027\u8981\u6c42\u66f4\u9ad8\uff0c\u8fd9\u4e00\u6311\u6218\u5c24\u4e3a\u7a81\u51fa\u3002\u5728\u65e0\u4eba\u5e72\u9884\u7684\u60c5\u51b5\u4e0b\uff0c\u81ea\u6211\u8fdb\u5316\u4e0d\u4ec5\u80fd\u5229\u7528\u5408\u6210\u6570\u636e\u6765\u589e\u5f3a\u7cfb\u7edf\u80fd\u529b\uff0c\u8fd8\u53ef\u4ee5\u5229\u7528\u6539\u8fdb\u7684\u7cfb\u7edf\u5408\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u6570\u636e\uff0c\u521b\u9020\u4e00\u4e2a\u826f\u6027\u5faa\u73af\u63a8\u8fdb\u8fc7\u7a0b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u9274\u4e8e\u8fd9\u4e9b\u8003\u8651\uff0c\u672c\u6587\u5e0c\u671b\u4ece\u81ea\u6211\u8fdb\u5316\u7684\u89c6\u89d2\u5bf9\u5927\u8bed\u8a00\u6a21\u578b\u7684\u590d\u6742\u63a8\u7406\u63d0\u4f9b\u5168\u9762\u7efc\u8ff0\u3002\u5927\u8bed\u8a00\u6a21\u578b\u590d\u6742\u63a8\u7406\u7684<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316\uff0c<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4ea6\u79f0\u81ea\u6211\u6539\u8fdb<\/span><span>\uff08self-improvement\uff09<\/span><span>\uff0c\u5176\u9700\u8981\u5728\u95ed\u73af\u63a8\u7406\u7cfb\u7edf\u4e2d\u81ea\u4e3b\u5408\u6210\u8bad\u7ec3\u6570\u636e\u5e76\u6301\u7eed\u63d0\u5347\u63a8\u7406\u80fd\u529b&nbsp;<\/span><span>&nbsp;[Tao\u7b49\uff0c2024; Hu\u7b49\uff0c2024]<\/span><span>\u3002<\/span><\/span><span style=\"line-height: 1.75em;font-size: 15px\"><strong><span style=\"font-size: 15px\"><span>\u4e13\u5bb6\u8fed\u4ee3\uff08Expert iteration\uff09<\/span><\/span><\/strong><\/span><span style=\"font-size: 15px\"><\/span><span style=\"font-size: 15px\"><span>[Polu\u7b49\uff0c2022; 
Zhao\u7b49\uff0c2024b]&nbsp;<\/span><span>\u88ab\u89c6\u4e3a\u5178\u578b\u7684\u81ea\u6211\u8fdb\u5316\u8303\u5f0f\u3002\u5176\u6838\u5fc3\u601d\u60f3\u662f\uff1a\u6a21\u578b\u9996\u5148\u751f\u6210\u63a8\u7406\u8f68\u8ff9\uff0c\u518d\u57fa\u4e8e\u6807\u51c6\u7b54\u6848\u7b5b\u9009\u51fa\u6b63\u786e\u89e3\u51b3\u65b9\u6848\uff0c\u6700\u540e\u5229\u7528\u8fd9\u4e9b\u89e3\u51b3\u65b9\u6848\u5bf9\u6a21\u578b\u8fdb\u884c\u5fae\u8c03\uff0c\u4ee5\u63d0\u5347\u5176\u63a8\u7406\u80fd\u529b\u3002\u8fd9\u4e00\u8fc7\u7a0b\u8fdb\u884c\u8fed\u4ee3\uff0c\u76f4\u81f3\u6a21\u578b\u6536\u655b\u3002\u6b64\u5916\uff0c\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u6846\u67b6\u540c\u6837\u4f53\u73b0\u4e86\u81ea\u6211\u8fdb\u5316\u7684\u7406\u5ff5\u3002\u667a\u80fd\u4f53\u9996\u5148\u8fdb\u884c\u63a2\u7d22\u5e76\u751f\u6210\u8f68\u8ff9\uff0c\u7136\u540e\u5229\u7528\u8fd9\u4e9b\u8f68\u8ff9\u8fdb\u884c\u81ea\u6211\u4f18\u5316\uff0c\u4ece\u800c\u5728\u540e\u7eed\u7684\u5b66\u4e60\u5468\u671f\u4e2d\u53d1\u73b0\u66f4\u9ad8\u8d28\u91cf\u7684\u8f68\u8ff9\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.4675925925925926\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-width=\"1399\" data-height=\"654\" data-imgfileid=\"100227502\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-c7250039971450ed4c4fa974f65ea30b.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96j5hdjptg\" data-pm-slice=\"0 0 
[]\"><span>\u56fe1\uff1a\u5927\u8bed\u8a00\u6a21\u578b\u4e2d\u81ea\u6211\u8fdb\u5316\u590d\u6742\u63a8\u7406\u80fd\u529b\u7684\u6982\u5ff5\u6846\u67b6\u3002\u6211\u4eec\u5728\u5b8c\u6574\u7684\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u4e2d\u8bc6\u522b\u51fa\u4e09\u4e2a\u7ec4\u6210\u90e8\u5206\uff1a\u6570\u636e\u8fdb\u5316\u3001\u6a21\u578b\u8fdb\u5316\u4ee5\u53ca\u8fdb\u5316\u7b56\u7565\u548c\u6a21\u5f0f\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5982\u56fe 1 \u6240\u793a\uff0c\u672c\u7efc\u8ff0\u7684\u7ed3\u6784\u7531\u4e09\u4e2a\u90e8\u5206\u7ec4\u6210\uff1a\u6570\u636e\u8fdb\u5316\u3001\u6a21\u578b\u8fdb\u5316\u548c\u81ea\u6211\u8fdb\u5316\u3002\u6570\u636e\u8fdb\u5316\u63a2\u7d22\u5408\u6210\u9ad8\u8d28\u91cf\u6570\u636e\uff0c\u5305\u542b\u4e24\u4e2a\u9636\u6bb5\uff1a\uff081\uff09\u4efb\u52a1\u8fdb\u5316\u751f\u6210\u63a8\u7406\u7cfb\u7edf\u5c1a\u672a\u80fd\u6709\u6548\u5904\u7406\u7684\u4efb\u52a1\uff0c\uff082\uff09\u601d\u7ef4\u94fe<\/span><span>\uff08CoT\uff09<\/span><span>\u8fdb\u5316\u901a\u8fc7\u6269\u5c55\u63a8\u7406\u65f6\u8ba1\u7b97<\/span><span>&nbsp;[Snell\u7b49\uff0c2024]<\/span><span>&nbsp;\u6269\u5c55\u5927\u8bed\u8a00\u6a21\u578b\u7684\u6027\u80fd\u8fb9\u754c\uff0c\u5e76\u901a\u8fc7\u601d\u7ef4\u94fe<\/span><span>\uff08Chain-of-Thought, 
CoT\uff09[Wei\u7b49\uff0c2022]&nbsp;<\/span><span>\u63a8\u7406\u751f\u6210\u66f4\u597d\u7684\u89e3\u51b3\u65b9\u6848\u3002\u7136\u800c\uff0c\u6027\u80fd\u7684\u63d0\u5347\u53ef\u80fd\u6e90\u4e8e\u542f\u53d1\u5f0f\u641c\u7d22\u7b97\u6cd5\u800c\u975e\u6a21\u578b\u7684\u56fa\u6709\u80fd\u529b\uff0c\u8fd9\u4ece\u6a21\u578b\u65e0\u6cd5\u6301\u7eed\u751f\u6210\u9ad8\u8d28\u91cf\u601d\u7ef4\u94fe\u7684\u8bc1\u636e\u53ef\u89c1\u4e00\u6591\u3002\u6a21\u578b\u8fdb\u5316\u901a\u8fc7\u63d0\u5347\u7cfb\u7edf\u6a21\u5757\u6765\u89e3\u51b3\u8fd9\u4e9b\u95ee\u9898\u3002\u4e00\u65b9\u9762\uff0c\u6a21\u578b\u4e13\u95e8\u9488\u5bf9\u5b83\u4eec\u66fe\u7ecf\u56f0\u96be\u7684\u4efb\u52a1\u8fdb\u884c\u8bad\u7ec3\uff1b\u53e6\u4e00\u65b9\u9762\uff0c\u5b83\u4eec\u4ece\u6536\u96c6\u7684\u6570\u636e\u4e2d\u6709\u9009\u62e9\u5730\u5b66\u4e60\uff0c\u4ee5\u771f\u6b63\u6269\u5c55\u5176\u80fd\u529b\u8fb9\u754c\u3002\u524d\u4e24\u79cd\u8fdb\u5316\u4ee3\u8868\u4e86\u63a2\u7d22\u6027\u548c\u53d1\u6563\u6027\u52aa\u529b\uff0c\u65e8\u5728\u7814\u7a76\u5b9e\u73b0\u6570\u636e\u548c\u6a21\u578b\u8fdb\u5316\u7684\u6709\u524d\u666f\u6280\u672f\u548c\u6311\u6218\u3002\u8fd9\u4e3a\u81ea\u6211\u8fdb\u5316\u5960\u5b9a\u4e86\u6570\u636e\u5408\u6210\u7b56\u7565\u548c\u4f18\u5316\u65b9\u6cd5\u7684\u6280\u672f\u57fa\u7840\u3002\u5728\u7b2c\u4e09\u90e8\u5206\uff0c\u6211\u4eec\u4e13\u6ce8\u4e8e\u63a8\u7406\u7cfb\u7edf\u7684\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u3002\u901a\u8fc7\u8fed\u4ee3\u8fdb\u884c\u6570\u636e\u8fdb\u5316\u548c\u6a21\u578b\u8fdb\u5316\uff0c\u63a8\u7406\u7cfb\u7edf\u5b9e\u73b0\u81ea\u6211\u8fdb\u5316\uff1a\u6570\u636e\u8fdb\u5316\u57fa\u4e8e\u5f53\u524d\u6a21\u578b\u751f\u6210\u66f4\u6709\u9488\u5bf9\u6027\u3001\u66f4\u9ad8\u8d28\u91cf\u7684\u6570\u636e\uff0c\u800c\u6a21\u578b\u8fdb\u5316\u5219\u5229\u7528\u6536\u96c6\u7684\u6570\u636e\u8fdb\u4e00\u6b65\u5f3a\u5316\u6a21\u578b\uff0c\u4e3a\u4e0b\u4e00\u8f6e\u6570\u636e\u8fdb\u5316\u63d0\u4f9b\u66f4\u575a\u5b9e\u7684\u57fa\u7840\u3002<\/span><\/span><\
/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6211\u4eec\u7684\u8d21\u732e\u53ef\u603b\u7ed3\u5982\u4e0b\uff1a\uff081\uff09\u5168\u9762\u7efc\u8ff0\uff1a\u8fd9\u662f\u9996\u4e2a\u805a\u7126\u4e8e\u63a8\u7406\u81ea\u6211\u8fdb\u5316\u7684\u5927\u8bed\u8a00\u6a21\u578b\u7efc\u8ff0\uff1b\uff082\uff09\u5206\u7c7b\u4f53\u7cfb\uff1a\u6211\u4eec\u5728\u56fe2\u4e2d\u6784\u5efa\u4e86\u4e00\u4e2a\u8be6\u7ec6\u7684\u5206\u7c7b\u4f53\u7cfb\uff1b\uff083\uff09\u7406\u8bba\u57fa\u7840\uff1a\u6211\u4eec\u6574\u7406\u4e86\u76f8\u5173\u7684\u57fa\u7840\u7406\u8bba\uff0c\u5e76\u63a2\u8ba8\u4e86\u81ea\u6211\u8fdb\u5316\u7684\u89c4\u6a21\u6cd5\u5219\uff1b\uff084\uff09\u524d\u6cbf\u4e0e\u672a\u6765\uff1a\u6211\u4eec\u5206\u6790\u4e86\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u5185\u7684\u6700\u65b0\u5f00\u6e90\u7814\u7a76\uff0c\u5e76\u4e3a\u672a\u6765\u7814\u7a76\u6307\u660e\u65b9\u5411\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><img class=\"rich_pages wxw-img\" data-ratio=\"1.0617283950617284\" data-type=\"png\" data-w=\"891\" style=\"width: 100%;height: auto !important\" data-width=\"891\" data-height=\"946\" data-backw=\"562\" data-backh=\"597\" data-imgfileid=\"100227503\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-abd511b9e1bd77ba101bd16ad9f0f8c7.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jqnrgw8a\" data-pm-slice=\"0 0 
[]\"><span>\u56fe2\uff1a\u5148\u8fdb\u65b9\u6cd5\u7684\u5206\u7c7b\uff0c\u5305\u62ec\u6570\u636e\u8fdb\u5316\u3001\u6a21\u578b\u8fdb\u5316\u548c\u81ea\u6211\u8fdb\u5316\u3002<\/span><\/span><\/p>\n<h1 style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h3 style=\"letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>2. 
\u9884\u5907\u77e5\u8bc6<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">2.1 \u80cc\u666f<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u672c\u7efc\u8ff0\u805a\u7126\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u4fc3\u6210\u7684\u590d\u6742\u63a8\u7406\u4efb\u52a1\u3002\u5177\u4f53\u800c\u8a00\uff0c\u6211\u4eec\u5173\u6ce8\u601d\u7ef4\u94fe\u63a8\u7406\uff0c\u5373\u5927\u8bed\u8a00\u6a21\u578b\u5728\u9884\u6d4b\u6700\u7ec8\u7b54\u6848\u4e4b\u524d\u751f\u6210\u9010\u6b65\u63a8\u7406\u8fc7\u7a0b<\/span><span>\uff08\u5373\u601d\u7ef4\u94fe\uff0cChain-of-Thought, CoT\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u4fbf\u4e8e\u540e\u7eed\u8ba8\u8bba\uff0c\u6211\u4eec\u5c06\u4efb\u52a1\u53ca\u5176\u89e3\u51b3\u8fc7\u7a0b\u5f62\u5f0f\u5316\u5982\u4e0b\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 
8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7ed9\u5b9a\u4efb\u52a1 q\uff0c\u5927\u8bed\u8a00\u6a21\u578b P<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;0040c7ef-2d84-4147-a5a5-4ef637f09172&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;p_{LLM}&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\" data-mpa-action-id=\"m96msweh1w9c\" data-pm-slice=\"0 0 []\"><sub><span><span style=\"font-weight: bold\">LLM<\/span><\/span><\/sub><\/span><\/span><strong style=\"font-size: 15px\"><span><span>&nbsp;\u9996\u5148\u751f\u6210\u9010\u6b65\u601d\u7ef4\u94fe y\uff0c\u7136\u540e\u57fa\u4e8e y \u9884\u6d4b\u6700\u7ec8\u7b54\u6848 z\u3002\u8fd9\u4e00\u8fc7\u7a0b\u53ef\u4ee5\u7528\u6570\u5b66\u8868\u8fbe\u5f0f\u8868\u793a\u4e3a\uff1a<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.0712962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 307px;height: auto !important\" data-imgfileid=\"100227518\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-6563384890efe8116b595a970ce768a8.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;b04afe14-26f9-4697-b6cf-774f68715587&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;p_{LLM}(z|q) = p_{LLM}(z|q, y) \u00b7 p_{LLM}(y|q) \\\\quad (1)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u7531\u4e8e\u5728\u5b9e\u8df5\u4e2d y \u548c z \u901a\u5e38\u6309\u987a\u5e8f\u51fa\u73b0\uff0c\u6211\u4eec\u6709\u65f6\u4f1a\u7528 y \u6765\u8868\u793a\u89e3\u51b3\u65b9\u6848\uff0c\u6216\u8005\u540c\u65f6\u4ee3\u8868\u601d\u7ef4\u94fe\u548c\u6700\u7ec8\u7b54\u6848\u3002<\/span><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">2.2 \u63a8\u7406\u7cfb\u7edf\u6a21\u5757<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u501f\u9274\u73b0\u6709\u63a8\u7406\u7814\u7a76\u7684\u89c1\u89e3\uff0c\u6211\u4eec\u9996\u5148\u63cf\u8ff0\u4e86\u95ed\u73af\u81ea\u6211\u8fdb\u5316\u63a8\u7406<\/span><span>\uff08closed-loop self-evolution reasoning\uff09<\/span><span>\u6846\u67b6\u7684\u57fa\u672c\u7ec4\u4ef6\u3002\u5177\u4f53\u800c\u8a00\uff0c\u6211\u4eec\u786e\u5b9a\u4e86\u4ee5\u4e0b\u56db\u4e2a\u5173\u952e\u6a21\u5757\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p 
style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1\u521b\u5efa\u5668\uff08Task Creator\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u63a8\u7406\u7cfb\u7edf\u9700\u8981<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4f5c\u4e3a\u8f93\u5165\u3002\u4efb\u52a1\u521b\u5efa\u5668\u6700\u76f4\u63a5\u7684\u5b9e\u73b0\u65b9\u5f0f\u662f\u4ece\u56fa\u5b9a\u4efb\u52a1\u96c6\u4e2d\u62bd\u6837\u3002\u7136\u800c\uff0c\u4e0e\u5355\u8f6e\u63a8\u7406\u6539\u8fdb\u4e0d\u540c\uff0c\u81ea\u6211\u8fdb\u5316\u9700\u8981\u901a\u8fc7\u8fed\u4ee3\u4f18\u5316\u6301\u7eed\u63d0\u5347\u63a8\u7406\u80fd\u529b\u3002\u56fa\u5b9a\u4efb\u52a1\u96c6\u53ef\u80fd\u5bfc\u81f4\u6027\u80fd\u5feb\u901f\u6536\u655b[Jiang\u7b49\uff0c2024a]\uff0c\u56e0\u4e3a\u7cfb\u7edf\u5b66\u4f1a\u8bc6\u522b\u9488\u5bf9\u7279\u5b9a\u4efb\u52a1\u7684&#8220;\u6377\u5f84&#8221;\uff0c\u4ece\u800c\u964d\u4f4e\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u56e0\u6b64\uff0c\u751f\u6210\u591a\u6837\u5316\u4efb\u52a1\u5bf9\u7f13\u89e3\u8fd9\u4e00\u95ee\u9898\u5e76\u4fc3\u8fdb\u81ea\u6211\u8fdb\u5316\u81f3\u5173\u91cd\u8981\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u63a8\u7406\u5668\uff08Reasoner\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u63a8\u7406\u5668\u662f\u7cfb\u7edf\u7684\u6838\u5fc3\u89d2\u8272\uff0c\u8d1f\u8d23\u63a5\u6536\u6765\u81ea\u4efb\u52a1\u521b\u5efa\u5668\u7684\u8f93\u5165\u5e76\u901a\u8fc7\u9010\u6b65\u63a8\u7406\u751f\u6210\u89e3\u51b3\u65b9\u6848\u3002\u5728\u672c\u7814\u7a76\u4e2d\uff0c\u63a8\u7406\u5668\u7531\u5927\u8bed\u8a00\u6a21\u578b\u5b9e\u73b0\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 
15px\"><span><span>\u8bc4\u4f30\u5668\uff08Evaluator\uff09\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8bc4\u4f30\u5668\u8d1f\u8d23\u8bc4\u4f30\u548c\u9a8c\u8bc1\u63a8\u7406\u5668\u751f\u6210\u7684\u63a8\u7406\u8fc7\u7a0b\u3002\u8fd9\u4e00\u8f85\u52a9\u6a21\u5757\u6709\u51e0\u4e2a\u5173\u952e\u529f\u80fd\uff1a\u5728\u8bad\u7ec3\u9636\u6bb5\uff0c\u5b83\u63d0\u4f9b\u57fa\u4e8e\u5206\u6570\u7684\u53cd\u9988\u6765\u5fae\u8c03\u63a8\u7406\u5668\uff0c\u4f8b\u5982\u62d2\u7edd\u5fae\u8c03\u6216\u5f3a\u5316\u5b66\u4e60\uff1b\u5728\u63a8\u7406\u9636\u6bb5\uff0c\u5b83\u8bc4\u4f30\u63a8\u7406\u8fc7\u7a0b\uff0c\u4ece\u800c\u6307\u5bfc\u63a8\u7406\u65f6\u8ba1\u7b97\u548c\u540e\u5904\u7406\u6b65\u9aa4\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u540e\u5904\u7406\u5668\uff08Post-Processor\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u540e\u5904\u7406\u5668\u57fa\u4e8e\u8bc4\u4f30\u5668\u53cd\u9988\uff0c\u5904\u7406\u63a8\u7406\u5668\u751f\u6210\u7684\u89e3\u51b3\u65b9\u6848\u3002\u6700\u7b80\u5355\u7684\u64cd\u4f5c\u662f\u76f4\u63a5\u8fc7\u6ee4\u6389\u9519\u8bef\u7684\u89e3\u51b3\u65b9\u6848\uff1b\u7136\u800c\uff0c\u8fd9\u79cd\u65b9\u6cd5\u53ef\u80fd\u5bfc\u81f4\u6570\u636e\u6d6a\u8d39\uff0c\u4e14\u4e0e\u4eba\u7c7b\u5904\u7406\u9519\u8bef\u7684\u65b9\u5f0f\u4e0d\u5c3d\u76f8\u540c\u3002\u540e\u5904\u7406\u5206\u4e3a\u4e24\u4e2a\u9636\u6bb5\uff1a\u5728\u751f\u6210\u8fc7\u7a0b\u4e2d\uff0c\u5b83\u53ef\u4ee5\u901a\u8fc7\u4fee\u6b63\u9519\u8bef\u7684\u6b65\u9aa4\u6216\u56de\u6eaf\u6765\u4f18\u5316\u90e8\u5206\u601d\u7ef4\u94fe\uff1b\u5728\u751f\u6210\u540e\uff0c\u5b83\u5229\u7528\u7cfb\u7edf\u7684\u7ea0\u6b63\u80fd\u529b\u6765\u5b8c\u5584\u5b8c\u6574\u7684\u89e3\u51b3\u65b9\u6848\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u4e9b\u6a21\u5757\u7684\u533a\u5206\u662f\u903b\u8f91\u4e0a\u7684\uff0c\u800c\u975e\u7269\u7406\u4e0a\u7684\u3002\u7531\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u5f3a\u5927\u7684\u6307\u4ee4\u9075\u5faa\u80fd\u529b\uff0c\u5355\u4e2a\u6a21\u578b\u53ef\u4ee5\u5728\u5b9e\u73b0\u8fc7\u7a0b\u4e2d\u540c\u65f6\u5c65\u884c\u591a\u4e2a\u89d2\u8272\u3002\u5728\u63a5\u4e0b\u6765\u7684\u7ae0\u8282\u4e2d\uff0c\u6211\u4eec\u5c06\u63a2\u8ba8\u5b83\u4eec\u5728<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6570\u636e\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff08\u5171\u540c\u751f\u6210\u9ad8\u8d28\u91cf\u6570\u636e\uff09<\/span><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6a21\u578b\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff08\u4f18\u5316\u6bcf\u4e2a\u6a21\u5757\uff09<\/span><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff08\u6a21\u5757\u7684\u8054\u5408\u8fdb\u5316\uff09<\/span><span>\u4e2d\u7684\u91cd\u8981\u4f5c\u7528\u3002<\/span><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<h3 style=\"letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section 
style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>3.&nbsp;<\/span><span>\u6570\u636e\u8fdb\u5316<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.27765726681127983\" data-type=\"png\" data-w=\"922\" style=\"height: auto !important\" data-width=\"922\" data-height=\"256\" data-imgfileid=\"100227500\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-dd26d538b8f72a3ce022b6ceb28cb033.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jp91y22q4\" data-pm-slice=\"0 0 
[]\"><span>\u56fe3\uff1a\u6570\u636e\u8fdb\u5316\u6d41\u7a0b\u7531\u4efb\u52a1\u8fdb\u5316\u548c\u601d\u7ef4\u94fe\u8fdb\u5316\u7ec4\u6210\u3002\u5728\u601d\u7ef4\u94fe\u8fdb\u5316\u4e2d\uff0c\u6211\u4eec\u5b9a\u4e49\u4e86\u4e09\u79cd\u5143\u64cd\u4f5c\u7b26\uff0c\u4f7f\u4e24\u79cd\u641c\u7d22\u8303\u5f0f\u80fd\u591f\u751f\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u601d\u7ef4\u94fe\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5982\u56fe3\u6240\u793a\uff0c\u6570\u636e\u8fdb\u5316\u5206\u4e3a\u4e24\u4e2a\u4e0d\u540c\u9636\u6bb5\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u601d\u7ef4\u94fe\u8fdb\u5316<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u3002\u4efb\u52a1\u8fdb\u5316\u7684\u6838\u5fc3\u662f\u901a\u8fc7\u89e3\u51b3\u5173\u952e\u56e0\u7d20\u6765\u63d0\u9ad8\u63a8\u7406\u4efb\u52a1\u7684\u8d28\u91cf\uff0c\u5982\uff1a\u96be\u5ea6\u3001\u591a\u6837\u6027\u548c\u6709\u6548\u6027\u7b49\u3002\u901a\u8fc7\u4f18\u5316\u8fd9\u4e9b\u7ef4\u5ea6\uff0c\u8be5\u7cfb\u7edf\u4e0d\u88ab\u5c40\u9650\u4e8e\u4efb\u52a1\u8303\u56f4\uff0c\u4ece\u800c\u663e\u8457\u63d0\u5347\u6cdb\u5316\u80fd\u529b\u3002\u601d\u7ef4\u94fe\u8fdb\u5316<\/span><span>\uff08\u63a8\u7406\u65f6\u8ba1\u7b97\uff09<\/span><span>\u65e8\u5728\u63d0\u9ad8\u63a8\u7406\u9636\u6bb5\u7684\u63a8\u7406\u8fc7\u7a0b\u8d28\u91cf\u3002\u8fd9\u79cd\u63d0\u5347\u4e3b\u8981\u4f53\u73b0\u5728\u63a8\u7406\u7684\u51c6\u786e\u6027\u3001\u903b\u8f91\u8fde\u8d2f\u6027\u4ee5\u53ca\u7cfb\u7edf\u81ea\u4e3b\u8bc4\u4f30\u548c\u4fee\u6b63\u9519\u8bef\u7684\u80fd\u529b\u4e0a\u3002<\/span><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">3.1 \u4efb\u52a1\u8fdb\u5316<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4efb\u52a1\u8fdb\u5316<\/span><span>\uff08Task 
evolution\uff09<\/span><span>\u4e13\u6ce8\u4e8e\u751f\u6210\u65b0\u4efb\u52a1\uff0c\u63d0\u9ad8\u8bad\u7ec3\u6570\u636e\u7684\u591a\u6837\u6027\u548c\u96be\u5ea6\uff0c\u4ee5\u589e\u5f3a\u6a21\u578b\u7684\u63a8\u7406\u548c\u6cdb\u5316\u80fd\u529b\u3002\u8fd9\u79cd\u65b9\u6cd5\u7c7b\u4f3c\u4e8e\u5b66\u751f\u901a\u8fc7\u7ec3\u4e60\u5404\u79cd\u4e60\u9898\u6765\u63d0\u9ad8\u6280\u80fd\u3002\u5c3d\u7ba1\u4efb\u52a1\u8fdb\u5316\u81f3\u5173\u91cd\u8981\uff0c\u4f46\u6211\u4eec\u89c2\u5bdf\u5230\u73b0\u6709\u7814\u7a76\u5f88\u5c11\u63a2\u7d22\u8fd9\u4e00\u7ef4\u5ea6\u3002\u56e0\u6b64\uff0c\u6211\u4eec\u5c06\u4efb\u52a1\u8fdb\u5316\u4f5c\u4e3a\u8d77\u70b9\u3002\u4f5c\u4e3a\u6570\u636e\u5408\u6210\u7684\u5173\u952e\u7ec4\u6210\u90e8\u5206\uff0c\u4efb\u52a1\u8fdb\u5316\u901a\u5e38\u6d89\u53ca\u521b\u5efa\u65b0\u4efb\u52a1\uff0c\u4f8b\u5982\u5229\u7528\u66f4\u5148\u8fdb\u7684\u5927\u8bed\u8a00\u6a21\u578b\u6765\u5236\u5b9a\u65b0\u6311\u6218<\/span><span>&nbsp;[Li\u7b49\uff0c2024a]<\/span><span>\u3002\u5728\u672c\u8282\u4e2d\uff0c\u6211\u4eec\u4e13\u6ce8\u4e8e\u589e\u5f3a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1\u591a\u6837\u6027\uff08task diversity\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u590d\u6742\u6027\uff08task complexity\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u53ef\u9760\u6027\uff08task reliability\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff0c\u4ee5\u6709\u6548\u652f\u6301\u4efb\u52a1\u8fdb\u5316\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1\u591a\u6837\u6027\uff1a<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u4e3a\u4e86\u589e\u5f3a\u4efb\u52a1\u591a\u6837\u6027\uff0cHaluptzok\u7b49<\/span><span>[2022]<\/span><span>\u3001Madaan\u7b49<\/span><span>[2023a]<\/span><span>\u4f7f\u7528\u5927\u8bed\u8a00\u6a21\u578b\u4fee\u6539\u53c2\u8003\u95ee\u9898\u7684\u6570\u636e\u7c7b\u578b\u548c\u903b\u8f91\u64cd\u4f5c\uff0c\u751f\u6210\u7ed3\u6784\u76f8\u4f3c\u4f46\u903b\u8f91\u4e0d\u540c\u7684\u4efb\u52a1\u3002Yu\u7b49<\/span><span>[2023b]<\/span><span>\u5219\u4f7f\u7528\u5927\u8bed\u8a00\u6a21\u578b\u91cd\u65b0\u8868\u8ff0\u53c2\u8003\u95ee\u9898\u6765\u521b\u5efa\u65b0\u95ee\u9898\u3002\u7136\u800c\uff0c\u6b64\u7c7b\u65b9\u6cd5\u53d7\u9650\u4e8e\u5bf9\u53c2\u8003\u6570\u636e\u7684\u4f9d\u8d56\uff0c\u4ece\u800c\u9650\u5236\u4e86\u5168\u65b0\u4efb\u52a1\u7684\u751f\u6210\uff0c\u5e76\u524a\u5f31\u4e86\u591a\u6837\u6027\u548c\u521b\u9020\u6027\u3002\u4e3a\u7a81\u7834\u8fd9\u4e00\u5c40\u9650\uff0c\u6709\u4eba\u63d0\u51fa\u4ece\u9ad8\u65b9\u5dee\u5206\u5e03\u4e2d\u91c7\u6837\u6570\u636e\u6216\u5f15\u5165\u805a\u7126\u591a\u6837\u6027\u7684\u63d0\u793a\u8bcd\u3002\u4f8b\u5982\uff0cLiu\u7b49<\/span><span>[2023]<\/span><span>\u91c7\u7528\u6e29\u5ea6\u91c7\u6837\u548c\u6ce8\u91cd\u591a\u6837\u6027\u7684\u63d0\u793a\u8bcd\u6765\u751f\u6210\u591a\u6837\u5316\u7684\u95ee\u9898\uff0c\u800cXu\u7b49<\/span><span>[2023]<\/span><span>\u5219\u660e\u786e\u6307\u793a\u5927\u8bed\u8a00\u6a21\u578b\u521b\u5efa\u7f55\u89c1\u7684\u3001\u7279\u5b9a\u9886\u57df\u7684\u95ee\u9898\u3002\u6b64\u5916\uff0cSelf-Instruct<\/span><span>[Wang\u7b49\uff0c2022]<\/span><span>\u901a\u8fc7\u7ed3\u5408\u4eba\u5de5\u7f16\u5199\u548c\u6a21\u578b\u751f\u6210\u7684\u4efb\u52a1\u6765\u751f\u6210\u65b0\u7684\u4efb\u52a1\u6307\u4ee4\uff0c\u5e76\u4f7f\u7528\u7279\u5b9a\u7684\u63d0\u793a\u6a21\u677f\u5f15\u5bfc\u751f\u6210\u8fc7\u7a0b\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 
15px\"><span><span>\u4efb\u52a1\u590d\u6742\u6027\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>Xu\u7b49<\/span><span>[2023]<\/span><span>\u63d0\u51fa\u4e86\u51e0\u79cd\u57fa\u4e8e\u793a\u4f8b\u95ee\u9898\u751f\u6210\u590d\u6742\u4efb\u52a1\u7684\u65b9\u6cd5\uff1a1\uff09 \u6dfb\u52a0\u7ea6\u675f\uff1a\u901a\u8fc7\u5f15\u5165\u989d\u5916\u7684\u7ea6\u675f\u6216\u8981\u6c42\u6765\u63d0\u9ad8\u4efb\u52a1\u96be\u5ea6\uff0c\u4ece\u800c\u589e\u5f3a\u6a21\u578b\u7684\u7075\u6d3b\u6027\u548c\u9002\u5e94\u6027\uff1b2\uff09 \u6df1\u5316\uff1a\u6269\u5c55\u793a\u4f8b\u4e2d\u67e5\u8be2\u7684\u6df1\u5ea6\u548c\u5e7f\u5ea6\uff0c\u4ee5\u63d0\u5347\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\uff1b3\uff09 \u5177\u4f53\u5316\uff1a\u5c06\u95ee\u9898\u4e2d\u7684\u4e00\u822c\u6982\u5ff5\u66ff\u6362\u4e3a\u5177\u4f53\u6982\u5ff5\uff0c\u4f7f\u6307\u4ee4\u66f4\u52a0\u6e05\u6670\uff0c\u4ece\u800c\u63d0\u9ad8\u54cd\u5e94\u7684\u51c6\u786e\u6027\u548c\u76f8\u5173\u6027\uff1b4\uff09 \u589e\u52a0\u63a8\u7406\u6b65\u9aa4\uff1a\u91cd\u65b0\u5236\u5b9a\u7b80\u5355\u95ee\u9898\uff0c\u8981\u6c42\u989d\u5916\u7684\u63a8\u7406\u6b65\u9aa4\uff0c\u4ece\u800c\u589e\u5f3a\u6a21\u578b\u7684\u903b\u8f91\u601d\u7ef4\u80fd\u529b\uff1b5\uff09 \u589e\u52a0\u8f93\u5165\u590d\u6742\u6027\uff1a\u901a\u8fc7\u4fee\u6539\u95ee\u9898\u6761\u4ef6\uff0c\u5f15\u5165\u7ed3\u6784\u5316\u6570\u636e\u6216\u7279\u5b9a\u8f93\u5165\u683c\u5f0f<\/span><span>\uff08\u5982\u4ee3\u7801\u3001\u8868\u683c\u3001XML\u7b49\uff09<\/span><span>\uff0c\u5c06\u95ee\u9898\u4ece\u76f4\u63a5\u53ef\u8ba1\u7b97\u8f6c\u53d8\u4e3a\u9700\u8981\u989d\u5916\u6570\u636e\u89e3\u6790\u6216\u64cd\u4f5c\u7684\u5f62\u5f0f\uff0c\u4ece\u800c\u63d0\u5347\u6a21\u578b\u7684\u9c81\u68d2\u6027\u548c\u6cdb\u5316\u80fd\u529b\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 
8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0cShi\u7b49<\/span><span>[2023]<\/span><span>\u901a\u8fc7\u5f15\u5165\u65e0\u5173\u6761\u4ef6\u6765\u589e\u52a0\u63a8\u7406\u96be\u5ea6\uff0c\u8feb\u4f7f\u6a21\u578b\u8bc6\u522b\u5e76\u805a\u7126\u4e8e\u5173\u952e\u6761\u4ef6\uff0c\u800cMitra\u7b49<\/span><span>[2024]<\/span><span>\u5219\u901a\u8fc7\u5728\u95ee\u9898\u4e2d\u5d4c\u5165\u7b54\u6848\uff0c\u5c06\u95ee\u9898\u91cd\u65b0\u8868\u8ff0\u4e3a\u9648\u8ff0\u6027\u8bed\u53e5\uff0c\u5e76\u501f\u52a9\u81ea\u52a8\u5316\u5efa\u8bae<\/span><span>\uff08\u5982\u5f15\u5165\u989d\u5916\u53d8\u91cf\uff09<\/span><span>\u6765\u6307\u5bfc\u751f\u6210\u66f4\u590d\u6742\u7684\u95ee\u9898\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4efb\u52a1\u53ef\u9760\u6027\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u81ea\u52a8\u751f\u6210\u4efb\u52a1\u53ef\u80fd\u4f1a\u4ea7\u751f\u65e0\u6cd5\u89e3\u51b3\u7684\u4efb\u52a1\u6216\u4e0d\u6b63\u786e\u7684\u7b54\u6848\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0cLi \u7b49<\/span><span>&nbsp;[2023a]&nbsp;<\/span><span>\u91c7\u7528\u5fae\u8c03\u7684\u5927\u8bed\u8a00\u6a21\u578b<\/span><span>\uff08LLMs\uff09<\/span><span>\u5bf9\u4efb\u52a1\u8fdb\u884c\u8bc4\u5206\u5e76\u7b5b\u9009\u9ad8\u8d28\u91cf\u4efb\u52a1\u3002\u7c7b\u4f3c\u5730\uff0cLiu \u7b49<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u548c Xu \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u57fa\u4e8e\u539f\u59cb\u95ee\u9898\u751f\u6210\u591a\u79cd\u4efb\u52a1\uff0c\u5e76\u901a\u8fc7\u9a8c\u8bc1\u7b54\u6848\u6765\u8fc7\u6ee4\u4e0d\u4e00\u81f4\u7684\u4efb\u52a1\u3002Haluptzok \u7b49<\/span><span>&nbsp;[2022]&nbsp;<\/span><span>\u548c Liu \u7b49&nbsp;<\/span><span>[2023]&nbsp;<\/span><span>\u5219\u5229\u7528 Python 
\u89e3\u91ca\u5668\u548c\u9884\u5b9a\u4e49\u89c4\u5219<\/span><span>\uff08\u5982\u68c0\u67e5\u4efb\u52a1\u957f\u5ea6\u6216\u6570\u503c\u5185\u5bb9\uff09<\/span><span>\u6765\u9a8c\u8bc1\u6b63\u786e\u6027\uff0c\u4ece\u800c\u786e\u4fdd\u4efb\u52a1\u8d28\u91cf\u3002Kreber \u548c Hahn [2021] \u63d0\u51fa\u4e86\u4e00\u79cd\u57fa\u4e8e Transformer \u7f16\u7801\u5668\u7684\u751f\u6210\u5bf9\u6297\u7f51\u7edc<\/span><span>\uff08GAN\uff09[Goodfellow \u7b49\uff0c2014]<\/span><span>\uff0c\u901a\u8fc7\u968f\u673a\u566a\u58f0\u751f\u6210\u7b26\u53f7\u4efb\u52a1\u3002\u8bc4\u5224\u5668\u8bc4\u4f30\u751f\u6210\u4efb\u52a1\u4e0e\u771f\u5b9e\u6570\u636e\u4e4b\u95f4\u7684\u76f8\u4f3c\u6027\uff0c\u5e76\u63d0\u4f9b\u53cd\u9988\u4ee5\u4f18\u5316\u751f\u6210\u5668\uff0c\u4ece\u800c\u63d0\u9ad8\u4efb\u52a1\u53ef\u9760\u6027\u3002\u6b64\u5916\uff0cWei \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u548c Lu \u7b49&nbsp;<\/span><span>[2024b]&nbsp;<\/span><span>\u63a2\u7d22\u4e86\u53cd\u5411\u4efb\u52a1\u751f\u6210\u65b9\u6cd5\uff0c\u5229\u7528\u5927\u8bed\u8a00\u6a21\u578b\u4ece\u89e3\u51b3\u65b9\u6848\u4e2d\u63a8\u5bfc\u95ee\u9898\u3002\u5177\u4f53\u800c\u8a00\uff0cLu \u7b49&nbsp;<\/span><span>[2024b]&nbsp;<\/span><span>\u4ece\u6570\u5b66\u53c2\u8003\u89e3\u51b3\u65b9\u6848\u4e2d\u8fed\u4ee3\u751f\u6210\u65b0\u7b54\u6848\uff0c\u5b9a\u4e49\u7ea6\u675f\u6761\u4ef6\u548c\u903b\u8f91\u5173\u7cfb\uff0c\u5e76\u5c06\u8fd9\u4e9b\u7b54\u6848\u8f6c\u5316\u4e3a\u4efb\u52a1\uff0c\u4ece\u800c\u786e\u4fdd\u751f\u6210\u95ee\u9898\u7684\u53ef\u9760\u6027\u3002\u7c7b\u4f3c\u5730\uff0cWei \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u5229\u7528\u9ad8\u8d28\u91cf\u7684\u5f00\u6e90\u4ee3\u7801\uff0c\u901a\u8fc7\u5927\u8bed\u8a00\u6a21\u578b\u751f\u6210\u7f16\u7a0b\u4efb\u52a1\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 
8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">3.2 \u601d\u7ef4\u94fe\u8fdb\u5316<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u5f00\u59cb\u63a8\u7406\u8fc7\u7a0b\u4e4b\u524d\uff0c\u9700\u8981\u6784\u601d\u63a8\u7406\u94fe<\/span><span>\uff08Chain of Thoughts, CoT\uff09<\/span><span>\u7684\u7406\u60f3\u5f62\u6001\uff0c\u4ee5\u53ca\u5b83\u5e94\u8be5\u5305\u542b\u54ea\u4e9b\u5143\u64cd\u4f5c\u3002\u63a8\u7406\u94fe\u7684\u683c\u5f0f\u51b3\u5b9a\u4e86\u7cfb\u7edf\u63a8\u7406\u80fd\u529b\u7684\u4e0a\u9650\u3002\u5728\u672c\u8282\u4e2d\uff0c\u6211\u4eec\u9996\u5148\u5b9a\u4e49\u4e09\u4e2a\u5143\u64cd\u4f5c\uff0c\u4ee5\u6784\u5efa\u66f4\u5f3a\u5927\u7684\u63a8\u7406\u94fe\uff0c\u5305\u62ec\u9010\u6b65\u63a8\u7406<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\uff08Step-by-Step Reasoning\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001\u8bc4\u4f30<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\uff08Evaluation\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c\u540e\u5904\u7406<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\uff08Post-Processing\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u6211\u4eec\u56de\u987e\u4e86\u63a8\u7406\u4e2d\u7684\u8ba1\u7b97\u65b9\u6cd5\uff0c\u8fd9\u4e9b\u65b9\u6cd5\u901a\u5e38\u901a\u8fc7\u641c\u7d22\u751f\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u601d\u7ef4\u94fe\uff0c\u5206\u4e3a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u663e\u5f0f\u6811\u641c\u7d22\uff08explicit tree search\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\uff08implicit trial-and-error search\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4e24\u7c7b\u3002\u65e9\u671f\u7814\u7a76\u96c6\u4e2d\u5728\u663e\u5f0f\u6811\u641c\u7d22\u4e0a\uff0c\u4f46\u968f\u7740O1\u53ca\u5176\u540e\u7eed\u5f00\u6e90\u9879\u76ee<\/span><span>\uff08\u5982 R1 [DeepSeek-AI \u7b49\uff0c2025]\u3001Kimi k1.5 [Team \u7b49\uff0c2025] \u548c T1 [Hou \u7b49\uff0c2025]\uff09<\/span><span>\uff0c\u7814\u7a76\u9010\u6e10\u8f6c\u5411\u8bd5\u9519\u641c\u7d22\u3002O1\u63d0\u4f9b\u7684\u601d\u7ef4\u94fe\u793a\u4f8b\u8868\u660e\uff0c\u5b83\u80fd\u5728\u53d1\u73b0\u9519\u8bef\u65f6\u81ea\u6211\u7ea0\u6b63\u6216\u56de\u6eaf\uff0c\u5e76\u8bb0\u5f55\u6574\u4e2a\u63a8\u7406\u8fc7\u7a0b\uff0c\u6a21\u4eff\u4eba\u7c7b\u5728\u56de\u7b54\u524d\u7684\u6df1\u601d\u719f\u8651\u3002O1 Journey&nbsp;<\/span><span>&nbsp;[Qin \u7b49\uff0c2024]&nbsp;<\/span><span>&nbsp;\u65e9\u671f\u63a2\u8ba8\u4e86\u8fd9\u4e00\u70b9\uff0c\u63d0\u51fa\u4e86\u201c\u6377\u5f84\u5b66\u4e60\u201d<\/span><span>\uff08Shortcut Learning\uff09[Geirhos \u7b49\uff0c2020]&nbsp;<\/span><span>\u6982\u5ff5\uff0c\u63cf\u8ff0\u8ffd\u6c42\u6bcf\u4e00\u6b65\u90fd\u6b63\u786e\u7684\u601d\u7ef4\u94fe\uff0c\u5e76\u901a\u8fc7\u201c\u65c5\u7a0b\u5b66\u4e60\u201d<\/span><span>\uff08Journey Learning\uff09[Qin \u7b49\uff0c2024]&nbsp;<\/span><span>\u8868\u793a\u63a8\u7406\u8fc7\u7a0b\u4e2d\u7684\u81ea\u6211\u9a8c\u8bc1\u3001\u9519\u8bef\u68c0\u6d4b\u548c\u4fee\u6b63\u3002Kimi 
k1.5<\/span><span>[Team \u7b49\uff0c2025]<\/span><span>&nbsp;\u548cRedstar<\/span><span>[Xu \u7b49\uff0c2025]&nbsp;<\/span><span>\u8fdb\u4e00\u6b65\u7814\u7a76\u8fd9\u4e00\u6982\u5ff5\uff0c\u5e76\u79f0\u4e4b\u4e3a\u201c\u957f\u601d\u7ef4\u94fe\u201d<\/span><span>\uff08Long CoT\uff09<\/span><span>\u3002\u4e0e\u6b64\u4e00\u81f4\uff0c\u6211\u4eec\u5c06\u6377\u5f84\u5b66\u4e60\u7684\u7ed3\u679c\u79f0\u4e3a\u201c\u77ed\u601d\u7ef4\u94fe\u201d<\/span><span>\uff08Short CoT\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">3.2.1 \u5143\u64cd\u4f5c\u7b26<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u601d\u7ef4\u94fe\u63a8\u7406\u7684\u6f5c\u529b\u5df2\u88ab\u5e7f\u6cdb\u63a2\u7d22\u3002\u867d\u7136\u666e\u901a\u601d\u7ef4\u94fe<\/span><span>\uff08vanilla CoT\uff09<\/span><span>\u5728\u7b80\u5355\u4efb\u52a1\u4e0a\u8868\u73b0\u826f\u597d\uff0c\u4f46\u5728\u66f4\u590d\u6742\u4efb\u52a1\u4e2d\u8868\u73b0\u4e0d\u4f73\u3002\u589e\u5f3a\u601d\u7ef4\u94fe\u63a8\u7406\u7684\u4e00\u79cd\u65b9\u6cd5\u662f\u8bbe\u8ba1\u53d7\u4eba\u7c7b\u8ba4\u77e5\u542f\u53d1\u7684\u66f4\u590d\u6742\u4e14\u9ad8\u6548\u7684\u63a8\u7406\u94fe\u3002\u5bf9\u7c7b O1 \u7cfb\u7edf<\/span><span>&nbsp;[Qin \u7b49\uff0c2024; Zeng 
\u7b49\uff0c2024b]<\/span><span>&nbsp;\u7684\u89c2\u5bdf\u5f15\u53d1\u4e86\u5bf9\u5206\u89e3\u3001\u9010\u6b65\u63a8\u7406\u3001\u81ea\u6211\u8bc4\u4f30\u3001\u81ea\u6211\u7ea0\u6b63\u548c\u56de\u6eaf\u7b49\u64cd\u4f5c\u7684\u8ba8\u8bba\u3002\u56e0\u6b64\uff0c\u6211\u4eec\u603b\u7ed3\u5e76\u5f52\u7eb3\u4e86\u4e09\u4e2a\u5173\u952e\u5143\u64cd\u4f5c\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u9010\u6b65\u63a8\u7406\uff08Step-by-step Reasoning\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8bc4\u4f30\uff08Evaluation\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u540e\u5904\u7406\uff08Post-processing\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u6211\u4eec\u805a\u7126\u8fd9\u4e09\u4e2a\u5143\u64cd\u4f5c\u7b26\uff0c\u5e76\u56de\u987e\u524d\u4e00\u8282<\/span><span>\uff08\u00a72.2\uff09<\/span><span>\u4e2d\u63d0\u5230\u7684\u6a21\u5757\uff0c\u53ef\u4ee5\u5c06\u601d\u7ef4\u94fe\u683c\u5f0f\u4e0e\u63a8\u7406\u7cfb\u7edf\u6a21\u5757\u76f8\u8fde\u63a5\u3002\u63a8\u7406\u5668<\/span><span>\uff08Reasoner\uff09<\/span><span>\u901a\u8fc7\u9010\u6b65\u5206\u89e3\u751f\u6210\u63a8\u7406\u8fc7\u7a0b\uff0c\u641c\u7d22\u7b97\u6cd5\u4f5c\u4e3a\u5176\u6269\u5c55\u6280\u672f\u3002\u8bc4\u4f30\u5668<\/span><span>\uff08Evaluator\uff09<\/span><span>\u548c\u540e\u5904\u7406\u5668<\/span><span>\uff08Post-Processor\uff09<\/span><span>\u5206\u522b\u7ba1\u7406\u601d\u7ef4\u94fe\u4e2d\u7684\u8bc4\u4f30\u548c\u7ea0\u6b63\u8fc7\u7a0b\u3002\u6574\u5408\u8fd9\u4e09\u4e2a\u6a21\u5757\u540e\uff0c\u6211\u4eec\u53ef\u4ee5\u6784\u5efa\u4e00\u4e2a\u5168\u9762\u4e14\u7a33\u5065\u7684\u63a8\u7406\u7cfb\u7edf\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u9010\u6b65\u63a8\u7406<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u9010\u6b65\u63a8\u7406\u5c06\u95ee\u9898\u5206\u89e3\u4e3a\u4f9d\u5e8f\u6b65\u9aa4\uff0c\u8fd9\u9700\u8981\u5f3a\u5927\u7684\u89c4\u5212\u80fd\u529b\uff0c\u7136\u540e\u901a\u8fc7\u57fa\u4e8e\u94fe\u7684\u63a8\u7406\u8fc7\u7a0b\u9010\u6b65\u89e3\u51b3\u95ee\u9898<\/span><span>&nbsp;[Chu 
\u7b49\uff0c2023]<\/span><span>\u3002\u6b64\u5916\uff0c\u5206\u89e3\u8fc7\u7a0b\u5e94\u8be5\u662f\u9012\u5f52\u7684\uff0c\u4ece\u800c\u4f7f\u7cfb\u7edf\u80fd\u591f\u8fed\u4ee3\u5730\u5206\u89e3\u590d\u6742\u7684\u5b50\u95ee\u9898\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u601d\u7ef4\u94fe<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>[Wei \u7b49\uff0c2022]&nbsp;<\/span><span>\u4ee3\u8868\u4e86\u4e00\u79cd\u76f4\u63a5\u7684\u7ebf\u6027\u641c\u7d22\u65b9\u6cd5\uff0c\u5229\u7528\u5c11\u6837\u672c<\/span><span>\uff08few-shot\uff09<\/span><span>\u6216\u96f6\u6837\u672c<\/span><span>\uff08zero-shot\uff09<\/span><span>\u63d0\u793a\u6765\u9010\u6b65\u89e3\u51b3\u95ee\u9898\u3002Plan-and-Solve&nbsp;<\/span><span>[Wang \u7b49\uff0c2023b]&nbsp;<\/span><span>\u91c7\u7528\u96f6\u6837\u672c\u63d0\u793a\u5f15\u5bfc\u6a21\u578b\u5728\u5355\u4e00\u751f\u6210\u8fc7\u7a0b\u4e2d\u751f\u6210\u8ba1\u5212\uff0c\u968f\u540e\u57fa\u4e8e\u751f\u6210\u7684\u8ba1\u5212\u8fdb\u884c\u94fe\u5f0f\u63a8\u7406\u3002\u4ece\u5c11\u5230\u591a\u63d0\u793a<\/span><span>\uff08Least-to-Most Prompting\uff09[Zhou 
\u7b49\uff0c2022]<\/span><span>&nbsp;\u91c7\u7528\u4e24\u9636\u6bb5\u65b9\u6cd5\uff1a\u7b2c\u4e00\u9636\u6bb5\uff0c\u95ee\u9898\u88ab\u660e\u786e\u5206\u89e3\u4e3a\u591a\u4e2a\u5b50\u95ee\u9898\uff1b\u7b2c\u4e8c\u9636\u6bb5\uff0c\u8fd9\u4e9b\u5b50\u95ee\u9898\u88ab\u987a\u5e8f\u89e3\u51b3\u3002\u524d\u4e00\u6b65\u7684\u7ed3\u679c\u88ab\u6dfb\u52a0\u81f3\u4e0a\u4e0b\u6587\uff0c\u5f15\u5bfc\u6a21\u578b\u7ee7\u7eed\u89e3\u51b3\u540e\u7eed\u5b50\u95ee\u9898\u3002\u4e0e\u57fa\u4e8e\u89c4\u5212\u7684\u65b9\u6cd5\u4e0d\u540c\uff0c\u8fde\u7eed\u63d0\u793a<\/span><span>\uff08Successive Prompting\uff09[Dua \u7b49\uff0c2022]&nbsp;<\/span><span>\u91c7\u7528\u8fed\u4ee3\u5206\u89e3\u8fc7\u7a0b\u3002\u5728\u6bcf\u6b21\u8fed\u4ee3\u4e2d\uff0c\u4e00\u4e2a\u65b0\u7684\u5b50\u95ee\u9898\u88ab\u63d0\u51fa\u5e76\u5728\u5f53\u524d\u6b65\u9aa4\u4e2d\u89e3\u51b3\u3002\u8fd9\u4e00\u4e24\u6b65\u8fc7\u7a0b\u91cd\u590d\u8fdb\u884c\uff0c\u76f4\u5230\u6574\u4e2a\u95ee\u9898\u88ab\u89e3\u51b3\u3002ReACT<\/span><span>&nbsp;[Yao \u7b49\uff0c2022]<\/span><span>&nbsp;\u5c06\u8fed\u4ee3\u63a8\u7406\u4e0e\u884c\u52a8\u76f8\u7ed3\u5408\u3002\u5728\u6bcf\u4e00\u6b65\u4e2d\uff0c\u6a21\u578b\u57fa\u4e8e\u5176\u63a8\u7406\u751f\u6210\u4e00\u4e2a\u884c\u52a8\u3002\u8be5\u884c\u52a8\u53ef\u80fd\u6d89\u53ca\u8c03\u7528\u5916\u90e8\u5de5\u5177<\/span><span>\uff08\u5982\u8ba1\u7b97\u5668\uff09<\/span><span>\u6216\u4e0e\u73af\u5883\u4ea4\u4e92\u3002\u968f\u540e\uff0c\u6a21\u578b\u4f7f\u7528\u6765\u81ea\u8fd9\u4e9b\u5916\u90e8\u5de5\u5177\u6216\u73af\u5883\u7684\u53cd\u9988\u8fdb\u884c\u4e0b\u4e00\u6b65\uff0c\u76f4\u5230\u8fbe\u5230\u6700\u7ec8\u76ee\u6807\u3002\u901a\u8fc7\u5f15\u5165\u884c\u52a8\uff0cReACT \u4f7f\u6a21\u578b\u80fd\u591f\u4e0e\u5916\u90e8\u7cfb\u7edf\u4ea4\u4e92\uff0c\u4ece\u800c\u589e\u5f3a\u5927\u8bed\u8a00\u6a21\u578b\u7684\u63a8\u7406\u8fc7\u7a0b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bc4\u4f30<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e00\u4e2a\u7a33\u5065\u7684\u63a8\u7406\u7cfb\u7edf\u987b\u5177\u5907\u81ea\u6211\u8bc4\u4f30\u80fd\u529b\uff0c\u4f7f\u5176\u80fd\u591f\u5728\u4efb\u52a1\u6267\u884c\u671f\u95f4\u548c\u4e4b\u540e\u8bc4\u4f30\u5176\u63a8\u7406\u8fc7\u7a0b\u3002\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\uff0c\u7cfb\u7edf\u5e94\u8bc6\u522b\u5e76\u7ec8\u6b62\u9519\u8bef\u7684\u63a2\u7d22\u8def\u5f84\u4ee5\u8fdb\u884c\u540e\u5904\u7406\u3002\u5728\u542f\u53d1\u5f0f\u641c\u7d22\u4e2d\uff0c\u8bc4\u4f30\u7ed3\u679c\u8fdb\u4e00\u6b65\u7528\u4e8e\u6307\u5bfc\u641c\u7d22\u3002\u5b8c\u6210\u63a8\u7406\u8fc7\u7a0b\u540e\uff0c\u53ef\u80fd\u4f1a\u751f\u6210\u591a\u4e2a\u5019\u9009\u7b54\u6848\uff0c\u8fd9\u5c31\u9700\u8981\u8fdb\u884c\u5f7b\u5e95\u8bc4\u4f30\uff0c\u4ee5\u6709\u6548\u5730\u8bc4\u4f30\u548c\u9a8c\u8bc1\u4e0d\u540c\u7684\u89e3\u51b3\u65b9\u6848\u3002\u6211\u4eec\u4ece\u4e09\u4e2a\u7c92\u5ea6\u56de\u987e\u73b0\u6709\u7814\u7a76\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u7ed3\u679c\u7ea7\uff08outcome-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u6b65\u9aa4\u7ea7\uff08step-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\uff08token-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 
8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7ed3\u679c\u7ea7\u8bc4\u4f30<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp; \u65e9\u671f\u5de5\u4f5c\u4e3b\u8981\u5173\u6ce8\u7ed3\u679c\u7ea7\u8bc4\u4f30\uff0c\u5373\u5728\u63a8\u7406\u5b8c\u6210\u540e\u5bf9\u5b8c\u6574\u89e3\u51b3\u65b9\u6848\u8fdb\u884c\u8bc4\u4f30<\/span><span>&nbsp;[Cobbe \u7b49\uff0c2021; Wang \u7b49\uff0c2023c; Lee \u7b49\uff0c2024a]<\/span><span>\u3002\u8fd9\u4e9b\u65b9\u6cd5\u7684\u4e3b\u8981\u533a\u522b\u5728\u4e8e\u8bc4\u4f30\u7684\u5f62\u5f0f\u548c\u76ee\u7684\u3002\u5728\u8bad\u7ec3\u9636\u6bb5\uff0c\u5f53\u6b63\u786e\u7b54\u6848\u53ef\u7528\u65f6\uff0c\u4e00\u4e9b\u5de5\u4f5c\u5bf9\u7167\u6807\u51c6\u7b54\u6848\u5bf9\u89e3\u51b3\u65b9\u6848\u8fdb\u884c\u76f4\u63a5\u7684\u6b63\u786e\u6027\u8bc4\u4f30&nbsp;<\/span><span>[Cobbe \u7b49\uff0c2021; Hosseini \u7b49\uff0c2024]<\/span><span>\u3002\u9664\u4e86\u5355\u7eaf\u7684\u7b54\u6848\u51c6\u786e\u6027\u5916\uff0cR1&nbsp;<\/span><span>[DeepSeek-AI \u7b49\uff0c2025] \u548c T1 [Hou \u7b49\uff0c2025]&nbsp;<\/span><span>\u8fd8\u878d\u5165\u4e86\u57fa\u4e8e\u683c\u5f0f\u7684\u7ed3\u679c\u5956\u52b1\u6765\u6307\u5bfc\u63a8\u7406\u683c\u5f0f\u5b66\u4e60\u3002\u5728\u63a8\u7406\u9636\u6bb5\uff0cCobbe \u7b49<\/span><span>&nbsp;[2021]<\/span><span>\u3001Hosseini \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u5229\u7528\u8bad\u7ec3\u597d\u7684\u9a8c\u8bc1\u5668\u5bf9\u5019\u9009\u89e3\u51b3\u65b9\u6848\u8fdb\u884c\u8bc4\u5206\u548c\u6392\u540d\uff0c\u4ece\u800c\u9009\u62e9\u6700\u4f18\u89e3\u3002\u6b64\u5916\uff0c\u4e00\u4e9b\u65b9\u6cd5\u4f7f\u7528\u5927\u8bed\u8a00\u6a21\u578b\u5bf9\u89e3\u51b3\u65b9\u6848\u63d0\u4f9b\u81ea\u7136\u8bed\u8a00\u53cd\u9988\u3002\u4f8b\u5982\uff0cMadaan 
\u7b49<\/span><span>&nbsp;[2023b]<\/span><span>\u3001Zhang \u7b49<\/span><span>&nbsp;[2024b]&nbsp;<\/span><span>\u76f4\u63a5\u751f\u6210\u6279\u8bc4\uff0c\u800c Peng \u7b49<\/span><span>&nbsp;[2023]<\/span><span>\u3001Shinn \u7b49<\/span><span>&nbsp;[2023]<\/span><span>\u3001Gou \u7b49<\/span><span>&nbsp;[2024]<\/span><span>&nbsp;\u5728\u6279\u8bc4\u751f\u6210\u4e2d\u5305\u542b\u5185\u90e8\u548c\u5916\u90e8\u73af\u5883\u4fe1\u606f\u3002\u6b64\u5916\uff0cAnkner \u7b49&nbsp;<\/span><span>[2024b]<\/span><span>\u3001Yu \u7b49<\/span><span>&nbsp;[2024b]&nbsp;<\/span><span>\u5c06\u81ea\u7136\u8bed\u8a00\u6279\u8bc4\u4e0e\u8bc4\u5206\u673a\u5236\u76f8\u7ed3\u5408\uff0c\u4ee5\u63d0\u9ad8\u8bc4\u4f30\u7684\u53ef\u9760\u6027\u548c\u53ef\u89e3\u91ca\u6027\u3002\u4e00\u4e9b\u7814\u7a76\u8fd8\u91c7\u7528\u57fa\u4e8e\u4e00\u81f4\u6027\u7684\u8bc4\u4f30\u6846\u67b6\u3002\u4f8b\u5982\uff0cWang \u7b49<\/span><span>&nbsp;[2023c]&nbsp;<\/span><span>\u91c7\u7528\u6295\u7968\u7cfb\u7edf\u4ece\u591a\u4e2a\u89e3\u51b3\u65b9\u6848\u5019\u9009\u4e2d\u786e\u5b9a\u6700\u7ec8\u7b54\u6848\uff0c\u800c Jiang \u7b49<\/span><span>&nbsp;[2024b]<\/span><span>\u3001Weng \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u901a\u8fc7\u786e\u4fdd\u524d\u5411\u548c\u540e\u5411\u63a8\u7406\u8fc7\u7a0b\u4e4b\u95f4\u7684\u4e00\u81f4\u6027\u6765\u8bc4\u4f30\u7b54\u6848\u8d28\u91cf\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6b65\u9aa4\u7ea7\u8bc4\u4f30&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u867d\u7136\u7ed3\u679c\u7ea7\u8bc4\u4f30\u5b9e\u65bd\u7b80\u5355\uff0c\u4f46\u5728\u5b9e\u8df5\u4e2d\u5e94\u7528\u6709\u9650\uff0c\u5f80\u5f80\u9700\u8981\u66f4\u7ec6\u81f4\u7684\u8bc4\u4f30\u3002\u5176\u4e2d\uff0c\u6b65\u9aa4\u7ea7\u8bc4\u4f30\u5df2\u6210\u4e3a\u4e00\u79cd\u7279\u522b\u7a81\u51fa\u7684\u65b9\u6cd5\uff0c\u5f3a\u8c03\u5bf9\u5355\u4e2a\u63a8\u7406\u6b65\u9aa4\u7684\u8bc4\u4f30&nbsp;<\/span><span>[Lightman \u7b49\uff0c2024; Wang \u7b49\uff0c2024g,m; Gao \u7b49\uff0c2024a; Lu \u7b49\uff0c2024a; Li \u7b49\uff0c2023b]<\/span><span>\u3002\u5728\u6811\u641c\u7d22\u7b97\u6cd5\u4e2d\uff0c\u8fc7\u7a0b\u8bc4\u4f30\u88ab\u5e7f\u6cdb\u7528\u4e8e\u6307\u5bfc\u641c\u7d22\u8f68\u8ff9\u3002\u4f8b\u5982\uff0cTian \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u5728\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u4e2d\u4f7f\u7528\u72b6\u6001\u8bc4\u5206\u6765\u6307\u5bfc\u641c\u7d22\u8fc7\u7a0b\uff0c\u800c Xie \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u5728\u675f\u641c\u7d22\u4e2d\u5b9e\u73b0\u72b6\u6001\u8bc4\u5206\u4ee5\u4f18\u5316\u8def\u5f84\u9009\u62e9\u3002\u6b64\u5916\uff0c\u6b65\u9aa4\u7ea7\u8bc4\u4f30\u5728\u9519\u8bef\u7ea0\u6b63\u548c\u63a8\u7406\u6b65\u9aa4\u603b\u7ed3\u65b9\u9762\u90fd\u8bc1\u660e\u4e86\u5176\u6709\u6548\u6027\u3002\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0cZheng \u7b49<\/span><span>&nbsp;[2024]<\/span><span>\u3001Xi \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u5df2\u5f00\u53d1\u51fa\u80fd\u591f\u7cbe\u786e\u5b9a\u4f4d\u7279\u5b9a\u63a8\u7406\u6b65\u9aa4\u4e2d\u7684\u4e0d\u51c6\u786e\u4e4b\u5904\u7684\u65b9\u6cd5\uff0c\u4ece\u800c\u63d0\u4f9b\u66f4\u7cbe\u786e\u548c\u53ef\u64cd\u4f5c\u7684\u53cd\u9988\uff0c\u7528\u4e8e\u5168\u9762\u8bc4\u4f30\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\u8bc4\u4f30&nbsp;<\/span><\/span><\/strong><span 
style=\"font-size: 15px\"><span>\u4e00\u4e9b\u7814\u7a76\u8ba4\u4e3a\uff0c\u6b65\u9aa4\u7ea7\u8bc4\u4f30\u7684\u7c92\u5ea6\u5bf9\u4e8e\u5168\u9762\u7684\u63a8\u7406\u8bc4\u4f30\u4ecd\u7136\u4e0d\u8db3&nbsp;<\/span><span>[Yoon \u7b49\uff0c2024; Chen \u7b49\uff0c2024h]<\/span><span>\u3002\u8fd9\u4fc3\u4f7f\u4e86\u8bcd\u5143\u7ea7\u8bc4\u4f30\u6846\u67b6\u7684\u53d1\u5c55\uff0c\u63d0\u4f9b\u4e86\u66f4\u9ad8\u7ec6\u7c92\u5ea6\u7684\u5206\u6790\u3002Yoon \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u5f15\u5165\u4e86\u4e00\u79cd\u65b9\u6cd5\uff0c\u5229\u7528\u5f3a\u5927\u7684\u5927\u8bed\u8a00\u6a21\u578b\u5728\u8bcd\u5143\u7ea7\u522b\u4e0a\u8fed\u4ee3\u4fee\u6539\u601d\u7ef4\u94fe\u63a8\u7406\u3002\u4ed6\u4eec\u7684\u65b9\u6cd5\u6839\u636e\u4fee\u6539\u64cd\u4f5c\u4e3a\u8bcd\u5143\u5206\u914d\u4e0d\u540c\u7684\u5956\u52b1\uff0c\u5e76\u5229\u7528\u8fd9\u4e9b\u5956\u52b1\u6765\u8bad\u7ec3\u8bcd\u5143\u7ea7\u5956\u52b1\u6a21\u578b\u3002\u7c7b\u4f3c\u5730\uff0cChen \u7b49<\/span><span>&nbsp;[2024h]&nbsp;<\/span><span>\u63d0\u51fa\u4e86\u4e00\u4e2a\u4e24\u9636\u6bb5\u6846\u67b6\uff0c\u9996\u5148\u8bad\u7ec3\u4e00\u4e2a\u7ea0\u6b63\u6a21\u578b\u6765\u8bc6\u522b\u548c\u7ea0\u6b63\u9519\u8bef\u7684\u63a8\u7406\u6b65\u9aa4\u3002\u901a\u8fc7\u5c06\u4f4e\u751f\u6210\u6982\u7387\u4e0e\u9519\u8bef\u8bcd\u5143\u5173\u8054\uff0c\u5c06\u9ad8\u6982\u7387\u4e0e\u6b63\u786e\u8bcd\u5143\u5173\u8054\uff0c\u4ed6\u4eec\u7684\u65b9\u6cd5\u80fd\u591f\u6784\u5efa\u7cbe\u786e\u7684\u8bcd\u5143\u7ea7\u5956\u52b1\u4fe1\u53f7\u3002\u6b64\u5916\uff0cLee \u7b49<\/span><span>&nbsp;[2024d]&nbsp;<\/span><span>\u63d0\u51fa\u4e86\u4e00\u4e2a\u8bcd\u5143\u76d1\u7763\u7684\u4ef7\u503c\u6a21\u578b\uff0c\u8be5\u6a21\u578b\u76d1\u7763\u5355\u4e2a\u8bcd\u5143\u4ee5\u63d0\u4f9b\u5bf9\u89e3\u51b3\u65b9\u6848\u6b63\u786e\u6027\u7684\u66f4\u51c6\u786e\u8bc4\u4f30\u3002\u540c\u65f6\uff0cYang 
\u7b49&nbsp;<\/span><span>[2024b]&nbsp;<\/span><span>\u57fa\u4e8e\u6700\u5927\u71b5\u5f3a\u5316\u5b66\u4e60\u539f\u7406\u63a8\u5bfc\u51fa\u4e86\u4e00\u79cd\u8bcd\u5143\u7ea7\u8bc4\u4f30\u65b9\u6848\u3002\u4ed6\u4eec\u7684\u65b9\u6cd5\u901a\u8fc7\u57fa\u4e8e\u6392\u540d\u7684\u622a\u65ad\u8ba1\u7b97\u8bcd\u5143\u7ea7\u4ef7\u503c\uff0c\u4e3a\u6bcf\u4e2a\u8bcd\u5143\u5206\u914d +1\u30010 \u6216 -1 \u7684\u79bb\u6563\u5956\u52b1\uff0c\u4ece\u800c\u5b9e\u73b0\u5bf9\u63a8\u7406\u8fc7\u7a0b\u7684\u7ec6\u7c92\u5ea6\u4f18\u5316\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u57fa\u4e8e\u8bc4\u4f30\u53cd\u9988\u7684\u5448\u73b0\u683c\u5f0f\uff0c\u73b0\u6709\u7684\u8bc4\u4f30\u65b9\u6cd5\u53ef\u5206\u4e3a\u4e24\u79cd\u4e0d\u540c\u8303\u5f0f\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u9a8c\u8bc1\u5668\uff08verifier\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8bc4\u8bba\u5668\uff08critic\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002\u9a8c\u8bc1\u5668\u4e13\u6ce8\u4e8e\u901a\u8fc7\u6807\u91cf\u8bc4\u5206\u91cf\u5316\u89e3\u51b3\u65b9\u6848\u8d28\u91cf\uff0c\u800c\u8bc4\u8bba\u5668\u5219\u4ee5\u81ea\u7136\u8bed\u8a00\u63d0\u4f9b\u53e3\u5934\u53cd\u9988\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u9a8c\u8bc1\u5668&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u9a8c\u8bc1\u5668\u8303\u5f0f\u901a\u8fc7\u5206\u914d\u91cf\u5316\u5206\u6570\u6765\u8bc4\u4f30\u89e3\u51b3\u65b9\u6848\u7684\u6b63\u786e\u6027\u3002\u4f8b\u5982\uff0cCobbe \u7b49[2021]\u4f7f\u7528\u9a8c\u8bc1\u5668\u6765\u4f30\u8ba1\u89e3\u51b3\u65b9\u6848\u6b63\u786e\u7684\u6982\u7387\uff0c\u800cHosseini \u7b49<\/span><span>[2024]<\/span><span>\u5229\u7528\u7ecf\u8fc7\u8bad\u7ec3\u7684DPO\u9a8c\u8bc1\u5668\u751f\u6210\u53cd\u6620\u89e3\u51b3\u65b9\u6848\u6709\u6548\u6027\u7684\u4f3c\u7136\u5206\u6570\u3002\u6b64\u5916\uff0c<\/span><span>[Lightman \u7b49\uff0c2024; Wang \u7b49\uff0c2024g; Lu \u7b49\uff0c2024a]<\/span><span>\u91c7\u7528\u6b65\u9aa4\u7ea7\u8bc4\u5206\u673a\u5236\uff0c\u5bf9\u5355\u4e2a\u63a8\u7406\u6b65\u9aa4\u5206\u914d\u5206\u6570\uff0c\u5e76\u4f7f\u7528\u6700\u5c0f\u503c\u6216\u5e73\u5747\u503c\u7b49\u6307\u6807\u805a\u5408\u5b83\u4eec\uff0c\u4ee5\u5f97\u51fa\u6574\u4f53\u89e3\u51b3\u65b9\u6848\u8d28\u91cf\u8bc4\u4f30\u3002<\/span><span>[Tian \u7b49\uff0c2024; Xie \u7b49\uff0c2023]<\/span><span>\u4e3a\u6811\u641c\u7d22\u8fc7\u7a0b\u4e2d\u7684\u6bcf\u4e2a\u72b6\u6001\u5206\u914d\u5206\u6570\uff0c\u4ee5\u4f18\u5316\u641c\u7d22\u8def\u5f84\u3002\u4e3a\u4e86\u66f4\u7ec6\u7684\u7c92\u5ea6\uff0c<\/span><span>[Yoon \u7b49\uff0c2024; Chen \u7b49\uff0c2024h; Lee \u7b49\uff0c2024d; Yang \u7b49\uff0c2024b]<\/span><span>\u5f15\u5165\u4e86\u8bcd\u5143\u7ea7\u8bc4\u5206\u673a\u5236\uff0c\u4e3a\u5355\u4e2a\u8bcd\u5143\u5206\u914d\u8fde\u7eed\u6216\u79bb\u6563\u5206\u6570<\/span><span>\uff08\u5982\u4e2d\u6027\u3001\u6b63\u786e\u6216\u9519\u8bef\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bc4\u8bba\u5668 &nbsp;<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u8bc4\u8bba\u5668\u8303\u5f0f\u751f\u6210\u81ea\u7136\u8bed\u8a00\u53cd\u9988\uff0c\u4ee5\u4fc3\u8fdb\u9519\u8bef\u6f84\u6e05\u5e76\u63d0\u9ad8\u8bc4\u5206\u673a\u5236\u7684\u53ef\u89e3\u91ca\u6027\u3002\u4f8b\u5982\uff0cMadaan \u7b49<\/span><span>[2023b]<\/span><span>\u5229\u7528\u6a21\u578b\u56fa\u6709\u7684\u80fd\u529b\u5bf9\u5176\u81ea\u8eab\u89e3\u51b3\u65b9\u6848\u4ea7\u751f\u6279\u5224\u6027\u53cd\u9988\uff0c\u5b9e\u73b0\u8fed\u4ee3\u6539\u8fdb\u3002\u540c\u65f6\uff0c<\/span><span>[Peng \u7b49\uff0c2023; Shinn \u7b49\uff0c2023; Gou \u7b49\uff0c2024]<\/span><span>\u901a\u8fc7\u7ed3\u5408\u5185\u90e8\u6a21\u578b\u72b6\u6001\u548c\u5916\u90e8\u73af\u5883\u4fe1\u606f\u6765\u6269\u5c55\u8fd9\u79cd\u65b9\u6cd5\uff0c\u751f\u6210\u5168\u9762\u7684\u6279\u8bc4\uff0c\u4e0d\u4ec5\u8bc6\u522b\u9519\u8bef\uff0c\u8fd8\u6307\u5bfc\u540e\u7eed\u6539\u8fdb\u3002\u8fdb\u4e00\u6b65\u63a8\u8fdb\u8fd9\u4e00\u5de5\u4f5c\uff0c<\/span><span>[Zheng \u7b49\uff0c2024; Xi \u7b49\uff0c2024]<\/span><span>\u8fdb\u884c\u7c92\u5ea6\u5316\u3001\u9010\u6b65\u7684\u6279\u5224\u6027\u5206\u6790\uff0c\u4ee5\u66f4\u8be6\u7ec6\u5730\u5b9a\u4f4d\u548c\u7ea0\u6b63\u9519\u8bef\u3002<\/span><span>[Ankner \u7b49\uff0c2024b; Yu \u7b49\uff0c2024b]<\/span><span>\u5c06\u6279\u8bc4\u751f\u6210\u4e0e\u8bc4\u5206\u673a\u5236\u6574\u5408\u3002\u901a\u8fc7\u5728\u5206\u914d\u5206\u6570\u4e4b\u524d\u751f\u6210\u81ea\u7136\u8bed\u8a00\u6279\u8bc4\uff0c\u8fd9\u4e9b\u65b9\u6cd5\u589e\u5f3a\u4e86\u8bc4\u4f30\u8fc7\u7a0b\u7684\u900f\u660e\u5ea6\u548c\u53ef\u9760\u6027\uff0c\u4e3a\u8bc4\u4f30\u89e3\u51b3\u65b9\u6848\u8d28\u91cf\u63d0\u4f9b\u4e86\u66f4\u53ef\u89e3\u91ca\u548c\u7a33\u5065\u7684\u6846\u67b6\u3002\u6b64\u5916\uff0cMCTS-Judge<\/span><span>[Wang 
\u7b49\uff0c2025b]<\/span><span>\u4e5f\u5c06\u81ea\u6211\u8bc4\u4f30\u5efa\u6a21\u4e3a\u4e00\u7cfb\u5217\u5b50\u4efb\u52a1\uff0c\u5e76\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u5c06\u95ee\u9898\u5206\u89e3\u4e3a\u66f4\u7b80\u5355\u7684\u591a\u89d2\u5ea6\u8bc4\u4f30\u4efb\u52a1\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<section style=\"margin-bottom: 0px\"><span><br \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.26278836509528586\" data-type=\"png\" data-w=\"997\" style=\"height: auto !important\" data-width=\"997\" data-height=\"262\" data-imgfileid=\"100227501\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-754b42daf4ccd7bb7462d7ba995681ff.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jnzbhgs2\" data-pm-slice=\"0 0 []\"><span>\u56fe4\uff1a\u8bc4\u4f30\u540e\u7684\u4e09\u79cd\u540e\u5904\u7406\u65b9\u6cd5\uff1a\u8fc7\u6ee4\uff08Filter\uff09\u3001\u603b\u7ed3\uff08Summary\uff09\u548c\u7ea0\u6b63\uff08Correction\uff09\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bc4\u4f30\u7684\u6311\u6218\uff1a\u5956\u52b1\u6b3a\u9a97&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u5956\u52b1\u6b3a\u9a97\u88ab\u5b9a\u4e49\u4e3a\u7b56\u7565\u6a21\u578b\u5229\u7528\u5956\u52b1\u5b9a\u4e49\u4e2d\u7684\u6a21\u7cca\u6027\u6216\u6f0f\u6d1e\u83b7\u5f97\u9ad8\u5956\u52b1\uff0c\u800c\u5b9e\u9645\u4e0a\u5e76\u672a\u5b66\u4e60\u6240\u9700\u80fd\u529b\u7684\u60c5\u51b5<\/span><span>&nbsp;[Weng\uff0c2024]<\/span><span>\u3002\u9488\u5bf9\u7279\u5b9a\u9636\u6bb5\uff0c\u6709\u4e24\u6761\u4e3b\u8981\u8def\u5f84\u53ef\u4ee5\u7f13\u89e3\u5956\u52b1\u6b3a\u9a97\u3002\u5728\u5956\u52b1\u5efa\u6a21\u9636\u6bb5\uff0c\u8bbe\u8ba1\u66f4\u590d\u6742\u7684\u8fc7\u7a0b\u5956\u52b1\u53ef\u80fd\u6709\u52a9\u4e8e\u7f13\u89e3\u8fd9\u4e00\u95ee\u9898\u3002\u7136\u800c\uff0c\u8fc7\u4e8e\u590d\u6742\u7684\u5956\u52b1\u4fe1\u53f7\u4e5f\u53ef\u80fd\u6539\u53d8\u6536\u655b\u76ee\u6807\u3002\u53e6\u4e00\u79cd\u65b9\u6cd5\u662f\u653e\u5f03\u7ec6\u7c92\u5ea6\u7684\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRM\uff09<\/span><span>\u800c\u4ec5\u4f9d\u8d56\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08ORM\uff09<\/span><span>\uff0c\u8fd9\u5bf9\u63a8\u7406\u4efb\u52a1\u5c24\u4e3a\u9002\u7528\u3002\u4f8b\u5982\uff0cR1<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]<\/span><span>\u548cT1<\/span><span>&nbsp;[Hou 
\u7b49\uff0c2025]<\/span><span>\u4ec5\u91c7\u7528\u57fa\u4e8e\u7b54\u6848\u6b63\u786e\u6027\u548c\u683c\u5f0f\u5408\u89c4\u6027\u7684\u89c4\u5219\u578b\u7ed3\u679c\u5956\u52b1\uff0c\u6709\u6548\u7f13\u89e3\u4e86\u4f7f\u7528\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u65f6\u7684\u5956\u52b1\u6b3a\u9a97\u95ee\u9898\u3002\u6b64\u5916\uff0c\u4f7f\u7528\u66f4\u5927\u89c4\u6a21\u7684\u5927\u8bed\u8a00\u6a21\u578b\u4f5c\u4e3a\u57fa\u7840\u5956\u52b1\u6a21\u578b\u53ef\u4ee5\u63d0\u9ad8\u5176\u6cdb\u5316\u80fd\u529b\u5e76\u964d\u4f4e\u5229\u7528\u6f0f\u6d1e\u7684\u98ce\u9669\u3002\u540c\u65f6\uff0c\u5728\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\uff0c\u88c1\u526a\u548c\u5956\u52b1\u5851\u9020\u7b49\u673a\u5236\u53ef\u4ee5\u5728\u4e00\u5b9a\u7a0b\u5ea6\u4e0a\u5e2e\u52a9\u7f13\u89e3\u8fd9\u4e00\u95ee\u9898<\/span><span>&nbsp;[Gao \u7b49\uff0c2024b]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bc4\u4f30\u7684\u6311\u6218\uff1a\u6cdb\u5316\u6027 &nbsp;<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0c\u5956\u52b1\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u540c\u6837\u81f3\u5173\u91cd\u8981\u3002\u53c2\u6570\u5316\u8bc4\u4f30\u5668<\/span><span>\uff08\u5982\u5956\u52b1\u6a21\u578b\uff09<\/span><span>\u901a\u5e38\u5728\u7279\u5b9a\u6570\u636e\u5206\u5e03\u4e0a\u8bad\u7ec3\uff0c\u8fd9\u9650\u5236\u4e86\u5b83\u4eec\u5728\u5206\u5e03\u5916<\/span><span>\uff08out-of-distribution, OOD\uff09<\/span><span>\u4efb\u52a1\u4e0a\u7684\u9002\u7528\u6027\u3002\u8fd9\u79cd\u9650\u5236\u53ef\u80fd\u5bfc\u81f4\u5bf9\u65b0\u4efb\u52a1\u7684\u8bc4\u4f30\u51fa\u73b0\u504f\u5dee\u6216\u4e0d\u7a33\u5b9a\uff0c\u8fdb\u4e00\u6b65\u963b\u788d\u4efb\u52a1\u6cdb\u5316<\/span><span>&nbsp;[DeepSeek-AI 
\u7b49\uff0c2025; Cui \u7b49\uff0c2025]<\/span><span>\u3002\u56e0\u6b64\uff0c\u589e\u5f3a\u5956\u52b1\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\uff0c\u4ee5\u5728\u66f4\u5e7f\u6cdb\u7684\u4efb\u52a1\u8303\u56f4\u5185\u63d0\u4f9b\u53ef\u9760\u53cd\u9988\uff0c\u5bf9\u4e8e\u63d0\u9ad8\u4efb\u52a1\u6cdb\u5316\u81f3\u5173\u91cd\u8981\u3002\u4e00\u65b9\u9762\uff0c\u53ef\u4ee5\u4f18\u5148\u8003\u8651\u975e\u53c2\u6570\u5316\u8bc4\u4f30\u5668\uff0c\u5982\u7b54\u6848\u6b63\u786e\u6027\u6216\u683c\u5f0f\u51c6\u786e\u6027\uff0c\u4ee5\u7f13\u89e3\u8fd9\u4e9b\u95ee\u9898<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025; Hou \u7b49\uff0c2025]<\/span><span>\u3002\u53e6\u4e00\u65b9\u9762\uff0c\u5982\u679c\u5fc5\u987b\u4f7f\u7528\u53c2\u6570\u5316\u8bc4\u4f30\u5668\uff0c\u786e\u4fdd\u5176\u6301\u7eed\u66f4\u65b0\u81f3\u5173\u91cd\u8981\u3002\u4e00\u4e2a\u5173\u952e\u6311\u6218\u5728\u4e8e\u9ad8\u6548\u4e14\u7ecf\u6d4e\u5730\u6784\u5efa\u8fd9\u4e9b\u8bc4\u4f30\u5668\u7684\u8bad\u7ec3\u6570\u636e\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1\u50cf R1<\/span><span>&nbsp;[DeepSeek-AI 
\u7b49\uff0c2025]&nbsp;<\/span><span>\u8fd9\u6837\u7684\u5de5\u4f5c\u901a\u8fc7\u57fa\u4e8e\u89c4\u5219\u7684\u7ed3\u679c\u5956\u52b1\u89c4\u907f\u4e86\u73b0\u6709\u8bc4\u4f30\u5668\u4e2d\u7684\u5956\u52b1\u6b3a\u9a97\u548c\u6cdb\u5316\u9650\u5236\u95ee\u9898\uff0c\u4f46\u4e5f\u66b4\u9732\u4e86\u65b0\u7684\u6311\u6218\uff0c\u5982\u8fc7\u957f\u7684\u601d\u7ef4\u94fe\u3001\u4f4e\u6548\u7684\u53cd\u601d\u548c\u8fc7\u5ea6\u601d\u8003\u3002\u8fd9\u4e9b\u95ee\u9898\u8868\u660e\uff0c\u4ec5\u4f9d\u8d56\u7ed3\u679c\u5956\u52b1\u53ef\u80fd\u4e0d\u8db3\u3002\u66f4\u9ad8\u7ec6\u7c92\u5ea6\u7684\u6b65\u9aa4\u7ea7\u8bc4\u4f30\u53ef\u80fd\u6709\u52a9\u4e8e\u89e3\u51b3\u8fd9\u4e9b\u7f3a\u70b9\u3002\u7ed3\u5408\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRMs\uff09<\/span><span>\u548c\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08ORMs\uff09<\/span><span>\u7684\u4f18\u52bf\uff0c\u65e2\u80fd\u7f13\u89e3\u5956\u52b1\u6b3a\u9a97\u5e76\u786e\u4fdd\u6cdb\u5316\uff0c\u53c8\u80fd\u5b9e\u73b0\u7ec6\u7c92\u5ea6\u8bc4\u4f30\uff0c\u4ecd\u7136\u662f\u672a\u6765\u7814\u7a76\u4e2d\u7684\u91cd\u8981\u6311\u6218\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u540e\u5904\u7406<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5982\u56fe4\u6240\u793a\uff0c\u8bc4\u4f30\u540e\u7684\u63a8\u7406\u89e3\u51b3\u65b9\u6848\u53ef\u4ee5\u8fdb\u4e00\u6b65\u5904\u7406\u4ee5\u63d0\u9ad8\u5176\u8d28\u91cf\u548c\u53ef\u9760\u6027\u3002\u9996\u5148\uff0c\u53ef\u4ee5\u4f7f\u7528\u603b\u7ed3\u77e5\u8bc6<\/span><span>\uff08Summary Knowledge\uff09<\/span><span>\u4ece\u63a8\u7406\u8fc7\u7a0b\u4e2d\u63d0\u70bc\u548c\u603b\u7ed3\u5173\u952e\u4fe1\u606f\u3002\u5bf9\u4e8e\u4f4e\u8d28\u91cf\u7684\u63a8\u7406\u89e3\u51b3\u65b9\u6848\uff0c\u5e38\u89c1\u7684\u5904\u7406\u65b9\u6cd5\u5305\u62ec\u8fc7\u6ee4<\/span><span>\uff08Filtering\uff09<\/span><span>\u548c\u7ea0\u6b63<\/span><span>\uff08Correcting\uff09<\/span><span>\u3002\u8fc7\u6ee4\u76f4\u63a5\u79fb\u9664\u4e0d\u53ef\u9760\u7684\u89e3\u51b3\u65b9\u6848\uff0c\u800c\u7ea0\u6b63\u901a\u8fc7\u4fee\u6b63\u9519\u8bef\u6216\u6062\u590d\u5230\u6b63\u786e\u72b6\u6001\u6765\u4f18\u5316\u63a8\u7406\u8fc7\u7a0b\u3002\u8fd9\u4e24\u79cd\u65b9\u6cd5\u5404\u6709\u4f18\u52bf\uff0c\u5176\u4e2d\u7ea0\u6b63\u5728\u63d0\u9ad8\u63a8\u7406\u51c6\u786e\u6027\u7684\u540c\u65f6\u7279\u522b\u6709\u6548\u5730\u4fdd\u7559\u6709\u7528\u4fe1\u606f\u3002\u901a\u8fc7\u7eb3\u5165\u8fd9\u4e9b\u540e\u5904\u7406\u64cd\u4f5c\uff0c\u7cfb\u7edf\u53ef\u4ee5\u6709\u6548\u907f\u514d\u9677\u5165\u903b\u8f91\u6b7b\u89d2\u548c\u91cd\u590d\u5931\u8d25\u7684\u63a8\u7406\u8def\u5f84\uff0c\u4ece\u800c\u5728\u590d\u6742\u95ee\u9898\u89e3\u51b3\u573a\u666f\u4e2d\u589e\u5f3a\u6574\u4f53\u7a33\u5065\u6027\u548c\u53ef\u9760\u6027\u3002\u4ee5\u4e0b\u8ba8\u8bba\u5c06\u66f4\u8be6\u7ec6\u5730\u6df1\u5165\u63a2\u8ba8\u8fd9\u4e9b\u6838\u5fc3\u7b56\u7565\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 
15px\"><strong><span><span>\u4ece\u601d\u7ef4\u94fe\u4e2d\u603b\u7ed3\u77e5\u8bc6<\/span><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><br \/><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u63d0\u9ad8\u6a21\u578b\u5728\u63a8\u7406\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\uff0c\u4e00\u4e9b\u7814\u7a76\u4e13\u6ce8\u4e8e\u603b\u7ed3\u5148\u524d\u89e3\u51b3\u65b9\u6848\u7684\u7ecf\u9a8c\u6765\u6307\u5bfc\u540e\u7eed\u63a8\u7406\u3002\u4f8b\u5982\uff0cZhang \u7b49<\/span><span>[2024k]<\/span><span>\u5728\u8bad\u7ec3\u5b9e\u4f8b\u4e2d\u7eb3\u5165\u53cd\u601d\u7ec4\u4ef6\uff0c\u5982\u901a\u8fc7\u7c7b\u6bd4\u548c\u63a8\u7406\u7684\u66ff\u4ee3\u89e3\u51b3\u65b9\u6848\u6216\u95ee\u9898\u6269\u5c55\uff0c\u5f15\u5bfc\u6a21\u578b\u4ece\u4e0d\u540c\u89d2\u5ea6\u7406\u89e3\u95ee\u9898\u5e76\u79ef\u7d2f\u591a\u6837\u5316\u7684\u63a8\u7406\u7ecf\u9a8c\u3002\u800cWang \u7b49<\/span><span>[2024l]<\/span><span>\u901a\u8fc7\u8bad\u7ec3\u5bf9\u9f50\u5c06\u53cd\u601d\u89c1\u89e3\u6574\u5408\u5230\u4ee3\u7801\u672c\u6a21\u5757\u4e2d\uff0c\u4f7f\u6a21\u578b\u80fd\u591f\u4e3b\u52a8\u68c0\u7d22\u76f8\u5173\u53cd\u601d\u6765\u8f85\u52a9\u63a8\u7406\u8fc7\u7a0b\u3002\u5728\u6811\u641c\u7d22\u63a8\u7406\u4e2d\uff0cHui \u7b49<\/span><span>[2024]<\/span><span>\u8bc6\u522b\u91cd\u8981\u8282\u70b9\u5e76\u53cd\u601d\u540e\u7eed\u884c\u52a8\u548c\u7ed3\u679c\uff0c\u751f\u6210\u4efb\u52a1\u7ea7\u6307\u5bfc\u65b9\u9488\u4ee5\u4f18\u5316\u641c\u7d22\u6548\u7387\u5e76\u907f\u514d\u91cd\u590d\u9519\u8bef\u3002\u540c\u65f6\uff0cLiu 
\u7b49<\/span><span>[2024c]<\/span><span>\u5f15\u5165\u4e86\u7528\u4e8e\u884c\u52a8\u9009\u62e9\u7684\u6587\u672c\u539f\u5219\uff0c\u901a\u8fc7\u8fed\u4ee3\u53cd\u601d\u4e0d\u65ad\u5b8c\u5584\u8fd9\u4e9b\u539f\u5219\uff0c\u7075\u6d3b\u6307\u5bfc\u884c\u52a8\u6267\u884c\u3002\u6b64\u5916\uff0cZhang \u7b49<\/span><span>[2025a]<\/span><span>\u63d0\u51fa\u4e86\u57fa\u4e8e\u601d\u7ef4\u94fe\u7684\u5408\u6210\u5668<\/span><span>\uff08CoT-based Synthesizer\uff09<\/span><span>\uff0c\u901a\u8fc7\u7ed3\u5408\u591a\u4e2a\u5019\u9009\u89e3\u51b3\u65b9\u6848\u7684\u4e92\u8865\u4fe1\u606f\u6765\u6539\u8fdb\u63a8\u7406\uff0c\u5373\u4f7f\u6240\u6709\u5019\u9009\u89e3\u51b3\u65b9\u6848\u90fd\u6709\u7f3a\u9677\u4e5f\u80fd\u751f\u6210\u66f4\u597d\u7684\u89e3\u51b3\u65b9\u6848\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><span>\u8fc7\u6ee4\u4f4e\u8d28\u91cf\u601d\u7ef4\u94fe<\/span><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><br \/><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5f53\u5728\u8bc4\u4f30\u9636\u6bb5\u8bc6\u522b\u51fa\u4f4e\u8d28\u91cf\u89e3\u51b3\u65b9\u6848\u65f6\uff0c\u6700\u7b80\u5355\u7684\u65b9\u6cd5\u662f\u76f4\u63a5\u8fc7\u6ee4\u3002\u4f8b\u5982\uff0c\u5f53\u6807\u51c6\u7b54\u6848\u53ef\u7528\u65f6\uff0c\u53ef\u4ee5\u57fa\u4e8e\u7b54\u6848\u6b63\u786e\u6027\u8fc7\u6ee4\u4f4e\u8d28\u91cf\u89e3\u51b3\u65b9\u6848<\/span><span>&nbsp;[Singh \u7b49\uff0c2023; Gulcehre 
\u7b49\uff0c2023]<\/span><span>\u3002\u5728\u6ca1\u6709\u6807\u51c6\u7b54\u6848\u7684\u60c5\u51b5\u4e0b\uff0c\u8fc7\u6ee4\u7b56\u7565\u53ef\u4ee5\u57fa\u4e8e\u4e00\u81f4\u6027\u8fdb\u884c\u5b8c\u5584\uff0c\u5982\u56f0\u60d1\u5ea6<\/span><span>\uff08perplexity\uff09[Min \u7b49\uff0c2024]<\/span><span>\u3001\u57fa\u4e8e\u6295\u7968\u7684\u4e00\u81f4\u6027<\/span><span>&nbsp;[Wang \u7b49\uff0c2023c; Chen \u7b49\uff0c2023a]<\/span><span>\u3001\u524d\u5411-\u540e\u5411\u4e00\u81f4\u6027<\/span><span>&nbsp;[Jiang \u7b49\uff0c2024b; Weng \u7b49\uff0c2023]<\/span><span>\uff0c\u6216\u901a\u8fc7\u6784\u5efa\u9488\u5bf9\u591a\u9879\u9009\u62e9\u9898\u6027\u8d28\u7684\u540e\u7eed\u95ee\u9898\u6765\u8bc4\u4f30\u89e3\u51b3\u65b9\u6848\u4e00\u81f4\u6027&nbsp;<\/span><span>[Ankner \u7b49\uff0c2024a; Lee \u7b49\uff0c2024b]<\/span><span>\u3002\u6b64\u5916\uff0c\u53ef\u5b66\u4e60\u7684\u9a8c\u8bc1\u5668<\/span><span>&nbsp;[Cobbe \u7b49\uff0c2021; Yu \u7b49\uff0c2023a; Stiennon \u7b49\uff0c2020]<\/span><span>\u53ef\u4ee5\u7528\u6765\u8fdb\u4e00\u6b65\u589e\u5f3a\u8fc7\u6ee4\u8fc7\u7a0b\u3002\u867d\u7136\u7b80\u5355\u8fc7\u6ee4\u65e2\u9ad8\u6548\u53c8\u6613\u4e8e\u5b9e\u65bd\uff0c\u4f46\u901a\u5e38\u4f1a\u5bfc\u81f4\u5927\u91cf\u63a8\u7406\u6570\u636e\u6d6a\u8d39\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><br \/><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><span>\u7ea0\u6b63\u4f4e\u8d28\u91cf\u601d\u7ef4\u94fe<\/span><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u9664\u4e86\u76f4\u63a5\u8fc7\u6ee4\u5916\uff0c\u7ea0\u6b63\u4e0d\u6b63\u786e\u7684\u89e3\u51b3\u65b9\u6848\u6709\u52a9\u4e8e\u6700\u5927\u9650\u5ea6\u5730\u5229\u7528\u5df2\u6709\u6570\u636e\uff0c\u56e0\u6b64\u4f4e\u8d28\u91cf\u89e3\u51b3\u65b9\u6848\u7684\u6539\u8fdb\u5df2\u6210\u4e3a\u7814\u7a76\u7684\u5173\u952e\u65b9\u5411\u3002\u65e9\u671f\u7814\u7a76\u4e3b\u8981\u4f9d\u8d56\u6a21\u578b\u7684\u5185\u5728\u80fd\u529b\u5bf9\u89e3\u51b3\u65b9\u6848\u8fdb\u884c\u4f18\u5316\u3002\u4f8b\u5982\uff0cMadaan \u7b49<\/span><span>&nbsp;[2023b]&nbsp;<\/span><span>\u4f7f\u7528\u81ea\u751f\u6210\u53cd\u9988\u5bf9\u521d\u59cb\u8f93\u51fa\u8fdb\u884c\u8fed\u4ee3\u4f18\u5316\uff1bZhang \u7b49<\/span><span>&nbsp;[2024g]<\/span><span>&nbsp;\u5219\u901a\u8fc7\u5927\u8bed\u8a00\u6a21\u578b\u6bd4\u8f83\u591a\u4e2a\u65b9\u6848\uff0c\u5e76\u5c06\u5dee\u5f02\u6574\u7406\u4e3a\u5bf9\u7167\u6e05\u5355\uff0c\u4ee5\u589e\u5f3a\u81ea\u6211\u53cd\u601d\u8fc7\u7a0b\u7684\u4e00\u81f4\u6027\u4e0e\u53ef\u9760\u6027\u3002\u7136\u800c\uff0c\u4ec5\u4f9d\u8d56\u5185\u5728\u7ea0\u6b63\u65b9\u6cd5\u5f80\u5f80\u96be\u4ee5\u5145\u5206\u4f18\u5316\u7ed3\u679c\uff0c\u8868\u660e\u5f15\u5165\u5916\u90e8\u4fe1\u606f\u5bf9\u4e8e\u63d0\u5347\u6539\u8fdb\u6548\u679c\u81f3\u5173\u91cd\u8981\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6700\u8fd1\u7684\u7814\u7a76\u63d0\u51fa\u591a\u79cd\u7b56\u7565\u589e\u5f3a\u7ea0\u6b63\u6027\u80fd\u3002Ferraz \u7b49<\/span><span>[2024]<\/span><span>\u3001Wu \u7b49<\/span><span>[2024b]<\/span><span>\u901a\u8fc7\u7ec6\u7c92\u5ea6\u7ea6\u675f\u5206\u89e3\u548c\u540e\u5411\u63a8\u7406\u4f18\u5316\u7ea0\u6b63\u8fc7\u7a0b\u3002Gou 
\u7b49<\/span><span>[2024]<\/span><span>\u5229\u7528\u5916\u90e8\u5de5\u5177\u53cd\u9988\uff0cLi \u7b49<\/span><span>[2024b]<\/span><span>\u3001Gao \u7b49<\/span><span>[2024c]<\/span><span>\u3001Chen \u7b49<\/span><span>[2023b]<\/span><span>\u3001Yuan \u7b49<\/span><span>[2024a]<\/span><span>\u501f\u52a9Python\u89e3\u91ca\u5668\u8fed\u4ee3\u6267\u884c\u4ee3\u7801\uff0c\u76f4\u81f3\u6210\u529f\u8bc4\u4f30\u3002Ramji \u7b49<\/span><span>[2024]<\/span><span>\u91c7\u7528\u7b56\u7565\u6307\u6807\u4f5c\u4e3a\u8d28\u91cf\u53cd\u9988\uff0c\u8fed\u4ee3\u4f18\u5316\u7b54\u6848\u3002Wu \u7b49<\/span><span>[2024c]<\/span><span>\u8bad\u7ec3PSV\u6a21\u578b\u8bc6\u522b\u5e76\u7ea0\u6b63\u9519\u8bef\u6b65\u9aa4\uff0c\u786e\u4fdd\u51c6\u786e\u6027\u3002Shridhar \u7b49<\/span><span>[2024]<\/span><span>\u8bad\u7ec3\u63d0\u95ee\u8005\u6a21\u578b\u751f\u6210\u5b50\u95ee\u9898\u4ee5\u8f85\u52a9\u7ea0\u6b63\u3002\u4e3a\u589e\u5f3a\u6a21\u578b\u7684\u6279\u8bc4\u4e0e\u6539\u8fdb\u80fd\u529b\uff0cZheng \u7b49<\/span><span>[2024]<\/span><span>\u3001Xi \u7b49<\/span><span>[2024]<\/span><span>\u3001Yan \u7b49<\/span><span>[2024]<\/span><span>\u3001Zhang \u7b49<\/span><span>[2024i]<\/span><span>\u63d0\u51fa\u8bad\u7ec3\u6a21\u578b\u63d0\u4f9b\u6279\u8bc4\u4ee5\u63a8\u52a8\u6539\u8fdb\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4ece\u7406\u8bba\u89d2\u5ea6\u770b\uff0c\u8fed\u4ee3\u7ea0\u6b63\u65b9\u6cd5\u53ef\u89c6\u4e3a\u901a\u8fc7\u7ebf\u6027\u641c\u7d22\u8fdb\u884c\u7684\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b<\/span><span>\uff08Markov Decision Process, 
MDP\uff09<\/span><span>\uff0c\u5176\u4e2d\u89e3\u51b3\u65b9\u6848\u4e3a\u72b6\u6001\uff0c\u7ea0\u6b63\u64cd\u4f5c\u5219\u4e3a\u72b6\u6001\u8f6c\u79fb\u7684\u884c\u52a8\u3002\u7b80\u5355\u7684\u7ebf\u6027\u641c\u7d22\u901a\u5e38\u6548\u679c\u5e73\u5e73\uff0c\u4f46\u66f4\u590d\u6742\u7684\u6811\u641c\u7d22\u65b9\u6cd5\u7406\u8bba\u4e0a\u80fd\u83b7\u5f97\u66f4\u4f18\u7ed3\u679c\u3002\u4f8b\u5982\uff0cZhang \u7b49[2024e,d]\u5c06\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08Monte Carlo Tree Search, MCTS\uff09<\/span><span>\u4e0e\u81ea\u6211\u8fdb\u5316\u673a\u5236\u7ed3\u5408\uff0c\u4f18\u5316\u590d\u6742\u6570\u5b66\u63a8\u7406\u4efb\u52a1\u7684\u89e3\u51b3\u65b9\u6848\u3002\u8be5\u7b97\u6cd5\u521d\u59cb\u5316\u6839\u8282\u70b9\uff0c\u5e76\u901a\u8fc7\u4ef7\u503c\u51fd\u6570 Q \u9009\u62e9\u6700\u5177\u6f5c\u529b\u7684\u8282\u70b9\u8fdb\u884c\u6269\u5c55\u3002\u5728\u81ea\u6211\u8fdb\u5316\u9636\u6bb5\uff0c\u6a21\u578b\u901a\u8fc7\u53cd\u9988\u7ea0\u6b63\u7b54\u6848\u5e76\u751f\u6210\u66f4\u4f18\u89e3\uff0c\u968f\u540e\u901a\u8fc7\u81ea\u6211\u8bc4\u4f30\u6253\u5206\uff0c\u8fd9\u4e9b\u5206\u6570\u7528\u4e8e\u66f4\u65b0\u6811\u4e2d\u8282\u70b9\u7684\u4ef7\u503c\u8bc4\u4f30\u3002\u8282\u70b9\u7684\u6811\u4e0a\u7f6e\u4fe1\u4e0a\u754c<\/span><span>\uff08Upper Confidence Bound for Trees\uff0cUCT\uff09<\/span><span>\u503c\u901a\u8fc7\u6539\u8fdb\u7684\u516c\u5f0f\u8fdb\u884c\u66f4\u65b0\u3002\u8be5\u8fc7\u7a0b\u6301\u7eed\u8fdb\u884c\uff0c\u76f4\u5230\u6ee1\u8db3\u7ec8\u6b62\u6761\u4ef6\uff0c\u65e2\u63d0\u5347\u4e86\u89e3\u51b3\u65b9\u6848\u8d28\u91cf\uff0c\u4e5f\u4e0d\u65ad\u63a2\u7d22\u65b0\u7684\u53ef\u80fd\u6027\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><br \/><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 
15px\"><strong><span><span>\u5176\u4ed6\u65b9\u6cd5<\/span><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u9664\u4e0a\u8ff0\u7ea0\u6b63\u3001\u8fc7\u6ee4\u4e0e\u603b\u7ed3\u64cd\u4f5c\u5916\uff0c\u8fd8\u53ef\u91c7\u7528\u5176\u4ed6\u540e\u5904\u7406\u65b9\u5f0f\uff0c\u5982\u56de\u6eaf\u673a\u5236<\/span><span>&nbsp;[Qin \u7b49\uff0c2024; Yang \u7b49\uff0c2025b]<\/span><span>\u3002\u5f53\u7cfb\u7edf\u68c0\u6d4b\u5230\u9519\u8bef\u65f6\uff0c\u53ef\u56de\u6eaf\u81f3\u5148\u524d\u72b6\u6001\uff0c\u91cd\u65b0\u63a2\u7d22\u66ff\u4ee3\u7684\u63a8\u7406\u8def\u5f84\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">3.2.2 \u7528\u4e8e\u77ed\u601d\u7ef4\u94fe\u7684\u663e\u5f0f\u6811\u641c\u7d22<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u672c\u5c0f\u8282\u4e2d\uff0c\u6211\u4eec\u5c06\u201c\u77ed\u601d\u7ef4\u94fe\u201d<\/span><span>\uff08Short CoT\uff09<\/span><span>\u5b9a\u4e49\u4e3a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u4ec5\u5305\u542b\u903b\u8f91\u63a8\u7406\u6b65\u9aa4\u3001\u4e0d\u6d89\u53ca\u8bc4\u4f30\u4e0e\u7ea0\u6b63\u7b49\u884c\u4e3a\u7684\u601d\u7ef4\u94fe<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u3002\u77ed\u601d\u7ef4\u94fe\u4f53\u73b0\u4e86\u4e00\u79cd\u4ece\u4efb\u52a1\u63d0\u793a\u76f4\u63a5\u901a\u5411\u6807\u51c6\u7b54\u6848\u7684\u7cbe\u7b80\u63a8\u7406\u8fc7\u7a0b\uff0c\u8981\u6c42\u6bcf\u4e2a\u4e2d\u95f4\u6b65\u9aa4\u5747\u4e3a\u6b63\u786e\u3002\u9488\u5bf9\u4f20\u7edf\u601d\u7ef4\u94fe\u51c6\u786e\u7387\u8f83\u4f4e\u7684\u95ee\u9898\uff0c\u7814\u7a76\u8005\u63d0\u51fa\u57fa\u4e8e\u542f\u53d1\u5f0f\u641c\u7d22<\/span><span>\uff08\u5c24\u5176\u662f\u6811\u641c\u7d22\uff09<\/span><span>\u7684\u589e\u5f3a\u65b9\u6cd5\uff0c\u4ee5\u63d0\u9ad8\u5bf9\u77ed\u601d\u7ef4\u94fe\u7684\u641c\u7d22\u6548\u7387\u3002\u6211\u4eec\u5c06\u6b64\u7c7b\u65b9\u6cd5\u7edf\u4e00\u5f52\u7eb3\u4e3a\u201c\u663e\u5f0f\u6811\u641c\u7d22\u201d<\/span><span>\uff08Explicit Tree Search\uff09<\/span><span>\u6846\u67b6\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1\u77ed\u601d\u7ef4\u94fe\u672c\u8eab\u4e0d\u5305\u542b\u8bc4\u4f30\u7b49\u884c\u4e3a\uff0c\u4f46\u5728\u641c\u7d22\u8fc7\u7a0b\u4e2d\u4ecd\u53ef\u5f15\u5165\u8bc4\u4f30\u673a\u5236\u8f85\u52a9\u63a8\u7406\u3002\u641c\u7d22\u8fc7\u7a0b\u4e2d\uff0c\u8bc4\u4f30\u51fd\u6570\u7528\u4e8e\u5f15\u5bfc\u63a2\u7d22\u65b9\u5411\u5e76\u6267\u884c\u526a\u679d\uff0c\u4ece\u800c\u4f18\u5148\u4fdd\u7559\u6700\u5177\u6f5c\u529b\u7684\u8def\u5f84\u3002\u8be5\u65b9\u6cd5\u5728\u63d0\u5347\u641c\u7d22\u6548\u7387\u7684\u540c\u65f6\uff0c\u4ea6\u4fdd\u6301\u4e86\u6240\u751f\u6210\u601d\u7ef4\u94fe\u7684\u51c6\u786e\u6027\u4e0e\u7b80\u6d01\u6027\u3002\u6839\u636e\u5e95\u5c42\u641c\u7d22\u7b56\u7565\u7684\u4e0d\u540c\uff0c\u663e\u5f0f\u6811\u641c\u7d22\u7b97\u6cd5\u53ef\u5206\u4e3a\u82e5\u5e72\u7c7b\u578b\uff1a<\/span><\/span><strong style=\"font-size: 
15px\"><span><span>\u6734\u7d20\u7684\u5e7f\u5ea6\u4f18\u5148\/\u6df1\u5ea6\u4f18\u5148\u641c\u7d22\uff08naive BFS\/DFS\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u675f\u641c\u7d22\uff08Beam Search\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3001<\/span><\/span><strong style=\"font-size: 15px\"><span><span>A*\u7b97\u6cd5<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4ee5\u53ca<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\uff08MCTS\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5e7f\u5ea6\u4f18\u5148\/\u6df1\u5ea6\u4f18\u5148\u641c\u7d22\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u601d\u7ef4\u6811<\/span><span>\uff08Tree-of-Thoughts, ToT\uff09[Yao \u7b49\uff0c2023]&nbsp;<\/span><span>\u5c06\u95ee\u9898\u5206\u89e3\u4e3a\u591a\u4e2a\u601d\u7ef4\u8282\u70b9\uff0c\u5e76\u5229\u7528\u7ecf\u5178\u641c\u7d22\u7b97\u6cd5\u2014\u2014\u5e7f\u5ea6\u4f18\u5148\u641c\u7d22<\/span><span>\uff08BFS\uff09<\/span><span>\u548c\u6df1\u5ea6\u4f18\u5148\u641c\u7d22<\/span><span>\uff08DFS\uff09<\/span><span>\u2014\u2014\u63a2\u7d22\u591a\u6837\u5316\u7684\u63a8\u7406\u8def\u5f84\uff0c\u663e\u8457\u589e\u5f3a\u4e86\u8bed\u8a00\u6a21\u578b\u5728\u590d\u6742\u4efb\u52a1\u4e2d\u7684\u95ee\u9898\u89e3\u51b3\u80fd\u529b\u3002Qin \u7b49<\/span><span>&nbsp;[2023]<\/span><span>&nbsp;\u5c06\u641c\u7d22\u8fc7\u7a0b\u4e0e\u5de5\u5177\u4f7f\u7528\u76f8\u7ed3\u5408\uff0c\u91c7\u7528 DFS 
\u5904\u7406\u5de5\u5177\u7ec4\u5408\u4e0e\u9519\u8bef\u7ba1\u7406\uff0c\u4ece\u800c\u63d0\u5347\u6a21\u578b\u5728\u771f\u5b9e\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\u3002\u4e0a\u8ff0\u65b9\u6cd5\u4f9d\u8d56\u5916\u90e8\u7a0b\u5e8f<\/span><span>\uff08\u5982 Python \u4ee3\u7801\uff09<\/span><span>\u5b9a\u4e49\u641c\u7d22\u903b\u8f91\u3002\u7136\u800c\uff0c\u8fd9\u4e9b\u88ab\u52a8\u641c\u7d22\u65b9\u6cd5\u6548\u7387\u8f83\u4f4e\uff0c\u7075\u6d3b\u6027\u6709\u9650\u3002\u81ea\u4e3b\u6811\u641c\u7d22<\/span><span>\uff08Autonomous Tree-Search\uff09[Zhang \u7b49\uff0c2023b]&nbsp;<\/span><span>\u901a\u8fc7\u63d0\u793a\u76f4\u63a5\u5f15\u5bfc\u5927\u8bed\u8a00\u6a21\u578b\u72ec\u7acb\u6267\u884c BFS \u6216 DFS\uff0c\u81ea\u4e3b\u63a2\u7d22\u591a\u6761\u89e3\u51b3\u8def\u5f84\uff0c\u63d0\u5347\u63a8\u7406\u7075\u6d3b\u6027\u3002\u601d\u7ef4\u7b97\u6cd5<\/span><span>\uff08Algorithm-of-Thought, AoT\uff09[Sel \u7b49\uff0c2023]&nbsp;<\/span><span>\u5219\u5229\u7528 BFS\/DFS \u7684\u6574\u4e2a\u641c\u7d22\u8def\u5f84\u4f5c\u4e3a\u63d0\u793a\uff0c\u6574\u5408\u601d\u7ef4\u94fe\u4e0e\u601d\u7ef4\u6811\u7684\u4f18\u52bf\uff0c\u4f7f\u6a21\u578b\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u80fd\u52a8\u6001\u8c03\u6574\u8def\u5f84\uff0c\u4ece\u800c\u66f4\u9ad8\u6548\u5730\u53d1\u73b0\u89e3\u6cd5\u3002\u6b64\u5916\uff0cAoT \u907f\u514d\u4e86 ToT \u6240\u9700\u7684\u591a\u8f6e\u67e5\u8be2\uff0c\u964d\u4f4e\u4e86\u63a8\u7406\u5f00\u9500\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u675f\u641c\u7d22\uff1a<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u675f\u641c\u7d22\u4f5c\u4e3a\u5e7f\u5ea6\u4f18\u5148\u641c\u7d22\u7684\u4e00\u79cd\u53d8\u4f53\uff0c\u5728\u641c\u7d22\u8fc7\u7a0b\u4e2d\u7ef4\u62a4k\u4e2a\u5019\u9009\u5e8f\u5217<\/span><span>\uff08\u79f0\u4e3a\u675f\uff09<\/span><span>\uff0c\u5b9e\u73b0\u4e86\u641c\u7d22\u51c6\u786e\u6027\u548c\u8ba1\u7b97\u6548\u7387\u4e4b\u95f4\u7684\u6709\u6548\u5e73\u8861\u3002\u5176\u4e0e\u5927\u8bed\u8a00\u6a21\u578b\u81ea\u56de\u5f52\u751f\u6210\u7684\u5951\u5408\u4f7f\u5176\u7279\u522b\u9002\u5408\u5728\u89e3\u7801\u8fc7\u7a0b\u4e2d\u6307\u5bfc\u524d\u5411\u641c\u7d22\u3002\u6839\u636e\u641c\u7d22\u7684\u7c92\u5ea6\uff0c\u675f\u641c\u7d22\u53ef\u5206\u4e3a\u4e09\u4e2a\u5c42\u6b21\uff1a\u8bcd\u5143\u7ea7<\/span><span>\uff08token-level\uff09<\/span><span>\u3001\u6b65\u9aa4\u7ea7<\/span><span>\uff08step-level\uff09<\/span><span>\u548c\u89e3\u51b3\u65b9\u6848\u7ea7<\/span><span>\uff08solution-level\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\u675f\u641c\u7d22<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u5728\u6a21\u578b\u751f\u6210\u7684\u6700\u5c0f\u5355\u4f4d\u4e0a\u64cd\u4f5c\uff0c\u76f4\u63a5\u4e0e\u5927\u8bed\u8a00\u6a21\u578b\u89e3\u7801\u8fc7\u7a0b\u5bf9\u9f50\u3002\u867d\u7136\u4f20\u7edf\u675f\u641c\u7d22\u57fa\u4e8e\u8bcd\u5143\u5bf9\u6570\u6982\u7387\u5bf9\u5e8f\u5217\u8fdb\u884c\u6392\u5e8f\uff0c\u4f46\u8fd9\u79cd\u65b9\u6cd5\u4f18\u5148\u8003\u8651\u81ea\u7136\u8bed\u8a00\u6d41\u7545\u6027\u800c\u975e\u63a8\u7406\u8d28\u91cf\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u5c40\u9650\u6027\uff0cLee 
\u7b49<\/span><span>[2024c]<\/span><span>\u5f15\u5165\u4e86\u8bcd\u5143\u76d1\u7763\u4ef7\u503c\u6a21\u578b\uff0c\u5bf9\u8bcd\u5143\u8fdb\u884c\u8bc4\u5206\u4ee5\u63d0\u9ad8\u6570\u5b66\u63a8\u7406\u7684\u51c6\u786e\u6027\u3002\u6b64\u5916\uff0c\u4e3a\u7f13\u89e3\u751f\u6210\u5e8f\u5217\u591a\u6837\u6027\u4e0d\u8db3\u7684\u95ee\u9898\uff0cVijayakumar \u7b49<\/span><span>[2016]<\/span><span>\u63d0\u51fa\u591a\u6837\u5316\u675f\u641c\u7d22\uff0c\u5c06\u675f\u5206\u6210\u591a\u4e2a\u7ec4\uff0c\u5728\u6bcf\u4e2a\u7ec4\u5185\u72ec\u7acb\u4f18\u5316\uff0c\u5e76\u5728\u7ec4\u95f4\u5f15\u5165\u591a\u6837\u6027\u60e9\u7f5a\uff0c\u4ee5\u9f13\u52b1\u751f\u6210\u66f4\u591a\u6837\u7684\u63a8\u7406\u8def\u5f84\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6b65\u9aa4\u7ea7\u675f\u641c\u7d22<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u5c06\u591a\u6b65\u63a8\u7406\u5206\u89e3\u4e3a\u5b50\u6b65\u9aa4\uff0c\u5bf9\u6bcf\u4e2a\u5b50\u6b65\u9aa4\u8fdb\u884c\u8bc4\u5206\u548c\u9a8c\u8bc1\uff0c\u4ee5\u7ef4\u6301\u9ad8\u8d28\u91cf\u7684\u5019\u9009\u8def\u5f84\u3002\u4f8b\u5982\uff0cWang \u7b49<\/span><span>[2024i]<\/span><span>\u3001Ma \u7b49<\/span><span>[2023]<\/span><span>\u4f7f\u7528\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRM\uff09<\/span><span>\u5bf9\u5b50\u6b65\u9aa4\u8fdb\u884c\u6253\u5206\uff0c\u5229\u7528\u8fd9\u4e9b\u5206\u6570\u5f15\u5bfc\u641c\u7d22\u671d\u6709\u5e0c\u671b\u7684\u63a8\u7406\u8def\u5f84\u53d1\u5c55\u3002\u7c7b\u4f3c\u5730\uff0cChen \u7b49<\/span><span>[2024b]<\/span><span>\u3001Yu \u7b49<\/span><span>[2023a]<\/span><span>\u5229\u7528\u5b66\u4e60\u7684\u4ef7\u503c\u6a21\u578b\u5728\u6b65\u9aa4\u7ea7\u522b\u589e\u5f3a\u641c\u7d22\u6548\u7387\uff0c\u907f\u514d\u4e86\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u7684\u8ba1\u7b97\u5f00\u9500\u3002Setlur 
\u7b49<\/span><span>[2024]<\/span><span>\u8fdb\u4e00\u6b65\u7ed3\u5408\u8fc7\u7a0b\u4f18\u52bf\u6765\u5b8c\u5584\u641c\u7d22\u8fc7\u7a0b\u3002\u4e0e\u5916\u90e8\u8bc4\u4f30\u65b9\u6cd5\u4e0d\u540c\uff0cXie \u7b49<\/span><span>[2023]<\/span><span>\u5229\u7528\u6a21\u578b\u672c\u8eab\u8fdb\u884c\u81ea\u6211\u9a8c\u8bc1\uff0c\u63d0\u793a\u5b83\u9a8c\u8bc1\u6b65\u9aa4\u6b63\u786e\u6027\uff0c\u540c\u65f6\u901a\u8fc7\u6e29\u5ea6\u8c03\u6574\u7684\u968f\u673a\u5316\u5f15\u5165\u591a\u6837\u6027\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u89e3\u51b3\u65b9\u6848\u7ea7\u675f\u641c\u7d22<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u72ec\u7acb\u8bc4\u4f30\u6574\u4e2a\u63a8\u7406\u8def\u5f84\uff0c\u901a\u8fc7\u907f\u514d\u4e2d\u95f4\u64cd\u4f5c\u63d0\u4f9b\u66f4\u5feb\u7684\u63a8\u7406\u3002\u4f8b\u5982\uff0cBest-of-N<\/span><span>\uff08BoN\uff09<\/span><span>\u91c7\u6837\u751f\u6210\u591a\u4e2a\u5b8c\u6574\u89e3\u51b3\u65b9\u6848\uff0c\u5e76\u4f7f\u7528\u5956\u52b1\u6a21\u578b\u9009\u62e9\u8bc4\u5206\u6700\u9ad8\u7684\u89e3\u3002\u7136\u800c\uff0cWang \u7b49<\/span><span>[2024i]<\/span><span>\u5f3a\u8c03\u4e86\u5956\u52b1\u6a21\u578b\u5728\u533a\u5206\u76f8\u4f3c\u63a8\u7406\u8fc7\u7a0b\u65b9\u9762\u7684\u5c40\u9650\u6027\uff0c\u63d0\u51fa\u4e86\u4e00\u79cd\u6210\u5bf9\u504f\u597d\u6a21\u578b\u4ee5\u5b9e\u73b0\u66f4\u6709\u6548\u7684\u6392\u540d\u3002\u540c\u65f6\uff0cWang \u548c 
Zhou&nbsp;<\/span><span>[2024]<\/span><span>\u89c2\u5bdf\u5230\u6a21\u578b\u53ef\u4ee5\u901a\u8fc7\u91c7\u6837\u81ea\u52a8\u751f\u6210\u601d\u7ef4\u94fe\u63a8\u7406\uff0c\u800c\u57fa\u4e8e\u601d\u7ef4\u94fe\u5f97\u51fa\u7684\u7b54\u6848\u8868\u73b0\u51fa\u66f4\u9ad8\u7684\u7f6e\u4fe1\u5ea6\u3002\u5229\u7528\u8fd9\u4e00\u89c1\u89e3\uff0c\u4ed6\u4eec\u5f15\u5165\u4e86\u601d\u7ef4\u94fe\u89e3\u7801<\/span><span>\uff08CoT-decoding\uff09<\/span><span>\uff0c\u8fd9\u662f\u4e00\u79cd\u901a\u8fc7\u6539\u53d8\u89e3\u7801\u8fc7\u7a0b\u9690\u5f0f\u6267\u884c\u601d\u7ef4\u94fe\u63a8\u7406\u7684\u65b9\u6cd5\uff0c\u901a\u8fc7top-k\u91c7\u6837\u751f\u6210\u591a\u4e2a\u5e8f\u5217\uff0c\u5e76\u57fa\u4e8e\u7b54\u6848\u7f6e\u4fe1\u5ea6\u9009\u62e9\u6700\u4f73\u5e8f\u5217\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>A*\u641c\u7d22\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>A*\u7b97\u6cd5\u901a\u8fc7\u4f7f\u7528\u8bc4\u4f30\u51fd\u6570<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;105082c7-baf8-4a40-9f04-dbfb05800bd7&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;f(n) = g(n) + h(n)nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>f(n) = g(n) + h(n)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u6269\u5c55\u6700\u5177\u6f5c\u529b\u7684\u8282\u70b9\uff0c\u4ece\u800c\u63d0\u5347\u641c\u7d22\u6548\u7387\u3002\u5176\u4e2d\uff0c<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;72239b3f-08c1-4d9b-97ea-04ee00d4618d&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;g(n)nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>g(n)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u8868\u793a\u4ece\u521d\u59cb\u72b6\u6001\u5230\u5f53\u524d\u8282\u70b9\u7684\u7d2f\u79ef\u4ee3\u4ef7\uff0c<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;db941377-a7ff-4f17-bcb5-e7e768edd575&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;h(n)nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>h(n)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u662f\u7528\u4e8e\u4f30\u8ba1\u4ece\u5f53\u524d\u8282\u70b9\u5230\u76ee\u6807\u72b6\u6001\u7684\u542f\u53d1\u5f0f\u4ee3\u4ef7\u51fd\u6570\u3002\u8be5\u6846\u67b6\u5df2\u88ab\u6539\u9020\u4ee5\u589e\u5f3a\u5927\u8bed\u8a00\u6a21\u578b\u7684\u591a\u6b65\u63a8\u7406\u80fd\u529b\uff0c\u5728\u641c\u7d22\u6548\u7387\u65b9\u9762\u4f18\u4e8e\u4f20\u7edf\u7684\u601d\u7ef4\u6811<\/span><span>\uff08ToT\uff09<\/span><span>\u65b9\u6cd5\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5df2\u6709\u591a\u9879\u7814\u7a76\u5c06 A* \u539f\u5219\u6574\u5408\u8fdb\u5927\u8bed\u8a00\u6a21\u578b\u7684\u63a8\u7406\u8fc7\u7a0b\u3002Zhuang \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u63d0\u51fa 
ToolChain*\uff0c\u4e3a\u7279\u5b9a\u4efb\u52a1\u7ef4\u62a4\u4e00\u5957\u201c\u957f\u671f\u8bb0\u5fc6\u201d\u673a\u5236\uff0c\u7528\u4e8e\u5b58\u50a8\u63a8\u7406\u7ecf\u9a8c\u3002\u8be5\u8bb0\u5fc6\u7cfb\u7edf\u521d\u59cb\u7531\u793a\u4f8b\u6570\u636e\u9884\u7f6e\uff0c\u5e76\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u4e0d\u65ad\u5438\u6536\u6b63\u786e\u7684\u89e3\u8def\u5f84\uff0c\u4ece\u800c\u5b9e\u73b0\u52a8\u6001\u6269\u5c55\u3002ToolChain* \u901a\u8fc7\u6700\u957f\u516c\u5171\u5b50\u5e8f\u5217<\/span><span>\uff08Longest Common Subsequence\uff09<\/span><span>\u5c06\u65b0\u4efb\u52a1\u4e0e\u5386\u53f2\u7ecf\u9a8c\u5339\u914d\uff0c\u8fdb\u800c\u4f30\u8ba1\u7d2f\u79ef\u4e0e\u672a\u6765\u6210\u672c\uff0c\u5b9e\u73b0\u5bf9\u590d\u6742\u89c4\u5212\u4e0e\u63a8\u7406\u4efb\u52a1\u4e2d\u6700\u4f18\u89e3\u7684\u9ad8\u6548\u8bc6\u522b\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0cWang \u7b49<\/span><span>&nbsp;[2024a]<\/span><span>&nbsp;\u5f15\u5165\u4e86 Q*\uff0c\u8be5\u65b9\u6cd5\u91c7\u7528\u8bad\u7ec3\u5f97\u5230\u7684 Q \u503c\u6a21\u578b\u4e3a\u6bcf\u4e2a\u72b6\u6001\u8ba1\u7b97\u542f\u53d1\u5f0f\u503c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;704fcc77-79ad-4e68-b00a-aad0ae64a92f&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;h(x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>h(x)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u4ece\u800c\u4f7f A* \u7b97\u6cd5\u53ef\u6269\u5c55\u81f3\u6570\u5b66\u3001\u7f16\u7a0b\u7b49\u9886\u57df\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u8fdb\u4e00\u6b65\u7684\u7814\u7a76\u5229\u7528\u5927\u8bed\u8a00\u6a21\u578b\u7684\u56fa\u6709\u80fd\u529b\u5bf9 A* \u641c\u7d22\u8fdb\u884c\u4f18\u5316\u3002Meng \u7b49<\/span><span>&nbsp;[2024a]<\/span><span>&nbsp;\u63d0\u51fa LLM-A*\uff0c\u8be5\u65b9\u6cd5\u5229\u7528\u5927\u8bed\u8a00\u6a21\u578b\u7684\u5168\u5c40\u7406\u89e3\u751f\u6210\u4e2d\u95f4\u5f15\u5bfc\u70b9<\/span><span>\uff08waypoints\uff09<\/span><span>\uff0c\u4ee5\u5f15\u5bfc A* \u641c\u7d22\u65b9\u5411\u5e76\u51cf\u5c11\u5197\u4f59\u72b6\u6001\u7684\u63a2\u7d22\u3002Gupta \u548c Li&nbsp;<\/span><span>[2024]&nbsp;<\/span><span>\u8bad\u7ec3\u5927\u8bed\u8a00\u6a21\u578b\u5b66\u4e60\u771f\u5b9e\u4ee3\u4ef7&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;b41e7286-40e1-4348-b8ba-a8f72ea7671c&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;h^\u2217(n)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m96npebnvt5\" data-pm-slice=\"0 0 []\"><span><span>h<\/span><\/span><sup><span>\u2217<\/span><\/sup><span><span>(n)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4e0e\u542f\u53d1\u5f0f\u4f30\u8ba1&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;42cda70b-1969-4240-8525-04676fcc8ce3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;h(x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>h(x)<\/span><\/span><\/span><\/span><span style=\"font-size: 
15px\"><span>&nbsp;\u4e4b\u95f4\u7684\u6b8b\u5dee<\/span><span>\uff08\u5dee\u503c\uff09<\/span><span>\uff0c\u901a\u8fc7\u51cf\u5c11\u8fed\u4ee3\u6b21\u6570\u52a0\u5feb\u641c\u7d22\u6536\u655b\u3002Lehnert \u7b49&nbsp;<\/span><span>[2024]&nbsp;<\/span><span>\u63d0\u51fa Searchformer\uff0c\u5c06 A* \u7684\u6267\u884c\u8f68\u8ff9\u8f6c\u5316\u4e3a\u6807\u8bb0\u5e8f\u5217\uff0c\u8fdb\u800c\u5f15\u5bfc Transformer \u6a21\u578b\u8fed\u4ee3\u91c7\u6837\u66f4\u77ed\u8def\u5f84\u3002\u7c7b\u4f3c\u5730\uff0cSu \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u63d0\u51fa Dualformer\uff0c\u5728 A* \u641c\u7d22\u8fc7\u7a0b\u4e2d\u5f15\u5165\u968f\u673a\u4fe1\u606f\u4e22\u5f03\u673a\u5236\uff0c\u4f7f\u6a21\u578b\u5728\u641c\u7d22\u8fc7\u7a0b\u4e2d\u5b9e\u73b0\u5feb\u601d\u7ef4\u4e0e\u6162\u601d\u7ef4\u7684\u52a8\u6001\u5e73\u8861\uff0c\u4ece\u800c\u4f18\u5316\u641c\u7d22\u7b56\u7565\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08Monte Carlo Tree Search, MCTS\uff09<\/span><span>\u662f\u4e00\u79cd\u5728\u63a2\u7d22\u4e0e\u5229\u7528\u4e4b\u95f4\u5b9e\u73b0\u5e73\u8861\u7684\u641c\u7d22\u7b97\u6cd5\uff0c\u5728\u4ee5\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b<\/span><span>\uff08Markov Decision Process, MDP\uff09<\/span><span>\u5efa\u6a21\u7684\u4efb\u52a1\u4e2d\u8868\u73b0\u51fa\u8272<\/span><span>[Chen \u7b49\uff0c2024b; Wu \u7b49\uff0c2024a]<\/span><span>\u3002\u8fd9\u7c7b\u4efb\u52a1\u901a\u5e38\u8981\u6c42 MCTS 
\u5728\u5e9e\u5927\u7684\u72b6\u6001-\u884c\u52a8\u7a7a\u95f4\u4e2d\u53d1\u6325\u5176\u63a2\u7d22\u4e0e\u5229\u7528\u5e73\u8861\u7684\u4f18\u52bf\uff0c\u4ee5\u53d1\u73b0\u9ad8\u4ef7\u503c\u7684\u884c\u52a8\u8f68\u8ff9\u3002 AlphaGo Zero<\/span><span>&nbsp;[Silver \u7b49\uff0c2017]<\/span><span>\u662f\u5176\u4e2d\u4ee3\u8868\uff0c\u8be5\u7cfb\u7edf\u57fa\u4e8e MCTS \u5b9e\u73b0\uff0c\u5728\u56f4\u68cb\u4efb\u52a1\u4e2d\u641c\u7d22\u9ad8\u8d28\u91cf\u7684\u843d\u5b50\u5e8f\u5217\uff0c\u4e0d\u65ad\u4f18\u5316\u7b56\u7565\u7f51\u7edc\u7684\u6027\u80fd\u3002\u53d7 AlphaGo Zero \u542f\u53d1\uff0c\u7814\u7a76\u8005\u63d0\u51fa\u4e86\u5728\u590d\u6742\u884c\u52a8\u7a7a\u95f4\u4e2d\u4f7f\u7528 MCTS \u641c\u7d22\u9ad8\u8d28\u91cf\u63a8\u7406\u8def\u5f84\u7684\u601d\u8def\u3002\u7ecf\u5178MCTS\u901a\u5e38\u5305\u542b\u4ee5\u4e0b\u56db\u4e2a\u6b65\u9aa4<\/span><span>[Browne \u7b49\uff0c2012]<\/span><span>\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u9009\u62e9\uff08Selection\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u4ece\u6839\u8282\u70b9\u5f00\u59cb\uff0cMCTS \u5728\u63a2\u7d22\u4e0e\u5229\u7528\u4e4b\u95f4\u8fdb\u884c\u6743\u8861\uff0c\u5e76\u636e\u6b64\u8ba1\u7b97\u5404\u5b50\u8282\u70b9\u7684\u6743\u91cd\u3002\u5e38\u89c1\u7684\u6743\u91cd\u8ba1\u7b97\u7b56\u7565\u5305\u62ec\u4e0a\u7f6e\u4fe1\u754c<\/span><span>\uff08Upper Confidence Bound, UCB\uff09<\/span><span>\u548c\u9884\u6d4b\u5668\u4e0a\u7f6e\u4fe1\u6811\u754c<\/span><span>\uff08Predictor Upper Confidence Tree Bound, PUCT\uff09[Rosin, 2011]<\/span><span>\u3002 UCB \u516c\u5f0f\u4e3a\uff1a<\/span><\/span><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.13333333333333333\" data-type=\"png\" data-w=\"1080\" style=\"vertical-align: baseline;width: 224px;height: auto 
!important\" width=\"224\" data-width=\"224px\" data-imgfileid=\"100227519\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-9212d77c4c189ede524c3d20da59902e.png\" \/>&nbsp;<\/span><\/sub><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;40b67eab-42a4-4298-95e1-4ad9f4b88efb&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;UCB(s,a)=Q(s,a)+c_p \\cdot \\pi_{prior}(a|s)\\sqrt{\\frac{\\log N(s)}{1+N(s,a)}}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff1b PUCT\u516c\u5f0f\u4e3a\uff1a<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;f7a0be05-469f-4bc9-bcc6-5f2b053f93df&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;PUCT(s,a)=Q(s,a)+c_p\\cdot \\pi_{prior}(a|s)\\cdot \\frac{\\sqrt{N(s)}}{1+N(s,a)}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.11851851851851852\" data-type=\"png\" data-w=\"1080\" style=\"vertical-align: baseline;width: 237px;height: auto !important\" width=\"237\" data-width=\"237px\" data-imgfileid=\"100227520\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-27253e8763f7f41ce07df1b0dcc7a455.png\" \/><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>\u3002\u5176\u4e2d<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;292c150b-6bf9-4df3-88ba-b85035d80b1f&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;Q(s,a)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>Q(s,a)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u8868\u793a\u4ece\u72b6\u6001&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;5e6123f1-7711-4bee-9bdc-ff3fb4a15541&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;snn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>s&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u91c7\u53d6\u884c\u52a8&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;83697fef-ff83-4ef1-a98f-7ce34972cff3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;ann&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>a&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u540e\u7684\u7d2f\u79ef\u5956\u52b1\uff0c<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;3f4874a8-96b8-412a-9246-f5687d55604d&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_{prior}(a|s)nnn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m96nxg6u15f\" data-pm-slice=\"0 0 
[]\"><span><span>\u03c0<\/span><\/span><sub><span>prior<\/span><\/sub><span><span>(a|s) &nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u4e3a\u5728\u72b6\u6001&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;b73470c2-3fb3-4a5e-9860-3d0f92188e6e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;snn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>s&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u4e0b\u9009\u62e9\u884c\u52a8&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;79dccc8d-0d94-4538-ab69-533869f64a7e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;ann&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>a&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u7684\u5148\u9a8c\u6982\u7387\uff0c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;01acfdea-96b4-4a17-917a-ffab44edab01&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;N(s)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>N(s)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u662f\u5f53\u524d\u4e0a\u4e0b\u6587\u4e2d\u72b6\u6001&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;049c0f87-caa8-46f5-9858-b8e5cb2f80b8&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;sn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>s<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u88ab\u63a2\u7d22\u7684\u6b21\u6570\uff0c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;dbeb5874-0dd2-4188-ba04-ce14307c5bd3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;N(s,a)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>N(s,a)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u662f\u884c\u52a8&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;04f8fee2-633a-4f5a-9843-d7b87776c9e8&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;ann&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>a<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u5728\u72b6\u6001&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;1f5ea0b0-7d13-4155-8d5d-9dda1f89fe67&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;sn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>s&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 
15px\"><span>\u88ab\u63a2\u7d22\u7684\u6b21\u6570\u3002\u6743\u91cd\u51fd\u6570\u540c\u65f6\u8003\u8651\u63a2\u7d22<\/span><span>\uff08\u672a\u8bbf\u95ee\u8282\u70b9\u5c06\u83b7\u5f97\u66f4\u9ad8\u63a2\u7d22\u503c\uff09<\/span><span>\u4e0e\u5229\u7528<\/span><span>\uff08\u5386\u53f2\u4e0a\u9ad8\u56de\u62a5\u8282\u70b9\u83b7\u5f97\u66f4\u9ad8\u5229\u7528\u503c\uff09<\/span><span>\u3002\u6bcf\u8f6e\u9009\u62e9\u4f1a\u63a8\u8fdb\u5230\u5f97\u5206\u6700\u9ad8\u7684\u5b50\u8282\u70b9\uff0c\u76f4\u81f3\u5230\u8fbe\u53f6\u8282\u70b9\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6269\u5c55\uff08Expansion\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u4e00\u65e6\u5230\u8fbe\u53f6\u8282\u70b9\uff0c\u82e5\u8be5\u8282\u70b9\u4e0d\u662f\u7ec8\u6b62\u72b6\u6001<\/span><span>\uff08\u4f8b\u5982\u5c1a\u672a\u5f97\u51fa\u6700\u7ec8\u7b54\u6848\uff09<\/span><span>\uff0cMCTS \u5c06\u57fa\u4e8e\u5f53\u524d\u72b6\u6001\u6267\u884c\u65b0\u7684\u884c\u52a8\uff0c\u6269\u5c55\u751f\u6210\u591a\u4e2a\u5b50\u8282\u70b9\u3002\u6269\u5c55\u8d28\u91cf\u4e3b\u8981\u53d6\u51b3\u4e8e\u884c\u52a8\u7a7a\u95f4\u7684\u5b9a\u4e49\u3002\u5728\u56f4\u68cb\u4e2d\uff0c\u884c\u52a8\u4e3a\u843d\u5b50\uff1b\u800c\u5728\u5927\u8bed\u8a00\u6a21\u578b\u63a8\u7406\u4e2d\uff0c\u4e0d\u540c\u4efb\u52a1\u9700\u5b9a\u4e49\u4e0d\u540c\u7684\u884c\u52a8\u7a7a\u95f4\u3002\u5373\u4f7f\u5728\u540c\u4e00\u4efb\u52a1\u4e0b\uff0c\u4e0d\u540c\u7c92\u5ea6\u7684\u884c\u52a8\u7a7a\u95f4\u4e5f\u53ef\u80fd\u5bfc\u81f4\u5b8c\u5168\u4e0d\u540c\u7684\u641c\u7d22\u884c\u4e3a\u548c\u7ed3\u679c\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bc4\u4f30\uff08Evaluation\uff09<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>&nbsp;\u5230\u8fbe\u53f6\u8282\u70b9\u540e\uff0c\u9700\u5bf9\u5176\u72b6\u6001\u503c\u8fdb\u884c\u8bc4\u4f30\u3002\u5e38\u89c1\u65b9\u6cd5\u5305\u62ec\uff1a &nbsp; 1\uff09\u8499\u7279\u5361\u6d1b\u91c7\u6837\u4f30\u503c\uff1a\u5c06\u4ece\u6839\u5230\u5f53\u524d\u8282\u70b9\u7684\u72b6\u6001-\u884c\u52a8\u8def\u5f84\u4f5c\u4e3a\u4e0a\u4e0b\u6587\uff0c\u91c7\u6837\u591a\u4e2a\u5b8c\u6574\u8f68\u8ff9\uff0c\u5e76\u57fa\u4e8e\u5176\u7edf\u8ba1\u6307\u6807<\/span><span>\uff08\u5982\u6210\u529f\u7387\uff09<\/span><span>\u8ba1\u7b97\u72b6\u6001\u503c\u3002\u8be5\u65b9\u6cd5\u65e0\u504f\u4f46\u65b9\u5dee\u9ad8\u3001\u8ba1\u7b97\u6210\u672c\u5927\uff0c\u96be\u4ee5\u7528\u4e8e\u91c7\u6837\u4ee3\u4ef7\u9ad8\u7684\u4efb\u52a1\uff1b 2\uff09\u8bad\u7ec3\u4ef7\u503c\u6a21\u578b\u4f30\u503c\uff1a\u5229\u7528\u9884\u8bad\u7ec3\u6a21\u578b\u76f4\u63a5\u4f30\u8ba1\u72b6\u6001\u503c\uff0c\u4f46\u8bad\u7ec3\u4ef7\u503c\u6a21\u578b\u6bd4\u5956\u52b1\u6a21\u578b\u66f4\u5177\u6311\u6218\uff0c\u56e0\u4e3a\u5b83\u9700\u9884\u6d4b\u672a\u6765\u7684\u9884\u671f\u7d2f\u79ef\u5956\u52b1\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u53cd\u5411\u4f20\u64ad\uff08Backpropagation\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u4e00\u65e6\u5b8c\u6210\u72b6\u6001\u503c\u8bc4\u4f30\uff0cMCTS 
\u5c06\u4ece\u53f6\u8282\u70b9\u5411\u6839\u8282\u70b9\u56de\u4f20\u8be5\u503c\uff0c\u66f4\u65b0\u8def\u5f84\u4e2d\u6240\u6709\u8282\u70b9\u7684\u72b6\u6001\u4f30\u8ba1\u3002\u968f\u7740\u6a21\u62df\u6b21\u6570\u7684\u589e\u52a0\uff0c\u8fd9\u4e9b\u4f30\u503c\u6108\u53d1\u7cbe\u786e\u3002\u8be5\u8fc7\u7a0b\u91cd\u590d\u6267\u884c\uff0c\u76f4\u5230\u8fbe\u5230\u8bbe\u5b9a\u7684\u6700\u5927\u6a21\u62df\u6b21\u6570\uff0c\u6700\u7ec8\u5f62\u6210\u4e00\u68f5\u8bb0\u5f55\u6bcf\u4e2a\u8282\u70b9\u72b6\u6001\u503c\u4e0e\u8bbf\u95ee\u6b21\u6570\u7684\u641c\u7d22\u6811\u3002\u7531\u4e8e\u4e0d\u540c\u4efb\u52a1\u548c\u65b9\u6cd5\u7684\u8bbe\u8ba1\u5dee\u5f02\uff0cMCTS \u5728\u5927\u8bed\u8a00\u6a21\u578b\u63a8\u7406\u4e2d\u7684\u5b9e\u73b0\u65b9\u5f0f\u4e5f\u6709\u6240\u4e0d\u540c\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">3.2.3 \u57fa\u4e8e\u957f\u601d\u7ef4\u94fe\u7684\u9690\u5f0f\u8bd5\u9519\u641c\u7d22<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u524d\u4e00\u8282\u4e2d\uff0c\u6211\u4eec\u4ecb\u7ecd\u4e86\u57fa\u4e8e\u77ed\u601d\u7ef4\u94fe<\/span><span>\uff08Short 
CoT\uff09<\/span><span>\u7684\u65b9\u6cd5\uff0c\u5176\u7279\u70b9\u662f\u6bcf\u4e00\u6b65\u63a8\u7406\u5747\u4e3a\u6b63\u786e\u3002\u6240\u6709\u88ab\u5224\u5b9a\u4e3a\u201c\u65e0\u524d\u9014\u201d<\/span><span>\uff08unpromising\uff09<\/span><span>\u7684\u63a8\u7406\u6b65\u9aa4\u5c06\u7531\u7b97\u6cd5\u63a7\u5236\u8fdb\u884c\u526a\u679d\uff0c\u4f7f\u641c\u7d22\u805a\u7126\u4e8e\u66f4\u6709\u5e0c\u671b\u7684\u5206\u652f\uff0c\u786e\u4fdd\u6700\u7ec8\u4ec5\u4fdd\u7559\u901a\u5411\u6b63\u786e\u89e3\u7684\u63a8\u7406\u8f68\u8ff9\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e0e\u6b64\u76f8\u5bf9\uff0c\u957f\u601d\u7ef4\u94fe<\/span><span>\uff08Long CoT\uff09[DeepSeek-AI \u7b49\uff0c2025\uff1bTeam \u7b49\uff0c2025\uff1bHou \u7b49\uff0c2025\uff1bXu \u7b49\uff0c2025]&nbsp;<\/span><span>\u5e76\u4e0d\u8981\u6c42\u6bcf\u4e00\u6b65\u90fd\u6b63\u786e\uff0c\u800c\u662f\u5141\u8bb8\u6a21\u578b\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u901a\u8fc7\u8bd5\u9519\u8fdb\u884c\u63a2\u7d22\u3002\u6211\u4eec\u5c06\u957f\u601d\u7ef4\u94fe\u5b9a\u4e49\u4e3a\u6a21\u62df\u4eba\u7c7b\u8ba4\u77e5\u8fc7\u7a0b\u7684\u601d\u7ef4\u94fe\u3002\u4e0e\u77ed\u601d\u7ef4\u94fe\u4e0d\u540c\uff0c\u957f\u601d\u7ef4\u94fe\u4e0d\u4ec5\u5305\u542b\u9010\u6b65\u903b\u8f91\u63a8\u7406\uff0c\u8fd8\u5728\u6574\u4e2a\u63a8\u7406\u8fc7\u7a0b\u4e2d\u96c6\u6210\u4e86\u81ea\u6211\u8bc4\u4f30\u548c\u81ea\u6211\u7ea0\u6b63\u7b49\u884c\u4e3a\u3002\u76f8\u5e94\u5730\uff0c\u6211\u4eec\u5c06R1\u548c\u7c7b\u4f3c\u5de5\u4f5c\u7528\u4e8e\u63a2\u7d22\u957f\u601d\u7ef4\u94fe\u7684\u8fc7\u7a0b\u79f0\u4e3a\u9690\u5f0f\u8bd5\u9519\u641c\u7d22<\/span><span>\uff08Implicit Trial-and-Error 
Search\uff09<\/span><span>\u3002\u5728\u6b64\u8fc7\u7a0b\u4e2d\uff0c\u4e0d\u9700\u8981\u5916\u90e8\u8bc4\u4f30\u5668\u6216\u7cbe\u70bc\u5668\uff1b\u5927\u8bed\u8a00\u6a21\u578b\u81ea\u4e3b\u89e6\u53d1\u5176\u81ea\u6211\u8bc4\u4f30\u673a\u5236\uff0c\u5e76\u5229\u7528\u81ea\u6211\u7ea0\u6b63\u548c\u56de\u6eaf\u80fd\u529b\u6765\u8c03\u6574\u63a8\u7406\u8def\u5f84\uff0c\u8fd9\u5bf9\u63d0\u9ad8\u63a8\u7406\u6027\u80fd\u81f3\u5173\u91cd\u8981\u3002 \u5c3d\u7ba1\u957f\u601d\u7ef4\u94fe\u5728\u8868\u9762\u4e0a\u5448\u73b0\u4e3a\u7ebf\u6027\u7ed3\u6784\uff0c\u4f46\u7531\u4e8e\u7ea0\u9519\u4e0e\u56de\u6eaf\u673a\u5236\u7684\u5b58\u5728\uff0c\u5176\u672c\u8d28\u903b\u8f91\u5f62\u6001\u66f4\u63a5\u8fd1\u4e8e\u6811\u72b6\u7ed3\u6784\u3002\u56e0\u6b64\uff0c\u6211\u4eec\u5c06\u5176\u79f0\u4e3a\u9690\u5f0f\u641c\u7d22\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u5b9e\u73b0 Long CoT \u7684\u751f\u6210\uff0cO1 \u9879\u76ee<\/span><span>&nbsp;[Huang \u7b49\uff0c2024]<\/span><span>&nbsp;\u63d0\u51fa\u4e86\u57fa\u4e8e\u84b8\u998f\u7684\u8bad\u7ec3\u65b9\u6cd5\u3002\u7136\u800c\uff0c\u8be5\u65b9\u6cd5\u4ec5\u6a21\u4eff\u4e86\u8bd5\u9519\u884c\u4e3a\u7684\u6a21\u5f0f\uff0c\u672a\u80fd\u771f\u6b63\u8d4b\u4e88\u6a21\u578b\u6df1\u5165\u601d\u8003\u7684\u80fd\u529b\u3002\u4e3a\u6b64\uff0c\u591a\u4e2a\u56e2\u961f\u9646\u7eed\u5f15\u5165\u57fa\u4e8e\u5f3a\u5316\u5b66\u4e60\u7684\u4f18\u5316\u7b56\u7565\u3002DeepSeek-R1 \u5728 PPO&nbsp;<\/span><span>[Schulman \u7b49\uff0c2017]&nbsp;<\/span><span>\u7684\u57fa\u7840\u4e0a\u91c7\u7528 GRPO<\/span><span>&nbsp;[Shao \u7b49\uff0c2024]&nbsp;<\/span><span>\u8fdb\u884c\u6539\u8fdb\uff1bKimi k1.5 \u501f\u52a9\u5728\u7ebf\u7b56\u7565\u955c\u50cf\u4e0b\u964d\u7684\u53d8\u4f53<\/span><span>&nbsp;[Lazic 
\u7b49\uff0c2019]<\/span><span>\uff1bT1 \u5e94\u7528\u4e86 RLOO \u7b97\u6cd5<\/span><span>&nbsp;[Ahmadian \u7b49\uff0c2024]&nbsp;<\/span><span>\u8fdb\u884c\u4f18\u5316\u3002\u8fd9\u4e9b\u4f18\u5316\u5c06\u5728 \u00a74 \u8be6\u7ec6\u8ba8\u8bba\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0c\u8fd9\u4e9b\u5f00\u6e90\u7814\u7a76\u666e\u904d\u9009\u62e9\u4ee5\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08Outcome Reward Model, ORM\uff09<\/span><span>\u66ff\u4ee3\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08Process Reward Model, PRM\uff09<\/span><span>\uff0c\u4ee5\u6307\u5bfc\u5927\u8bed\u8a00\u6a21\u578b\u81ea\u4e3b\u63a2\u7d22\u89e3\u7a7a\u95f4\u3002\u8fd9\u4e00\u7b56\u7565\u6027\u8f6c\u53d8\u4f7f\u6a21\u578b\u5728\u6027\u80fd\u4e0a\u8fbe\u5230\u4e86\u4e0e O1 \u76f8\u5f53\u751a\u81f3\u8d85\u8d8a\u7684\u6c34\u5e73\u3002\u653e\u5f03 PRM \u7684\u51b3\u5b9a\uff0c\u4e3b\u8981\u6e90\u4e8e\u5176\u6cdb\u5316\u80fd\u529b\u6709\u9650\u4ee5\u53ca\u5956\u52b1\u6b3a\u9a97<\/span><span>\uff08reward hacking\uff09<\/span><span>\u95ee\u9898\u4e25\u91cd\uff0c\u76f8\u5173\u5185\u5bb9\u5df2\u5728 \u00a73.2.1 \u4e2d\u8be6\u5c3d\u8ba8\u8bba\u3002 \u968f\u7740\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u7684\u4e0d\u65ad\u63a8\u8fdb\uff0c\u6a21\u578b\u751f\u6210\u7684\u601d\u7ef4\u94fe\u663e\u8457\u5ef6\u957f\uff0c\u63a8\u7406\u51c6\u786e\u6027\u4e0e\u6cdb\u5316\u80fd\u529b\u6301\u7eed\u63d0\u5347\u3002\u5728\u6b64\u8fc7\u7a0b\u4e2d\uff0c\u6a21\u578b\u751a\u81f3\u5c55\u73b0\u51fa\u201c\u987f\u609f\u65f6\u523b\u201d<\/span><span>[DeepSeek-AI 
\u7b49\uff0c2025]<\/span><span>\uff1a\u80fd\u591f\u81ea\u4e3b\u5b8c\u6210\u89e3\u7684\u9a8c\u8bc1\u4e0e\u66ff\u4ee3\u63a8\u7406\u8def\u5f84\u7684\u63a2\u7d22\uff0c\u8868\u73b0\u51fa\u63a5\u8fd1\u4eba\u7c7b\u201c\u7075\u611f\u5f0f\u201d\u8ba4\u77e5\u7684\u63a8\u7406\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">3.2.4 \u663e\u5f0f\u6811\u641c\u7d22\u4e0e\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u7684\u5dee\u5f02\u4e0e\u7edf\u4e00<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u663e\u5f0f\u6811\u641c\u7d22\u4e0e\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u7684\u5dee\u5f02<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u6df1\u5165\u6bd4\u8f83\u4e4b\u524d\uff0c\u6211\u4eec\u7b80\u8981\u603b\u7ed3\u663e\u5f0f\u6811\u641c\u7d22\u7528\u4e8e\u77ed\u601d\u7ef4\u94fe\u548c\u8bd5\u9519\u641c\u7d22\u7528\u4e8e\u957f\u601d\u7ef4\u94fe\u5728\u6d41\u7a0b\u4e0a\u7684\u533a\u522b\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img 
class=\"rich_pages wxw-img\" data-ratio=\"0.23227132579650564\" data-type=\"png\" data-w=\"973\" style=\"height: auto !important\" data-width=\"973\" data-height=\"226\" data-imgfileid=\"100227504\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-558b05afd9709dcf146c0a3a1fa8b0cc.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jlod01pbv\" data-pm-slice=\"0 0 []\"><span>\u56fe5\uff1a\u4e24\u79cd\u641c\u7d22\u8303\u5f0f\u7684\u8bf4\u660e\u3002\u663e\u5f0f\u6811\u641c\u7d22\u901a\u8fc7\u540c\u65f6\u6269\u5c55\u591a\u4e2a\u6b65\u9aa4\u5e76\u4f18\u5148\u5904\u7406\u6700\u6709\u5e0c\u671b\u7684\u6b65\u9aa4\u6765\u63d0\u9ad8\u641c\u7d22\u6548\u7387\uff0c\u4ece\u800c\u8bc6\u522b\u903b\u8f91\u8fde\u8d2f\u7684\u77ed\u601d\u7ef4\u94fe\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0c\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u901a\u8fc7\u9010\u6b65\u63a8\u7406\u3001\u8bd5\u9519\u548c\u56de\u6eaf\u5230\u8fc7\u53bb\u7684\u72b6\u6001\u6765\u63a2\u7d22\u66ff\u4ee3\u601d\u8def\uff0c\u6a21\u4eff\u4eba\u7c7b\u601d\u8003\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5982\u56fe5\u6240\u793a\uff0c\u663e\u5f0f\u6811\u641c\u7d22\u91c7\u7528\u542f\u53d1\u5f0f\u641c\u7d22\u7b97\u6cd5<\/span><span>\uff08\u5982\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u3001A*\u548c\u675f\u641c\u7d22\uff09<\/span><span>\u6765\u63a2\u7d22\u89e3\u51b3\u65b9\u6848\u7a7a\u95f4\u3002\u5728\u6bcf\u4e2a\u72b6\u6001\uff0c\u90fd\u4f1a\u6269\u5c55\u591a\u4e2a\u884c\u52a8\u4ee5\u83b7\u5f97\u5019\u9009\u72b6\u6001\uff0c\u5f62\u6210\u6811\u7ed3\u6784\u7684\u641c\u7d22\u8fc7\u7a0b\u3002\u5728\u6b64\u8fc7\u7a0b\u4e2d\uff0c\u63a8\u7406\u7cfb\u7edf\u88ab\u52a8\u8c03\u7528\u8bc4\u4f30\u548c\u526a\u679d\u7b49\u64cd\u4f5c\u3002\u751f\u6210\u7684\u601d\u7ef4\u94fe\u4e2d\u7684\u6bcf\u4e2a\u63a8\u7406\u6b65\u9aa4\u90fd\u4fdd\u8bc1\u662f\u6b63\u786e\u7684\uff0c\u800c\u8bc4\u4f30\u3001\u526a\u679d\u548c\u9519\u8bef\u7ea0\u6b63\u7b49\u884c\u4e3a\u4e0d\u4f1a\u5728\u77ed\u601d\u7ef4\u94fe<\/span><span>\uff08Short CoT\uff09<\/span><span>\u4e2d\u5448\u73b0\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u76f8\u6bd4\u4e4b\u4e0b\uff0c\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u4e0d\u4f9d\u8d56\u542f\u53d1\u5f0f\u7b97\u6cd5\u3002\u76f8\u53cd\uff0c\u5927\u8bed\u8a00\u6a21\u578b\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u4e3b\u52a8\u8c03\u7528\u81ea\u6211\u8bc4\u4f30\u548c\u81ea\u6211\u7ea0\u6b63\u7b49\u80fd\u529b\uff0c\u5e76\u7528\u81ea\u7136\u8bed\u8a00\u8868\u8fbe\u8fd9\u4e9b\u64cd\u4f5c\u3002\u56e0\u6b64\uff0c\u8bd5\u9519\u641c\u7d22\u4e2d\u7684\u957f\u601d\u7ef4\u94fe\u4e0d\u4ec5\u5305\u542b\u9010\u6b65\u63a8\u7406\uff0c\u8fd8\u878d\u5408\u81ea\u6211\u8bc4\u4f30\u3001\u81ea\u6211\u7ea0\u6b63\u4e0e\u56de\u6eaf\u64cd\u4f5c\uff0c\u4f7f\u6574\u4f53\u63a8\u7406\u8fc7\u7a0b\u66f4\u52a0\u900f\u660e\u4e14\u5177\u6709\u52a8\u6001\u8c03\u6574\u80fd\u529b\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span 
style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u6027\u80fd\u65b9\u9762\uff0c\u6811\u641c\u7d22\u4e5f\u6709\u6210\u529f\u7684\u5b9e\u73b0\uff0c\u5982rStar-Math<\/span><span>&nbsp;[Guan \u7b49\uff0c2025]<\/span><span>\uff0c\u5b83\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u548c\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u4ee5\u53ca\u81ea\u6211\u8fdb\u5316\u8bad\u7ec3\uff0c\u4f7f\u5c0f\u578b\u5927\u8bed\u8a00\u6a21\u578b\u5728\u6027\u80fd\u4e0a\u8fbe\u5230\u4e0eO1\u76f8\u5f53\u7684\u6c34\u5e73\u3002\u7136\u800c\uff0c\u6700\u8fd1\u7684\u5f00\u6e90\u9879\u76ee\uff0c\u5305\u62ecDeepSeek R1<\/span><span>&nbsp;[Team, 2024a]<\/span><span>\u548cKimi k1.5<\/span><span>&nbsp;[Team \u7b49\uff0c2025]<\/span><span>\uff0c\u90fd\u9009\u62e9\u4e86\u8bd5\u9519\u641c\u7d22\u8def\u7ebf\uff0c\u83b7\u5f97\u4e86\u663e\u8457\u7684\u6cdb\u5316\u80fd\u529b<\/span><span>[Yeo \u7b49\uff0c2025]<\/span><span>\u3002\u8fd9\u4e9b\u5f00\u6e90\u9879\u76ee\u653e\u5f03\u4f7f\u7528\u77ed\u601d\u7ef4\u94fe\u7684\u663e\u5f0f\u6811\u641c\u7d22\uff0c\u8f6c\u800c\u91c7\u7528\u957f\u601d\u7ef4\u94fe\u7684\u8bd5\u9519\u641c\u7d22\u7684\u539f\u56e0\u53ef\u4ee5\u4ece\u5176\u6280\u672f\u62a5\u544a\u4e2d\u63a8\u65ad\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u9996\u5148\uff0c\u6811\u641c\u7d22\u901a\u5e38\u4f9d\u8d56\u5956\u52b1\u6a21\u578b\u6216\u4ef7\u503c\u6a21\u578b\u7b49\u9a8c\u8bc1\u5668\u63d0\u4f9b\u8bc4\u5206\uff0c\u4ee5\u5b9e\u73b0\u7ec6\u7c92\u5ea6\u7684\u8bc4\u4f30\u6307\u5bfc\u3002\u7136\u800c\uff0c\u8fd9\u4e9b\u9a8c\u8bc1\u5668\u666e\u904d\u5b58\u5728\u6cdb\u5316\u80fd\u529b\u5f31\u4e0e\u5956\u52b1\u6b3a\u9a97\u4e25\u91cd\u7684\u95ee\u9898\u3002\u8fd9\u53ef\u80fd\u5bfc\u81f4\u4e2d\u95f4\u8bc4\u4f30\u4e0d\u51c6\u786e\uff0c\u751a\u81f3\u56e0 LLM \u5229\u7528\u6377\u5f84\u6700\u5927\u5316\u5956\u52b1\u800c\u5f15\u53d1\u8bad\u7ec3\u5d29\u6e83\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0cR1\u3001Kimi k1.5\u548cT1\u5728\u641c\u7d22\u8fc7\u7a0b\u4e2d\u5229\u7528\u81ea\u6211\u8bc4\u4f30\u80fd\u529b\uff0c\u5e76\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u91c7\u7528\u57fa\u4e8e\u89c4\u5219\u7684\u7ed3\u679c\u5956\u52b1\uff0c\u663e\u8457\u51cf\u8f7b\u4e86\u5956\u52b1\u6b3a\u9a97\u5e76\u63d0\u9ad8\u4e86\u6cdb\u5316\u80fd\u529b\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0c\u6811\u641c\u7d22\u4e2d\u9a8c\u8bc1\u5668\u7684\u5206\u6570\u4ec5\u53cd\u6620\u63a8\u7406\u7684\u76f8\u5bf9\u8d28\u91cf\uff0c\u672a\u80fd\u6307\u51fa\u9519\u8bef\u6216\u539f\u56e0\uff0c\u5bfc\u81f4\u8bc4\u4f30\u8d28\u91cf\u6709\u9650\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0cR1\u548c\u7c7b\u4f3c\u9879\u76ee\u901a\u8fc7\u81ea\u6211\u8bc4\u4f30\u751f\u6210\u53e3\u5934\u8bc4\u4f30\u53cd\u9988\uff0c\u63d0\u4f9b\u66f4\u4e30\u5bcc\u548c\u66f4\u6709\u4fe1\u606f\u7684\u53cd\u9988\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u6700\u540e\uff0c\u867d\u7136\u6811\u641c\u7d22\u53ef\u4ee5\u540c\u65f6\u63a2\u7d22\u591a\u6761\u8def\u5f84\uff0c\u4f46\u8fd9\u4e9b\u8def\u5f84\u662f\u72ec\u7acb\u7684\u3002\u56e0\u6b64\uff0c\u4e2d\u95f4\u7ecf\u9a8c\u65e0\u6cd5\u5728\u5b83\u4eec\u4e4b\u95f4\u5171\u4eab\uff0c\u964d\u4f4e\u4e86\u5e76\u884c\u63a8\u7406\u8fc7\u7a0b\u7684\u5229\u7528\u7387\u3002\u8fd9\u4f7f\u5f97\u6811\u641c\u7d22\u4e0e\u4eba\u7c7b\u63a8\u7406\u6709\u663e\u8457\u5dee\u5f02\uff0c\u56e0\u4e3a\u5728\u4eba\u7c7b\u63a8\u7406\u4e2d\uff0c\u8fc7\u53bb\u9519\u8bef\u7684\u89c1\u89e3\u6307\u5bfc\u540e\u7eed\u63a8\u7406\uff0c\u8fd9\u5728\u957f\u601d\u7ef4\u94fe<\/span><span>\uff08Long CoT\uff09<\/span><span>\u7684\u8bd5\u9519\u641c\u7d22\u4e2d\u53ef\u4ee5\u770b\u5230\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u867d\u7136\u4e0a\u8ff0\u8ba8\u8bba\u5f3a\u8c03\u4e86\u4e0e\u8bd5\u9519\u641c\u7d22\u76f8\u6bd4\uff0c\u663e\u5f0f\u6811\u641c\u7d22\u7684\u5f31\u70b9\uff0c\u4f46\u5e76\u4e0d\u610f\u5473\u7740\u8bd5\u9519\u641c\u7d22\u6ca1\u6709\u7f3a\u70b9\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u8bd5\u9519\u641c\u7d22\u4e2d\u5e94\u7528\u957f\u601d\u7ef4\u94fe\u53ef\u80fd\u5728\u4e24\u4e2a\u5173\u952e\u65b9\u9762\u5f15\u5165\u6548\u7387\u4f4e\u4e0b\u30021\uff09 \u5bf9\u4e8e\u7b80\u5355\u4efb\u52a1\uff0c\u957f\u601d\u7ef4\u94fe\u65b9\u6cd5\u5f80\u5f80\u8868\u73b0\u51fa<\/span><\/span><strong style=\"font-size: 
15px\"><span><span>\u8fc7\u5ea6\u601d\u8003<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002\u6b63\u5982<\/span><span>[Chen \u7b49\uff0c2024f]<\/span><span>\u6240\u6307\u51fa\u7684\uff0cQwQ<\/span><span>&nbsp;[Team, 2024b]<\/span><span>\u548cR1&nbsp;<\/span><span>[DeepSeek-AI \u7b49\uff0c2025]<\/span><span>\u7b49\u65b9\u6cd5\u901a\u5e38\u4f1a\u63a2\u7d22\u591a\u4e2a\u6f5c\u5728\u89e3\u51b3\u65b9\u6848\uff0c\u5373\u4f7f\u521d\u59cb\u89e3\u51b3\u65b9\u6848\u901a\u5e38\u5df2\u7ecf\u8db3\u591f\u3002\u8fd9\u79cd\u8fc7\u5ea6\u63a2\u7d22\u884c\u4e3a\u4f1a\u5f15\u5165\u663e\u8457\u7684\u8ba1\u7b97\u8d44\u6e90\u6d88\u8017\u30022\uff09 \u5bf9\u4e8e\u590d\u6742\u4efb\u52a1\uff0cWang \u7b49<\/span><span>[2025a]<\/span><span>\u89c2\u5bdf\u5230QwQ\u548cR1\u5bb9\u6613<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u601d\u8003\u4e0d\u8db3<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002\u8fd9\u4e9b\u65b9\u6cd5\u5f80\u5f80\u5728\u672a\u5145\u5206\u9a8c\u8bc1\u5f53\u524d\u63a8\u7406\u8def\u5f84\u6709\u6548\u6027\u524d\u5373\u8fc7\u65e9\u653e\u5f03\uff0c\u5bfc\u81f4\u7b56\u7565\u9891\u7e41\u5207\u6362\uff0c\u4f7f\u641c\u7d22\u8fc7\u7a0b\u4e0d\u7a33\u5b9a\u4e14\u6548\u7387\u4f4e\u4e0b\uff0c\u4f34\u968f\u7740\u4e0d\u5fc5\u8981\u7684\u5197\u957f\u63a8\u7406\u94fe\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0c\u57fa\u4e8e\u77ed\u601d\u7ef4\u94fe\u7684\u65b9\u6cd5\u4ea7\u751f\u66f4\u7b80\u6d01\u7684\u63a8\u7406\u8def\u5f84\uff0c\u63d0\u4f9b\u660e\u663e\u7684\u6548\u7387\u4f18\u52bf\u3002<\/span><span>[Wu \u7b49\uff0c2025b; Xie 
\u7b49\uff0c2025a]<\/span><span>\u8fdb\u4e00\u6b65\u8bba\u8bc1\uff0c\u66f4\u957f\u7684\u601d\u7ef4\u94fe\u4e0d\u4e00\u5b9a\u80fd\u6539\u5584\u63a8\u7406\u6027\u80fd\uff1b\u76f8\u53cd\uff0c\u6bcf\u4e2a\u6a21\u578b\u548c\u4efb\u52a1\u90fd\u5b58\u5728\u6700\u4f73\u601d\u7ef4\u94fe\u957f\u5ea6\u3002\u56e0\u6b64\uff0c\u8bd5\u9519\u641c\u7d22\u7684\u4f4e\u6548\u7387\u4e0d\u4ec5\u589e\u52a0\u4e86\u8bcd\u5143\u4f7f\u7528\u548c\u8ba1\u7b97\u6210\u672c\uff0c\u8fd8\u964d\u4f4e\u4e86\u6027\u80fd\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0c\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u4e25\u91cd\u4f9d\u8d56\u5927\u8bed\u8a00\u6a21\u578b\u7684\u81ea\u6211\u8bc4\u4f30\u548c\u81ea\u6211\u7ea0\u6b63\u80fd\u529b\u3002\u4e00\u65b9\u9762\uff0c\u8fd9\u4e9b\u80fd\u529b\u7684\u5e95\u5c42\u673a\u5236\u4ecd\u662f\u9700\u8981\u8fdb\u4e00\u6b65\u7814\u7a76\u7684\u9886\u57df\uff1b\u53e6\u4e00\u65b9\u9762\uff0c\u8fd9\u4e9b\u80fd\u529b\u5728\u5927\u8bed\u8a00\u6a21\u578b\u7684\u5b66\u4e60\u8fc7\u7a0b\u4e2d\u5c1a\u672a\u88ab\u7279\u522b\u4f18\u5316\u3002R1<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]<\/span><span>\u3001Kimi k1.5&nbsp;<\/span><span>[Team \u7b49\uff0c2025]<\/span><span>\u548cT1<\/span><span>&nbsp;[Hou 
\u7b49\uff0c2025]<\/span><span>\u7b49\u6a21\u578b\u5728\u540c\u4e00\u884c\u52a8\u7a7a\u95f4\u4e2d\u4ec5\u4f7f\u7528\u7ed3\u679c\u7ea7\u5956\u52b1\u540c\u65f6\u5b66\u4e60\u63a8\u7406\u3001\u8bc4\u4f30\u3001\u53cd\u601d\u548c\u9519\u8bef\u7ea0\u6b63\uff0c\u4f46\u7f3a\u4e4f\u4e13\u95e8\u7684\u5956\u52b1\u4fe1\u53f7\u6765\u6307\u5bfc\u8bc4\u4f30\u3001\u53cd\u601d\u548c\u7ea0\u6b63\u80fd\u529b\u7684\u5b66\u4e60\u3002\u7ed3\u679c\uff0c\u5927\u8bed\u8a00\u6a21\u578b\u4e2d\u7684\u8fd9\u4e9b\u80fd\u529b\u6ca1\u6709\u5f97\u5230\u7279\u522b\u4f18\u5316\uff0c\u4e00\u4e2a\u540e\u679c\u662f\uff0c\u5373\u4f7f\u5927\u8bed\u8a00\u6a21\u578b\u5728\u65e9\u671f\u9636\u6bb5\u8fdb\u884c\u4f4e\u8d28\u91cf\u7684\u53cd\u601d\u6216\u9519\u8bef\u7ea0\u6b63\uff0c\u53ea\u8981\u6700\u7ec8\u7b54\u6848\u6b63\u786e\uff0c\u5b83\u4eec\u4ecd\u7136\u53ef\u4ee5\u83b7\u5f97\u79ef\u6781\u5956\u52b1\u3002\u6b64\u5916\uff0c\u81ea\u6211\u8bc4\u4f30\u80fd\u529b\u7684\u4e0d\u8db3\u662fR1\u7b49\u65b9\u6cd5\u7ecf\u5e38\u65e0\u6cd5\u51c6\u786e\u8bc4\u4f30\u63a8\u7406\u8def\u5f84\uff0c\u4ece\u800c\u8fc7\u65e9\u653e\u5f03\u6709\u5e0c\u671b\u8def\u5f84\u7684\u539f\u56e0\u4e4b\u4e00\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u89e3\u51b3\u6548\u7387\u4f4e\u4e0b\u95ee\u9898\uff0cKimi k1.5&nbsp;<\/span><span>[Team \u7b49\uff0c2025]<\/span><span>\u5f15\u5165\u4e86\u957f\u5ea6\u60e9\u7f5a\u4f5c\u4e3a\u957f\u5ea6\u5956\u52b1\u7684\u4e00\u90e8\u5206\uff0c\u7528\u4e8e\u63a7\u5236\u54cd\u5e94\u957f\u5ea6\u3002Yeo 
\u7b49<\/span><span>[2025]<\/span><span>\u8bbe\u8ba1\u4e86\u4f59\u5f26\u5956\u52b1\u51fd\u6570\uff0c\u5bf9\u4e8e\u6b63\u786e\u54cd\u5e94\uff0c\u5956\u52b1\u968f\u7740\u957f\u5ea6\u7f29\u77ed\u800c\u589e\u52a0\uff0c\u800c\u5bf9\u4e8e\u9519\u8bef\u54cd\u5e94\uff0c\u5956\u52b1\u968f\u7740\u957f\u5ea6\u589e\u52a0\u800c\u589e\u52a0\u3002Luo \u7b49[2025]\u63d0\u51fa\u957f\u5ea6\u534f\u8c03\u5956\u52b1\uff0c\u6291\u5236\u8fc7\u957f\u7684\u54cd\u5e94\u3002\u9664\u5f15\u5165\u65b0\u7684\u5956\u52b1\u51fd\u6570\u5916\uff0cChen \u7b49[2024f]\u91c7\u7528\u504f\u597d\u5b66\u4e60\uff0c\u5c06\u6700\u77ed\u54cd\u5e94\u89c6\u4e3a\u6b63\u4f8b\uff0c\u6700\u957f\u54cd\u5e94\u89c6\u4e3a\u8d1f\u4f8b\uff0c\u4ece\u800c\u9f13\u52b1\u5927\u8bed\u8a00\u6a21\u578b\u751f\u6210\u66f4\u77ed\u7684\u601d\u7ef4\u94fe\uff0c\u6291\u5236\u8fc7\u957f\u601d\u7ef4\u94fe\u7684\u751f\u6210\u3002\u6211\u4eec\u5728\u88681\u4e2d\u7b80\u8981\u603b\u7ed3\u4e86\u8fd9\u4e24\u79cd\u641c\u7d22\u65b9\u6cd5\u7684\u5dee\u5f02\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.3757828810020877\" data-type=\"png\" data-w=\"958\" style=\"height: auto !important\" data-width=\"958\" data-height=\"360\" data-imgfileid=\"100227506\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-070c269d311c8849b7e4798e63c842bb.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 12px\" data-mpa-action-id=\"m96jl7is1v4t\" data-pm-slice=\"0 0 []\"><span>\u8868 1\uff1a\u4e24\u79cd\u641c\u7d22\u8303\u5f0f\u7684\u6bd4\u8f83\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 
15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u663e\u5f0f\u6811\u641c\u7d22\u4e0e\u9690\u5f0f\u8bd5\u9519\u641c\u7d22\u7684\u7edf\u4e00<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd9\u4e24\u79cd\u641c\u7d22\u7b56\u7565\u2014\u2014\u6811\u641c\u7d22\u548c\u8bd5\u9519\u641c\u7d22\u2014\u2014\u5404\u81ea\u63d0\u4f9b\u72ec\u7279\u4f18\u52bf\uff0c\u5f15\u53d1\u4e00\u4e2a\u5173\u952e\u95ee\u9898\uff1a\u5b83\u4eec\u4e4b\u95f4\u7684\u5173\u7cfb\u662f\u4ec0\u4e48\uff0c\u80fd\u5426\u7edf\u4e00\uff1f\u6211\u4eec\u4ece\u4e24\u4e2a\u89d2\u5ea6\u63a2\u8ba8\u8fd9\u4e2a\u95ee\u9898\u3002\u9996\u5148\uff0c\u6211\u4eec\u4ece\u884c\u52a8\u7a7a\u95f4\u7684\u89d2\u5ea6\u5206\u6790\u8fd9\u4e24\u79cd\u641c\u7d22\u7684\u76f8\u5173\u6027\uff0c\u91cd\u70b9\u5173\u6ce8\u4e0d\u540c\u5143\u64cd\u4f5c\u7b26\u7684\u89d2\u8272\u3002<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6700\u521d\uff0c\u4e24\u79cd\u7b56\u7565\u90fd\u5305\u62ec\u9010\u6b65\u63a8\u7406\uff0c\u77ed\u601d\u7ef4\u94fe\u4e3b\u8981\u7531\u903b\u8f91\u8fde\u8d2f\u7684\u63a8\u7406\u6b65\u9aa4\u7ec4\u6210\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u7136\u800c\uff0c\u4e24\u79cd\u7b56\u7565\u5728\u8bc4\u4f30\u673a\u5236\u4e0a\u6709\u663e\u8457\u5dee\u5f02\u3002\u663e\u5f0f\u6811\u641c\u7d22\u901a\u5e38\u9700\u8981\u5b66\u4e60\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b\u6216\u4ef7\u503c\u6a21\u578b\u6765\u8bc4\u4f30\u63a8\u7406\u8d28\u91cf\uff0c\u7531\u4e8e\u8fd9\u4e9b\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u5dee\u800c\u5f15\u5165\u9ad8\u504f\u5dee\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0c\u8bd5\u9519\u641c\u7d22\u4f9d\u9760\u5927\u8bed\u8a00\u6a21\u578b\u7684\u5185\u5728\u81ea\u6211\u8bc4\u4f30\u80fd\u529b\u6765\u8bc4\u4f30\u63a8\u7406\u72b6\u6001\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5173\u4e8e\u540e\u5904\u7406\uff0c\u6211\u4eec\u4ee5&#8220;\u7ea0\u6b63&#8221;\u4e3a\u4f8b\u8fdb\u884c\u5206\u6790\u3002\u6811\u641c\u7d22\u901a\u5e38\u7f3a\u4e4f\u76f4\u63a5\u7ea0\u6b63\u64cd\u4f5c\uff0c\u5c3d\u7ba1\u5206\u652f\u95f4\u5207\u6362\u53ef\u88ab\u89c6\u4e3a\u4e00\u79cd\u5f62\u5f0f\u4e0a\u7684\u9519\u8bef\u7ea0\u6b63\u3002\u7136\u800c\uff0c\u8fd9\u79cd&#8220;\u7ea0\u6b63&#8221;\u65e0\u6cd5\u5229\u7528\u5148\u524d\u5c1d\u8bd5\u7684\u5185\u5bb9\uff0c\u56e0\u4e3a\u5b83\u4eec\u4ec5\u662f\u5728\u5148\u524d\u6269\u5c55\u9636\u6bb5\u9884\u5148\u91c7\u6837\u7684\uff0c\u4e0d\u540c\u5c1d\u8bd5\u5f7c\u6b64\u72ec\u7acb\u3002\u4f8b\u5982\uff0c\u5728\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22\u7684\u6269\u5c55\u9636\u6bb5\uff0c\u540c\u65f6\u91c7\u6837\u591a\u4e2a\u5b50\u5019\u9009\u884c\u52a8\u3002\u5728\u968f\u540e\u7684\u9009\u62e9\u9636\u6bb5\uff0c\u5f53\u524d\u72b6\u6001\u4e2d\u9009\u62e9\u7684\u884c\u52a8\u53ef\u80fd\u4e0e\u524d\u4e00\u6a21\u62df\u4e2d\u7684\u4e0d\u540c\uff0c\u8fd9\u53ef\u89c6\u4e3a\u4e00\u79cd&#8220;\u7ea0\u6b63&#8221;\u3002\u7136\u800c\uff0c\u6b64\u6b21\u6a21\u62df\u4e2d\u9009\u62e9\u7684\u65b0\u884c\u52a8\u5e76\u975e\u57fa\u4e8e\u524d\u4e00\u6a21\u62df\u4e2d\u6240\u9009\u884c\u52a8\u7684\u8bc4\u4f30\u53cd\u9988\u751f
\u6210\uff1b\u76f8\u53cd\uff0c\u4e24\u79cd\u884c\u52a8\u90fd\u662f\u5728\u6269\u5c55\u9636\u6bb5\u72ec\u7acb\u91c7\u6837\u7684\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u56e0\u6b64\uff0c\u4e0e\u8bd5\u9519\u641c\u7d22\u76f8\u6bd4\uff0c\u5f53\u524d\u6811\u641c\u7d22\u65b9\u6cd5\u7684\u4e3b\u8981\u9650\u5236\u5728\u4e8e\u5176\u53d7\u9650\u7684\u884c\u52a8\u7a7a\u95f4\u3002\u5982\u679c\u6269\u5c55\u6811\u641c\u7d22\u7684\u884c\u52a8\u7a7a\u95f4\u4ee5\u7eb3\u5165\u8bc4\u4f30\u548c\u7ea0\u6b63\u7b49\u884c\u52a8\uff0c\u7406\u8bba\u4e0a\u5b83\u4e5f\u53ef\u4ee5\u4fc3\u8fdb\u957f\u601d\u7ef4\u94fe\u7684\u63a2\u7d22<\/span><span>[Lin \u7b49\uff0c2025]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u4ece\u63a8\u7406\u80fd\u529b\u8fdb\u5316\u7684\u89d2\u5ea6\u770b\uff0c\u957f\u601d\u7ef4\u94fe\u662f\u89e3\u51b3\u65b0\u95ee\u9898\u7684\u6709\u6548\u65b9\u6cd5\uff0c\u800c\u77ed\u601d\u7ef4\u94fe\u4ee3\u8868\u901a\u8fc7\u5bf9\u957f\u601d\u7ef4\u94fe\u7684\u6301\u7eed\u8bad\u7ec3\u5b9e\u73b0\u7684\u6700\u7ec8\u76ee\u6807\u3002\u5177\u4f53\u800c\u8a00\uff0c\u4eba\u7c7b\u5728\u9762\u5bf9\u590d\u6742\u4efb\u52a1\u65f6\uff0c\u901a\u5e38\u5148\u901a\u8fc7\u8bd5\u9519\u63a2\u7d22\uff0c\u6700\u7ec8\u5f52\u7eb3\u51fa\u9ad8\u6548\u7684\u6c42\u89e3\u8def\u5f84\u3002\u53ef\u4ee5\u5b66\u4e60\u8fd9\u4e9b\u6709\u6548\u9014\u5f84\u4ee5\u51cf\u5c11\u4e0d\u5fc5\u8981\u7684\u8bd5\u9519\uff0c\u4ece\u800c\u7f29\u77ed\u957f\u601d\u7ef4\u94fe\u3002\u56e0\u6b64\uff0c\u957f\u601d\u7ef4\u94fe\u53ef\u89c6\u4e3a\u5904\u7406\u590d\u6742\u4efb\u52a1\u7684\u521d\u59cb\u548c\u4e2d\u95f4\u89e3\u51b3\u65b9\u6848\u3002\u4e00\u65e6\u89e3\u51b3\u4efb\u52a1\uff0c\u4ece\u957f\u601d\u7ef4\u94fe\u63d0\u70bc\u7684\u77e5\u8bc6\u53ef\u7528\u4e8e\u5b66\u4e60\u77ed\u601d\u7ef4\u94fe\uff0c\u800c\u77ed\u601d\u7ef4\u94fe\u53c8\u4f5c\u4e3a\u5148\u9a8c\u77e5\u8bc6\uff0c\u5728\u5904\u7406\u66f4\u590d\u6742\u4efb\u52a1\u65f6\u51cf\u5c11\u957f\u601d\u7ef4\u94fe\u7684\u8bd5\u9519\u8fed\u4ee3\u3002\u603b\u4e4b\uff0c\u5f3a\u5927\u7684\u63a8\u7406\u7cfb\u7edf\u5e94\u5177\u5907\u52a8\u6001\u5207\u6362\u957f\u601d\u7ef4\u94fe\u4e0e\u77ed\u601d\u7ef4\u94fe\u7684\u80fd\u529b\uff0c\u4ee5\u5b9e\u73b0\u63a2\u7d22\u6027\u4e0e\u9ad8\u6548\u6027\u7684\u81ea\u9002\u5e94\u5e73\u8861\u3002<\/span><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\", Arial, sans-serif;letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 
13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>4. \u6a21\u578b\u8fdb\u5316<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5728\u6536\u96c6\u9ad8\u8d28\u91cf\u63a8\u7406\u6570\u636e\u540e\uff0c\u4e0b\u4e00\u6b65\u662f\u63d0\u5347\u7cfb\u7edf\u4e2d\u5404\u6a21\u578b\u80fd\u529b\uff0c\u4e3a\u540e\u7eed\u6570\u636e\u4f18\u5316\u6253\u4e0b\u57fa\u7840\u3002\u7531\u4e8e\u4efb\u52a1\u521b\u5efa\u5668\u7814\u7a76\u8f83\u5c11\uff0c\u672c\u6587\u805a\u7126\u4e8e\u63a8\u7406\u5668\u3001\u8bc4\u4f30\u5668\u548c\u540e\u5904\u7406\u5668\u7684\u8bad\u7ec3\u65b9\u6cd5\uff0c\u5e76\u4ece\u5f3a\u5316\u5b66\u4e60\u89c6\u89d2\u603b\u7ed3\u73b0\u6709\u5de5\u4f5c\uff0c\u6db5\u76d6\u884c\u4e3a\u514b\u9686<\/span><span>\uff08Behavior Cloning\uff09<\/span><span>\u3001\u504f\u597d\u4f18\u5316<\/span><span>\uff08Preference Optimization\uff09<\/span><span>\u548c\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08Reinforcement Learning\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">4.1 \u5f3a\u5316\u5b66\u4e60\u80cc\u666f\u77e5\u8bc6<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u4fbf\u4e8e\u6b63\u6587\u5f15\u7528\uff0c\u672c\u8282\u5148\u4ecb\u7ecd\u51e0\u79cd\u5178\u578b\u7684\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.1 
\u4ece\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60\uff08RLHF\uff09\u51fa\u53d1<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u9274\u4e8e ChatGPT<\/span><span>&nbsp;[Ouyang \u7b49\uff0c2022]<\/span><span>&nbsp;\u4e0e Claude OpenAI<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u7b49\u4ea7\u54c1\u7684\u6210\u529f\uff0c\u672c\u6587\u4ece\u5927\u8bed\u8a00\u6a21\u578b\u540e\u8bad\u7ec3\u4e2d\u7684\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08Reinforcement Learning from Human Feedback, RLHF\uff09[Ouyang \u7b49\uff0c2022]&nbsp;<\/span><span>\u4ecb\u7ecd\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u3002RLHF \u662f\u4e00\u79cd\u57fa\u4e8e\u504f\u597d\u7684\u5f3a\u5316\u5b66\u4e60\u6846\u67b6\uff0c\u5305\u542b\u4e24\u4e2a\u5173\u952e\u9636\u6bb5<\/span><span>&nbsp;[Wang \u7b49\uff0c2024h]<\/span><span>\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5956\u52b1\u5efa\u6a21\uff08Rewarding\uff09\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u6536\u96c6\u504f\u597d\u6570\u636e\u8bad\u7ec3\u5956\u52b1\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;37249548-89af-41c9-a694-effd5b84e204&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_{\\theta}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\" data-mpa-action-id=\"m96oibhu1bfo\" data-pm-slice=\"0 0 
[]\"><span><span>r<\/span><\/span><sub><span><span>\u03b8<\/span><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>\u3002\u65e9\u671f\u65b9\u6cd5\u901a\u8fc7\u4eba\u5de5\u6807\u6ce8\u540c\u4e00\u63d0\u793a\u4e0b\u7684\u591a\u4e2a\u54cd\u5e94\uff0c\u5e76\u6309\u8d28\u91cf\u6392\u5e8f\u4ee5\u8868\u793a\u4eba\u7c7b\u504f\u597d\u5173\u7cfb<\/span><\/span><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.37579617834394907\" data-type=\"png\" data-w=\"314\" style=\"vertical-align: baseline;width: 60px;height: auto !important\" width=\"60\" data-width=\"60px\" data-imgfileid=\"100227523\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-70523fa4abe5371b13dadc7f89bc43e1.png\" \/><\/span><\/sub><span style=\"font-size: 15px\"><span>\u3002&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;923c4227-1b4e-44a2-afb7-6c4ed66fd049&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y^w \\succ y^l&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u8bad\u7ec3\u76ee\u6807\u5982\u4e0b\uff1a<\/span><\/span><\/p>\n<section style=\"text-align: center;margin-bottom: 16px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.0824074074074074\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 338px;height: auto !important\" data-imgfileid=\"100227521\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-79295cd63b3f5dcf3f1cd0cd396d2de9.png\" \/><\/section>\n<\/li>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\"><\/ul>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7b56\u7565\u4f18\u5316\uff08Policy 
Optimization\uff09\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u5c06\u5927\u8bed\u8a00\u6a21\u578b\u5fae\u8c03\u4e3a\u7b56\u7565\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;1639f83f-56a0-4c73-b973-2337aa13f477&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_\\text{ref}&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m96oo0sd3oj\" data-pm-slice=\"0 0 []\"><span><span>\u03c0<\/span><\/span><sub><span>ref<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff0c\u76ee\u6807\u662f\u6700\u5927\u5316\u5176\u6240\u83b7\u5956\u52b1\u3002\u8fc7\u7a0b\u5305\u62ec\u751f\u6210\u5185\u5bb9\u3001\u901a\u8fc7\u5956\u52b1\u6a21\u578b\u8bc4\u5206\uff0c\u5e76\u4f7f\u7528 PPO<\/span><span>&nbsp;[Schulman \u7b49\uff0c2017]&nbsp;<\/span><span>\u8fdb\u884c\u4f18\u5316\uff1a<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.05462962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 403px;height: auto !important\" data-imgfileid=\"100227522\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-627eafdec1ccec5a437f1c1a823ac733.png\" \/><\/section>\n<section style=\"margin-bottom: 0px\"><span><br \/><\/span><\/section>\n<p style=\"margin-bottom: 0px;line-height: 1.75em;margin-left: 32px;margin-right: 32px\"><span style=\"font-size: 15px\" data-mpa-action-id=\"m96oosy23l2\" data-pm-slice=\"0 0 
[]\"><span><span>\u5176\u4e2d\u53c2\u8003\u6a21\u578b\u03c0<\/span><\/span><sub><span>ref<\/span><\/sub><span><span>\u901a\u5e38\u7ecf\u76d1\u7763\u5fae\u8c03<\/span><span>\uff08SFT\uff09<\/span><span>\u540e\u51bb\u7ed3\u53c2\u6570\u3002KL\u6563\u5ea6\u9879<\/span><\/span><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.11851851851851852\" data-type=\"png\" data-w=\"1080\" style=\"vertical-align: baseline;width: 157px;height: auto !important\" width=\"157\" data-width=\"157px\" data-imgfileid=\"100227736\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-2cade975b61a4c14a6e012f93c717da5.png\" \/><\/span><\/sub><span><span>&nbsp;\u7528\u4e8e\u9650\u5236\u504f\u79bb\u5e76\u4fdd\u6301\u591a\u6837\u6027\uff0c\u9632\u6b62\u7b56\u7565\u584c\u7f29\u3002<\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1 RLHF \u6700\u521d\u7528\u4e8e\u5bf9\u9f50\u4efb\u52a1\uff0c\u8be5\u6846\u67b6\u4e5f\u53ef\u7528\u4e8e\u4f18\u5316\u63a8\u7406\u80fd\u529b\u3002\u901a\u8fc7\u6784\u5efa\u57fa\u4e8e\u63a8\u7406\u6b63\u786e\u6027\u7684\u504f\u597d\u6570\u636e\uff0c\u53ef\u5f15\u5bfc\u6a21\u578b\u504f\u5411\u751f\u6210\u6b63\u786e\u63a8\u7406\u8def\u5f84\uff0c\u6291\u5236\u9519\u8bef\u8def\u5f84\uff0c\u4ece\u800c\u63d0\u5347\u63a8\u7406\u8868\u73b0\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.2 \u4ece RLHF \u5230\u66f4\u9ad8\u7ec6\u7c92\u5ea6\u7684 
PPO<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1 RLHF \u4f7f\u7528 PPO \u8fdb\u884c\u4f18\u5316\uff0c\u4f46\u5728\u5b9e\u9645\u5e94\u7528\u4e2d\uff0c\u7ecf\u5178 RLHF \u901a\u5e38\u88ab\u89c6\u4e3a\u4e00\u79cd\u8d4c\u535a\u673a<\/span><span>\uff08bandit\uff09<\/span><span>\u65b9\u6cd5\uff0c\u5373\u5c06\u6574\u53e5\u8bdd\u89c6\u4e3a\u4e00\u4e2a\u6574\u4f53\u52a8\u4f5c<\/span><span>&nbsp;[Zhong \u7b49\uff0c2024]<\/span><span>\u3002\u8fd9\u662f\u56e0\u4e3a RLHF \u4ec5\u4f9d\u8d56\u7ed3\u679c\u7ea7\u5956\u52b1\uff0c\u7f3a\u4e4f\u7ec6\u7c92\u5ea6\u7684\u4f18\u5316\u4fe1\u53f7\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u76f8\u8f83\u4e8e\u5bc6\u96c6\u5956\u52b1\uff0c\u7a00\u758f\u5956\u52b1\u4f1a\u663e\u8457\u589e\u52a0\u5b66\u4e60\u96be\u5ea6<\/span><span>&nbsp;[Andrychowicz 
\u7b49\uff0c2017]<\/span><span>\uff0c\u5728\u590d\u6742\u63a8\u7406\u4efb\u52a1\u4e2d\u5c24\u4e3a\u660e\u663e\u3002\u4f8b\u5982\uff0c\u5728\u591a\u6b65\u63a8\u7406\u4e2d\uff0c\u89e3\u7b54\u5931\u8d25\u5e76\u4e0d\u610f\u5473\u7740\u6bcf\u4e00\u6b65\u90fd\u51fa\u9519\uff0c\u53ef\u80fd\u524d\u51e0\u6b65\u662f\u6b63\u786e\u7684\uff0c\u4ec5\u540e\u7eed\u6b65\u9aa4\u6709\u8bef\u3002\u800c\u4ec5\u4f7f\u7528\u7ed3\u679c\u5956\u52b1\uff0c\u4f1a\u5728\u8bad\u7ec3\u4e2d\u6291\u5236\u90a3\u4e9b\u6b63\u786e\u7684\u4e2d\u95f4\u63a8\u7406\u3002\u4e3a\u5145\u5206\u53d1\u6325\u5f3a\u5316\u5b66\u4e60\u6f5c\u529b\uff0c\u9700\u5f15\u5165\u6b65\u9aa4\u7ea7\u751a\u81f3\u8bcd\u5143\u7ea7\u5956\u52b1\u4f5c\u4e3a\u66f4\u7ec6\u7c92\u5ea6\u7684\u4f18\u5316\u4fe1\u53f7\u3002\u8981\u5b9e\u73b0\u8fd9\u4e00\u70b9\uff0c\u9700\u8981\u91cd\u65b0\u5ba1\u89c6 PPO \u7b97\u6cd5\u7684\u8bbe\u8ba1\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>PPO<\/span><span>&nbsp;[Schulman \u7b49\uff0c2017]&nbsp;<\/span><span>\u662f\u4e00\u79cd\u7ecf\u5178\u7684 on-policy \u7b97\u6cd5\uff0c\u5728\u591a\u4e2a\u9886\u57df\u8868\u73b0\u51fa\u826f\u597d\u7684\u7a33\u5b9a\u6027\u4e0e\u6548\u679c\u3002\u5176\u901a\u7528\u8bad\u7ec3\u76ee\u6807\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.08703703703703704\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-imgfileid=\"100227624\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-75fb412190182e9d96f2790df23f75be.png\" 
\/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5176\u4e2d y<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;1fe2fe9e-df68-4708-bb1d-9facbad0b83d&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>&nbsp;\u8868\u793a\u7b56\u7565\u6a21\u578b\u751f\u6210\u7684\u6587\u672c\uff0c&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;c0ea29c6-7792-4102-b82e-610fd9633f0c&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;|y|&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>|y<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;c0ea29c6-7792-4102-b82e-610fd9633f0c&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;|y|&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 
15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76tzmd&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;c0ea29c6-7792-4102-b82e-610fd9633f0c&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;|y|&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span>|<\/span><\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u8868\u793a\u5176\u5b57\u7b26\u6570\u3002\u4f18\u52bf\u51fd\u6570\u5b9a\u4e49\u4e3a&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;c65fb502-0cff-4fc6-96eb-529d6db243ff&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;A_t = Q(s_t, y_t) - V(s_t)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99g8zcwnv6\" data-pm-slice=\"0 0 []\"><span><span>A<\/span><\/span><sub><span>t<\/span><\/sub><span data-mpa-action-id=\"m99g9ck317z6\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99g9hp4gzp\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99g9mw021op\" data-pm-slice=\"0 0 []\"><span><span>&nbsp;= Q(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>, y<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>) &#8211; V(s<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>)&nbsp;<\/span><\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u5373\u5c06\u52a8\u4f5c\u503c\u51fd\u6570&nbsp;<\/span><\/span><span style=\"font-size: 
<span data-meta-block-props">
15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;5149f883-4b0b-4b8f-97f8-139797040aab&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;Q(s_t, y_t)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99g9ydu7ng\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99ga29jo0m\" data-pm-slice=\"0 0 []\"><span><span>Q(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>, y<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>)&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u5f52\u4e00\u5316\u81f3\u72b6\u6001\u503c\u57fa\u7ebf&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;b43c4cb1-3834-496f-b5c6-e43986c56769&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;V(s_t)nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gabwp3ly\" data-pm-slice=\"0 0 []\"><span><span>V(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>)&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u4ee5\u964d\u4f4e\u65b9\u5dee\u3001\u63d0\u9ad8\u5b66\u4e60\u7a33\u5b9a\u6027\u3002\u5b9e\u9645\u8bad\u7ec3\u4e2d\u5e38\u7528\u5e7f\u4e49\u4f18\u52bf\u4f30\u8ba1<\/span><span>\uff08Generalized Advantage Estimation, GAE\uff09<\/span><span>\u5f62\u5f0f\uff0c\u4ee5\u5e73\u8861\u504f\u5dee\u4e0e\u65b9\u5dee\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" 
data-ratio=\"0.12314814814814815\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 366px;height: auto !important\" data-imgfileid=\"100227625\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-edb3d48b3bf6ea6a6a0e66b88d1139f0.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;2112ba39-420d-4399-81f8-832f4b6a5e84&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\begin{aligned}nA^{GAE}_t &amp;= \\delta_t + (\\gamma \\lambda)\\delta_{t+1} + ... + (\\gamma \\lambda)^T \\delta_{T-t-1}\\\\n\\delta_t &amp;= r_t + \\gamma V(s_{t+1}) - V(s_t)= Q(s_t, a_t) - V(a_t),n\\end{aligned}\\ \\ \\ \\ (5)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5176\u4e2d&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;75649f45-7c84-45d0-9e40-f76e9e56408e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\gamma&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>\u03b3&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u662f\u6298\u6263\u56e0\u5b50\uff0c<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;0c081ed8-33d8-4965-af63-cd73b0fdfd1e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\lambda&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>\u03bb&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u662f\u4f4d\u4e8e\u533a\u95f4 [0, 1]\u7684\u8d85\u53c2\u6570\u3002\u5f53&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;75649f45-7c84-45d0-9e40-f76e9e56408e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\gamma&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76thqi&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;75649f45-7c84-45d0-9e40-f76e9e56408e&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\\\gamma&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span>\u03b3 = 0&nbsp;<\/span><\/span><\/span><\/span><span 
style=\"font-size: 15px\"><span>\u65f6\uff0c<\/span><\/span><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.10123456790123457\" data-type=\"png\" data-w=\"810\" style=\"vertical-align: baseline;width: 195px;height: auto !important\" data-width=\"294px\" data-croporisrc=\"https:\/\/mmbiz.qpic.cn\/mmbiz_png\/wibWV1DB7tWIfpdmZsNWdnel3miccib5nmQHfy10wQ6YzHWZ2k5YDm9iamnV79BBicgFibUxr9ZpytkKQ2pKpI2h77LA\/640?wx_fmt=png&amp;from=appmsg\" data-cropx1=\"270\" data-cropx2=\"1080\" data-cropy2=\"83.07692307692308\" data-imgfileid=\"100227626\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-5ea852f77b1f3519546934fc5371d9fe.jpg\" \/><\/span><\/sub><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1 PPO \u5728 RLHF \u4e2d\u8868\u73b0\u826f\u597d\uff0c\u4f46\u5176\u5bf9\u8bad\u7ec3\u8d44\u6e90\u7684\u9ad8\u8981\u6c42\u9650\u5236\u4e86\u5176\u5728\u63a8\u7406\u4f18\u5316\u4e2d\u7684\u5e94\u7528\u3002\u5b8c\u6574 PPO \u6846\u67b6\u5305\u542b\u56db\u4e2a\u6a21\u5757\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u7b56\u7565\u6a21\u578b\uff08policy model\uff09\u3001\u53c2\u8003\u6a21\u578b\uff08reference model\uff09\u3001\u4ef7\u503c\u6a21\u578b\uff08value model\uff09\u548c\u5956\u52b1\u6a21\u578b\uff08reward model\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002\u540e\u4e24\u8005\u7684\u521d\u59cb\u5316\u66f4\u8fdb\u4e00\u6b65\u589e\u52a0\u4e86\u8bad\u7ec3\u590d\u6742\u5ea6\uff0c\u5e76\u5f71\u54cd\u7b56\u7565\u6a21\u578b\u7a33\u5b9a\u6027\u3002\u4e3a\u7b80\u5316 PPO 
\u6846\u67b6\uff0c\u5df2\u6709\u7814\u7a76\u63d0\u51fa\u591a\u79cd\u6539\u8fdb\u65b9\u6cd5\uff0c\u5982\u8df3\u8fc7\u5bf9\u4ef7\u503c\u6a21\u578b&nbsp;<\/span><span>[Shao \u7b49\uff0c2024]&nbsp;<\/span><span>\u6216\u5956\u52b1\u6a21\u578b<\/span><span>&nbsp;[Rafailov \u7b49\uff0c2023]&nbsp;<\/span><span>\u7684\u663e\u5f0f\u5efa\u6a21\u4e0e\u8bad\u7ec3\u3002\u4e0b\u9762\u5c06\u4ecb\u7ecd\u82e5\u5e72\u4ee3\u8868\u6027\u5de5\u4f5c\uff0c\u5c55\u793a\u5982\u4f55\u7b80\u5316 PPO \u8bad\u7ec3\u6d41\u7a0b\u4ee5\u652f\u6301\u66f4\u9ad8\u6548\u7684\u63a8\u7406\u4f18\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.3 \u4ece PPO \u5230 REINFORCE<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u964d\u4f4e\u8bad\u7ec3\u8d44\u6e90\u5f00\u9500\uff0c\u8fd1\u671f\u7814\u7a76\u91cd\u65b0\u5ba1\u89c6\u4e86 REINFORC<\/span><span>E [Sutton \u7b49\uff0c1999]&nbsp;<\/span><span>\u5728\u5927\u8bed\u8a00\u6a21\u578b\u4f18\u5316\u4e2d\u7684\u6f5c\u529b<\/span><span>&nbsp;[Li \u7b49\uff0c2023d\uff1bAhmadian \u7b49\uff0c2024]<\/span><span>\u3002REINFORCE \u662f\u4e00\u79cd\u7ecf\u5178\u7684\u7b56\u7565\u68af\u5ea6\u7b97\u6cd5\uff0c\u5176\u4f20\u7edf\u4f18\u5316\u76ee\u6807\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img 
class=\"rich_pages wxw-img\" data-ratio=\"0.16574074074074074\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 288px;height: auto !important\" data-imgfileid=\"100227627\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-241afea56d51d6cd101d18561ce4eae9.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;f495c1f1-ee89-472a-980e-b52393f15c07&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\sum_{t=1}^T\\mathbb{E}_{a_t\\sim\\pi_\\phi(a_t|s_t)}[R(s_t,a_t)\\log\\pi_\\phi(a_t|s_t)],\\ \\ \\ \\ (6)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5176\u4e2d\uff0c&nbsp;<\/span><\/span><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.25475285171102663\" data-type=\"png\" data-w=\"1052\" style=\"vertical-align: baseline;width: 140px;height: auto !important\" width=\"140\" data-width=\"140px\" data-imgfileid=\"100227628\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-7cc6f36a334c5782ee45620af0a3a027.png\" \/><\/span><\/sub><span style=\"font-size: 15px\"><span>&nbsp;\u4e3a\u7d2f\u79ef\u5956\u52b1\uff0c\u7528\u4e8e\u63a7\u5236\u7b56\u7565\u68af\u5ea6\u66f4\u65b0\u7684\u65b9\u5411\u4e0e\u6b65\u957f\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u7136\u800c\uff0cREINFORCE\u5b58\u5728\u9ad8\u65b9\u5dee\u95ee\u9898\uff0c\u5c24\u5176\u4f53\u73b0\u5728&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;965e4b2a-b1e8-4854-95ca-0071ef8c6c48&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;R(s_t,a_t)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gm45fv60\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99gm8enm53\" data-pm-slice=\"0 0 []\"><span><span>R(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>,a<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4e0a\uff0c\u5bfc\u81f4\u8bad\u7ec3\u8fc7\u7a0b\u4e0d\u7a33\u5b9a\u3002\u4e3a\u964d\u4f4e\u65b9\u5dee\uff0c\u5e38\u901a\u8fc7\u66ff\u6362\u4e3a\u52a8\u4f5c\u503c\u51fd\u6570&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;88631c1a-fa82-49c9-a202-bcfca77788a8&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;Q(s_t,a_t)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gmvvw1w74\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99gn0057fj\" data-pm-slice=\"0 0 []\"><span><span>Q(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>,a<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6216\u4f18\u52bf\u51fd\u6570&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;43cf9e5b-d564-4a04-85e1-2e21affc4ff7&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;A(s_t,a_t)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gmpuk1j7i\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99gmshk229i\" data-pm-slice=\"0 0 []\"><span><span>A(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>,a<\/span><\/span><\/span><sub><span>t<\/span><\/sub><span><span>)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff08\u5982 PPO \u6240\u91c7\u7528\uff09<\/span><span>\uff0c\u6216\u5f15\u5165\u57fa\u7ebf\u9879\u5b9e\u73b0\u4fee\u6b63\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.06944444444444445\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 308px;height: auto !important\" data-imgfileid=\"100227629\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-14f183abbe37a10a7b01c16dece0d030.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;380e7df0-8cdb-4b92-8210-fb5310b9c1ca&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\mathbb{E}_{a_t\\sim\\pi_\\phi(.|s_t)}[(R(s_t,a_t)-b)\\log\\pi_\\phi(a_t|s_t)].\\ \\ \\ \\ (7n)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 
8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u57fa\u7ebf&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;ce543a7c-a9c6-4ced-ab61-226fdd7a78f3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;b(s_t)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gpy671yro\" data-pm-slice=\"0 0 []\"><span><span>b(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6709\u591a\u79cd\u5b9e\u73b0\u65b9\u5f0f\u3002\u4e3a\u907f\u514d\u989d\u5916\u8bad\u7ec3\u4ef7\u503c\u6a21\u578b\uff0cReMax<\/span><span>&nbsp;[Li \u7b49\uff0c2023d]&nbsp;<\/span><span>\u91c7\u7528\u6982\u7387\u6700\u9ad8\u52a8\u4f5c\u7684\u5956\u52b1\u4f5c\u4e3a\u57fa\u7ebf\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.06851851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 335px;height: auto !important\" data-imgfileid=\"100227630\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-47e2d3939d0e864f866042b8d362d750.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;92d37696-e59e-4a1f-a465-334190e84251&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;b(s_t)=r(s_t,a_t),\\quad a_t\\in\\arg\\max\\pi_\\phi(.|s_t).\\ \\ \\ 
\\ (8)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Ahmadian \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u63d0\u51fa RLOO<\/span><span>\uff08REINFORCE Leave-One-Out\uff09<\/span><span>\u4f30\u8ba1\u5668\u3002\u5bf9\u4e8e\u4e00\u4e2a\u4efb\u52a1 q\uff0cRLOO \u91c7\u6837\u591a\u4e2a\u54cd\u5e94&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;bae86bfd-8232-43c8-8265-9d3cb0352646&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\{r_1,r_2,...,r_K\\}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gskrq1mnp\" data-pm-slice=\"0 0 []\"><span><span>{r<\/span><\/span><sub><span>1<\/span><\/sub><span data-mpa-action-id=\"m99gsowzgx6\" data-pm-slice=\"0 0 []\"><span><span>,r<\/span><\/span><sub><span>2<\/span><\/sub><span data-mpa-action-id=\"m99gstam1ann\" data-pm-slice=\"0 0 []\"><span><span>,&#8230;,r<\/span><\/span><sub><span>K<\/span><\/sub><span><span>}<\/span><\/span><\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u5e76\u4f7f\u7528\u9664\u5f53\u524d\u54cd\u5e94\u5916\u5176\u4f59\u8f68\u8ff9\u7684\u5e73\u5747\u503c\u4f5c\u4e3a\u57fa\u7ebf\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.2037037037037037\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 211px;height: auto !important\" data-imgfileid=\"100227631\" 
src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-ab1794e6747741971da76c4b12d3caf2.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;93b0ebe1-b792-4368-bd61-2a9dc3d83089&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;b(r^i)=\\frac1{k-1}\\sum_{j\\neq i}R(r^j,x).\\ \\ \\ \\ (9)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u4ec5\u6709\u7ed3\u679c\u7ea7\u5956\u52b1\u7684 Bandit \u573a\u666f\u4e2d\uff0cAhmadian \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u53d1\u73b0 RLOO \u4f18\u4e8e PPO\u3002\u5176\u539f\u56e0\u53ef\u80fd\u5728\u4e8e\uff0c\u7ecf\u8fc7\u5927\u89c4\u6a21\u9884\u8bad\u7ec3\u4e0e\u5fae\u8c03\u7684\u5927\u8bed\u8a00\u6a21\u578b\u672c\u8eab\u5c31\u662f\u5f3a\u521d\u59cb\u5316\u7b56\u7565\uff0c\u91c7\u6837\u8f68\u8ff9\u7684\u53e5\u5b50\u7ea7\u65b9\u5dee\u8f83\u5c0f\u3002\u540c\u65f6\uff0cRLOO \u901a\u8fc7\u91c7\u6837\u4f30\u8ba1\u4ef7\u503c\u51fd\u6570\uff0c\u51cf\u5c11\u4e86\u65b9\u5dee\uff0c\u5e76\u907f\u514d\u4e86\u663e\u5f0f\u5b66\u4e60\u4ef7\u503c\u51fd\u6570\u6240\u5e26\u6765\u7684\u504f\u5dee\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u7136\u800c\uff0c\u8fd9\u4e00\u4f18\u52bf\u4e3b\u8981\u4f53\u73b0\u5728 bandit 
\u8bbe\u7f6e\u4e2d\u3002\u5bf9\u4e8e\u591a\u8df3\u63a8\u7406\u7b49\u9700\u6b65\u9aa4\u7ea7\u6216\u8bcd\u5143\u7ea7\u5bc6\u96c6\u5956\u52b1\u7684\u4efb\u52a1\uff0cRLOO \u53ef\u80fd\u56e0\u65b9\u5dee\u8fc7\u5927\u800c\u8868\u73b0\u4e0d\u4f73\u3002\u867d\u7136 REINFORCE \u7b80\u5316\u4e86\u7ed3\u6784\u5e76\u964d\u4f4e\u6210\u672c\uff0c\u4f46\u5176\u7a33\u5b9a\u6027\u4ecd\u53d7\u9650\u4e8e\u5956\u52b1\u4fe1\u53f7\u7684\u7a00\u758f\u7a0b\u5ea6\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.17407407407407408\" data-type=\"png\" data-w=\"1080\" style=\"width: 100%;height: auto !important\" data-width=\"1253\" data-height=\"218\" data-backw=\"562\" data-backh=\"98\" data-imgfileid=\"100227507\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-15e7c22a24cbff5b9b880ec09021ea38.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jjweh1wrz\" data-pm-slice=\"0 0 []\"><span>\u8868 2\uff1a\u4e94\u79cd\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u5bf9\u6bd4\uff1aPPO\u3001RLOO\u3001GRPO\u3001DPO\u3001PRIME &nbsp;\uff08VM\uff1a\u4ef7\u503c\u6a21\u578b\uff1bRM\uff1a\u5956\u52b1\u6a21\u578b\uff1bRefM\uff1a\u53c2\u8003\u6a21\u578b\uff09<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.4 \u4ece PPO \u5230 GRPO<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span 
style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u5177\u5907\u6b65\u9aa4\u7ea7\u6216\u8bcd\u5143\u7ea7\u5956\u52b1\u7684\u573a\u666f\u4e0b\uff0cPPO \u662f\u5fae\u8c03\u7b56\u7565\u6a21\u578b\u7684\u7406\u60f3\u9009\u62e9\uff0c\u56e0\u5176\u901a\u8fc7\u4f18\u52bf\u51fd\u6570\u4e0e\u88c1\u526a\u64cd\u4f5c\u4fdd\u969c\u8bad\u7ec3\u7a33\u5b9a\u6027\u3002\u4f46\u5982\u516c\u5f0f\uff085\uff09\u6240\u793a\uff0c\u8ba1\u7b97\u4f18\u52bf\u51fd\u6570&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;93158116-c829-4b38-a025-bdf203a356d4&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;A_t^{GAE}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.5227272727272727\" data-type=\"png\" data-w=\"176\" style=\"vertical-align: baseline;width: 44px;height: auto !important\" width=\"44\" data-width=\"44px\" data-imgfileid=\"100227632\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-4346d8f199d52b3a21647f9aa80ac778.png\" \/><\/span><\/sub><span><span>&nbsp;\u9700\u540c\u65f6\u4f9d\u8d56\u5956\u52b1\u6a21\u578b\u4e0e\u4ef7\u503c\u6a21\u578b&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;e2b4f9f2-e04f-4ccb-8f70-f7ffd5076f20&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;V(s_t)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gvfq42kv\" data-pm-slice=\"0 0 
[]\"><span><span>V(s<\/span><\/span><sub><span>t<\/span><\/sub><span><span>)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u3002\u901a\u5e38\uff0c\u4ef7\u503c\u6a21\u578b\u89c4\u6a21\u4e0e\u63a8\u7406\u5668\u76f8\u5f53\uff0c\u8bad\u7ec3\u56f0\u96be\u3001\u6613\u4e0d\u7a33\u5b9a\uff0c\u4e14\u5927\u5e45\u589e\u52a0\u8d44\u6e90\u8d1f\u62c5\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\" data-mpa-action-id=\"m9cucm9hn3w\" data-pm-slice=\"0 0 []\"><span style=\"font-size: 15px\"><span>\u4e3a\u6b64\uff0cShao \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u63d0\u51fa GRPO\uff0c\u5229\u7528\u8499\u7279\u5361\u6d1b<\/span><span>\uff08MC\uff09<\/span><span>\u91c7\u6837\u66ff\u4ee3\u4ef7\u503c\u6a21\u578b\uff0c\u5bf9 PPO \u8fdb\u884c\u6539\u9020\u3002\u5177\u4f53\u800c\u8a00\uff0c\u5bf9\u6bcf\u4e2a\u4efb\u52a1&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;7c73aec9-ffdd-43ac-9900-84eb400495df&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;qn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>q<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0cGRPO \u540c\u65f6\u91c7\u6837&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;6485ab51-d69d-4039-9144-fc03c551f6f5&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;Gn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>G<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4e2a\u5b8c\u6574\u89e3&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;8bd58508-18ca-4ad1-a83f-213f13f22fc3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot; y_1, y_2, ..., y_Gn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gwe7v1ar9\" data-pm-slice=\"0 0 []\"><span><span>&nbsp;y<\/span><\/span><sub><span>1<\/span><\/sub><span data-mpa-action-id=\"m99gwm1b1cqs\" data-pm-slice=\"0 0 []\"><span><span>, y<\/span><\/span><sub><span>2<\/span><\/sub><sub><span><span>,<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;8bd58508-18ca-4ad1-a83f-213f13f22fc3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot; y_1, y_2, ..., y_Gn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99gwe7v1ar9\" data-pm-slice=\"0 0 []\"><span data-mpa-action-id=\"m99gxg273zg\" data-pm-slice=\"2 3 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 
1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76tis7&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;8bd58508-18ca-4ad1-a83f-213f13f22fc3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot; y_1, y_2, ..., y_G\\n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-mpa-action-id&quot;:&quot;m99gwe7v1ar9&quot;,&quot;data-pm-slice&quot;:&quot;0 0 []&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span><span>\u2026,<\/span><\/span><\/span><\/span><\/span><\/span><\/sub><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\" data-mpa-action-id=\"m9cud2goeq8\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sub><span>G<\/span><\/sub><span><span>\uff0c\u6839\u636e\u5956\u52b1\u51fd\u6570\u5bf9\u6bcf\u4e2a\u89e3\u7ed9\u4e88\u5956\u52b1\u3002<\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4e24\u79cd\u5956\u52b1\u7248\u672c\uff1a<\/span><\/span><\/strong><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li style=\"text-align: left\">\n<p 
style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4f7f\u7528 PRM \u65f6<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff0c\u4e3a\u6bcf\u6b65\u5206\u914d\u5956\u52b1\uff0c\u6784\u9020\u5956\u52b1\u96c6\uff1a<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;9ea2d41a-e142-4f37-a601-83ef05d34650&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;R=\\{r_1^{index(1)},...,r_1^{index(k_1)},...,r_G^{index(1)},...,r_G^{index(\\dot{k}_G)}\\}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<section><span><br \/><\/span><\/section>\n<section style=\"text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.06851851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 360px;height: auto !important\" data-imgfileid=\"100227633\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-a822cd7641b5daad532849c990b92170.png\" \/><\/section>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;9ea2d41a-e142-4f37-a601-83ef05d34650&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;R=\\{r_1^{index(1)},...,r_1^{index(k_1)},...,r_G^{index(1)},...,r_G^{index(\\dot{k}_G)}\\}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u5176\u4e2d&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;a96bb8a4-1fdb-45ce-a530-fe38b4d2bc93&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;k_in&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>k<\/span><\/span><\/span><\/span><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span>i<\/span><\/sub><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u8868\u793a&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;068d41c3-6187-49de-a3f2-63287c3951db&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y_in&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>y<\/span><\/span><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span>i<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4e2d\u7684\u6b65\u9aa4\u6570\uff0c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;d97640c7-67d9-481f-8682-0ed184699af0&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_i^{index(j)}&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.44642857142857145\" data-type=\"png\" data-w=\"336\" style=\"vertical-align: baseline;width: 48px;height: auto !important\" width=\"48\" data-width=\"48px\" data-imgfileid=\"100227634\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-59106af2dd5454f521063d434de694a4.png\" \/><\/span><\/sub><\/span><\/span><\/span><span 
style=\"font-size: 15px\"><span>\u8868\u793a&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;119c6632-ea2a-469d-a665-b79c52323ed6&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y_in&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>y<\/span><\/span><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span>i<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u7684\u7b2c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;cf2ed0bf-866a-4ed0-85c6-762bb14b59e7&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;jn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>j<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6b65\u7ed3\u675f\u8bcd\u5143\u5904\u7684\u5956\u52b1\uff08index(j) \u4e3a\u8be5\u8bcd\u5143\u7684\u7d22\u5f15\uff09\u3002\u6b64\u65f6\uff0c\u4f18\u52bf\u51fd\u6570\u8ba1\u7b97\u5982\u4e0b\uff1a&nbsp;<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.31296296296296294\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 258px;height: auto !important\" data-imgfileid=\"100227635\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-6fdf2bc6bb3d9fef557c4f5faa278b55.png\" \/><\/section>\n<section><span><br \/><\/span><\/section>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4f7f\u7528 ORM \u65f6<\/span><\/span><\/strong><span style=\"font-size:
15px\"><span>\uff0c\u4e3a\u6bcf\u4e2a\u89e3\u5206\u914d\u4e00\u4e2a\u5956\u52b1&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;271c1280-2f32-4d05-9ad0-6d8d386c8e21&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_in&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99h64oz1mu\" data-pm-slice=\"0 0 []\"><span><span>r<\/span><\/span><sub><span>i<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u4f18\u52bf\u51fd\u6570\u7b80\u5316\u4e3a\uff1a<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<section style=\"margin-bottom: 0px\"><span><br \/><\/span><\/section>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.17592592592592593\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 208px;height: auto !important\" data-imgfileid=\"100227636\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-f23e9299ce3fa6ad17d92269f47e1d37.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;d45dc7cb-c23a-4c2b-9768-dffc473562e6&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\tilde{A}_{i,t} = \\tilde{r}_i = \\frac{r_i - \\text{mean}(R)}{\\text{std}(R)}.\\ \\ \\ \\ (11)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u65e0\u8bba\u4f7f\u7528\u54ea\u79cd\u6a21\u578b\uff0cGRPO\u90fd\u5c06\u7ec4\u5185\u5956\u52b1\u6807\u51c6\u5316\uff0c\u4ee5\u5e73\u5747\u503c\u66ff\u4ee3\u4ef7\u503c\u6a21\u578b\u4f5c\u4e3a\u57fa\u7ebf\uff0c\u4ece\u800c\u5728\u8bad\u7ec3\u4e2d\u6291\u5236\u4f4e\u8d28\u91cf\u884c\u4e3a\uff0c\u5f3a\u5316\u9ad8\u8d28\u91cf\u884c\u4e3a\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6700\u7ec8\uff0cGRPO\u7684\u4f18\u5316\u76ee\u6807\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.15462962962962962\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-imgfileid=\"100227637\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-3d4abefb35acf514cc67a19930aa4ade.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;7aa46591-a6d6-45d6-9035-6ee6d19486e7&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\begin{aligned}n &amp; \\mathbb{E}[q\\sim P(Q),\\{y_i\\}_{i=1}^G\\sim\\pi_{\\phi_{old}}(y_i|q)]\\frac{1}{G}\\sum_{i=1}^G\\frac{1}{|y_i|}\\sum_{t=1}^{|y_i|} \\\\n &amp; 
\\left\\{\\min[\\frac{\\pi_\\phi(y_{i,t}|q,y_{i,&lt;t})}{\\pi_{\\phi_{old}}(y_{i,t}|q,y_{i,&lt;t})}\\tilde{A}_{i,t},\\operatorname{clip}(\\frac{\\pi_\\phi(y_{i,t}|q,y_{i,&lt;t})}{\\pi_{\\phi_{old}}(y_{i,t}|q,y_{i,&lt;t})},1-\\epsilon,1+\\epsilon)\\tilde{A}_{i,t}]-\\beta\\mathbb{D}_{KL}[\\pi_\\theta||\\pi_{ref}]\\right\\},n\\end{aligned}n\\ \\ \\ \\ (12)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5176\u4e2d<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;81cc3c5c-3d7d-4428-b471-2b5fa1116f74&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_n{\\phi_{old}}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.4895833333333333\" data-type=\"png\" data-w=\"192\" style=\"vertical-align: baseline;width: 41px;height: auto !important\" width=\"41\" data-width=\"41px\" data-imgfileid=\"100227640\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-a080b368c32fc84923d4e1221b1c8353.png\" \/><\/span><\/sub><\/span><span><span>\u662f\u4e0a\u4e00\u8f6e\u7684\u53c2\u8003\u6a21\u578b\u3002\u6b64\u5916\uff0c\u4e3a\u63d0\u9ad8\u7a33\u5b9a\u6027\uff0cGRPO \u5728 PPO \u57fa\u7840\u4e0a\u52a0\u5165 KL \u6563\u5ea6\u9879\uff0c\u4f46\u91c7\u7528 Schulman&nbsp;<\/span><span>[2020]&nbsp;<\/span><span>\u63d0\u51fa\u7684\u65e0\u504f\u4f30\u8ba1\u65b9\u6cd5\uff1a<\/span><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section 
style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.09259259259259259\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 482px;height: auto !important\" data-imgfileid=\"100227638\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-b2198205ac9f7591e5e548a1053cedc9.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;cf8c2dde-592a-4811-9f4a-61fa3d4e6353&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\mathbb{D}_{KL}[\\pi_\\phi||\\pi_{\\phi_{old}}]=\\frac{\\pi_{\\phi_{old}}(y_{i,t}|q,y_{i,&lt;t})}{\\pi_\\phi(y_{i,t}|q,y_{i,&lt;t})}-\\log\\frac{\\pi_{\\phi_{old}}(y_{i,t}|q,y_{i,&lt;t})}{\\pi_\\phi(y_{i,t}|q,y_{i,&lt;t})}-1.\\ \\ \\ \\ (13)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u603b\u7ed3\uff0cGRPO \u901a\u8fc7 MC \u91c7\u6837\u4f30\u7b97\u4f18\u52bf\u51fd\u6570\uff0c\u65e0\u9700\u5f15\u5165\u4ef7\u503c\u6a21\u578b\uff0c\u7b80\u5316\u4e86\u8bad\u7ec3\u67b6\u6784\uff0c\u4ec5\u9700\u7b56\u7565\u6a21\u578b\u3001\u53c2\u8003\u6a21\u578b\u548c\u5956\u52b1\u6a21\u578b\u3002\u5b83\u7ee7\u627f\u4e86 PPO \u7684\u7a33\u5b9a\u6027\uff0c\u540c\u65f6\u5229\u7528 LLM \u7684\u5f3a\u5148\u9a8c\u6027\u5f31\u5316\u4e86\u65b9\u5dee\u95ee\u9898\u3002MC \u91c7\u6837\u5e26\u6765\u65e0\u504f\u4f30\u8ba1\uff0c\u7ec4\u5185\u6807\u51c6\u5316\u5f3a\u5316\u9ad8\u8d28\u91cf\u8f68\u8ff9\uff0c\u663e\u8457\u63d0\u5347\u8bad\u7ec3\u7a33\u5b9a\u6027\u3002GRPO \u5df2\u5728\u591a\u4e2a\u7c7b O1 
\u7684\u5f00\u6e90\u9879\u76ee\u4e2d\u6210\u529f\u5e94\u7528\uff0c\u5982<\/span><span>&nbsp;[Shao \u7b49\uff0c2024\uff1bYang \u7b49\uff0c2024a\uff1bWang \u7b49\uff0c2024e\uff1bDeepSeek-AI \u7b49\uff0c2025]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.5 \u4ecePPO\u5230DPO<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>RLHF \u9700\u663e\u5f0f\u5efa\u6a21\u5e76\u9884\u8bad\u7ec3\u5956\u52b1\u6a21\u578b\uff0c\u589e\u52a0\u4e86\u8ba1\u7b97\u8d44\u6e90\u6d88\u8017\u4e0e\u8bad\u7ec3\u590d\u6742\u5ea6\u3002\u4e3a\u6b64\uff0cDPO \u9996\u5148\u6307\u51fa\u4e86\u7b49\u5f0f\uff083\uff09\u7684\u95ed\u5f0f\u89e3\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.12222222222222222\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 278px;height: auto !important\" data-imgfileid=\"100227642\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-67aef1b9b61ef4c113e295eac9975e66.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u8be5\u7ed3\u8bba\u8bf4\u660e\uff0c\u6700\u4f18\u7b56\u7565\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;1386d6b3-8d66-46e0-bb97-13034a1c9bbb&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi^*(y|x)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99l7o131lcl\" data-pm-slice=\"0 0 []\"><span><span>\u03c0<\/span><\/span><sup><span>*<\/span><\/sup><span><span>(y|x)&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u4e0e\u5956\u52b1\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;ce908464-a1b8-4a55-8b9e-acd210a65610&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r( x, y) n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>r( x, y)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u7d27\u5bc6\u8026\u5408\u3002\u5373\u8bbe\u5b9a\u5956\u52b1\u6a21\u578b\u5373\u53ef\u5bf9\u5e94\u4e00\u4e2a\u6700\u4f18\u7b56\u7565\uff0c\u6700\u5927\u5316\u5176\u9690\u542b\u7684\u6700\u4f18\u8f68\u8ff9\u6982\u7387\u3002DPO \u5c06\u7b49\u5f0f\uff0814\uff09\u53d8\u6362\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.13796296296296295\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 298px;height: auto !important\" data-imgfileid=\"100227643\" 
src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-1692a036ea358508105ce25b608670a2.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;367f1f6b-bb45-492d-a30c-d1807f24bd2b&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r(x,y)=\\beta\\log\\frac{\\pi^*(y|x)}{\\pi_{ref}(y|x)}+\\beta\\log Z(x).\\ \\ \\ \\ (15)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd9\u610f\u5473\u7740\uff0c\u5956\u52b1\u51fd\u6570&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;4809e10c-5d56-4500-86ce-10adf0d0a2d2&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r( x, y) n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>r( x, y)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u53ef\u7531\u7b56\u7565\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;8cd5c261-dbc8-4e07-81af-a3568f7ba437&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi(y|x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>\u03c0(y|x)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u8868\u8fbe\u3002\u56e0\u6b64\uff0cRafailov 
\u7b49&nbsp;<\/span><span>[2023]&nbsp;<\/span><span>\u63d0\u51fa\uff1a\u4e0e\u5176\u5148\u8bad\u7ec3\u5956\u52b1\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;3763ac66-6578-4b47-8e0a-5f65db2db5ea&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r( x, y) n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>r( x, y)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u518d\u4f18\u5316\u7b56\u7565&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;8cd5c261-dbc8-4e07-81af-a3568f7ba437&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi(y|x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 
15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76ug30&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;8cd5c261-dbc8-4e07-81af-a3568f7ba437&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\\\pi(y|x)\\n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span>\u03c0(y|x)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u4e0d\u5982\u76f4\u63a5\u4f18\u5316\u7b56\u7565\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;8cd5c261-dbc8-4e07-81af-a3568f7ba437&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi(y|x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-pm-slice=\"1 1 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 
15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76ug30&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;8cd5c261-dbc8-4e07-81af-a3568f7ba437&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\\\pi(y|x)\\n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span>\u03c0(y|x)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6765\u5f97\u9ad8\u6548\u3002\u5728 RLHF \u4e2d\uff0c\u5956\u52b1\u6a21\u578b\u8bad\u7ec3\u901a\u5e38\u57fa\u4e8e Bradley-Terry \u504f\u597d\u5efa\u6a21\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.06666666666666667\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 326px;height: auto !important\" data-imgfileid=\"100227644\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-d341fb1f36cd7f8d44da4c632b5b1c21.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;648cd180-8caa-4b2a-85dd-21232839f408&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\mathbb{E}_{(x,y_w,y_l)\\in D,y_w\\succ y_l}[\\sigma(r_\\theta(x,y_w)-r_\\theta(x,y_l))].
\\ \\ \\ \\ (16)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c06\u7b49\u5f0f\uff0815\uff09\u4ee3\u5165\u7b49\u5f0f\uff0816\uff09\u540e\uff0cDPO \u53ef\u76f4\u63a5\u5c06\u8be5\u76ee\u6807\u8f6c\u5316\u4e3a\u7b56\u7565\u5b66\u4e60\u76ee\u6807\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.10462962962962963\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 395px;height: auto !important\" data-imgfileid=\"100227645\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-0812f2449a9ac66a0829f355b17bedc5.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;944170b3-f06d-4ad8-bd81-6c20bc4fc081&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\mathbb{E}_{(x,y_w,y_l)\\in D}[\\log\\sigma(\\beta\\log\\frac{\\pi_r(y_w|x)}{\\pi_{ref}(y_w|x)}-\\beta\\log\\frac{\\pi_r(y_l|x)}{\\pi_{ref}(y_l|x)})].\\ \\ \\ \\ (17)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u867d\u7136 DPO \u7701\u53bb\u4e86\u5956\u52b1\u6a21\u578b\u5efa\u6a21\uff0c\u7b80\u5316 RLHF\uff0c\u964d\u4f4e\u4e86 LLM 
\u4f18\u5316\u95e8\u69db\uff0c\u4f46\u540e\u7eed\u7814\u7a76\u4e5f\u63ed\u793a\u4e86\u82e5\u5e72\u95ee\u9898\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u4f18\u5316\u7c92\u5ea6\u7c97\u7cd9<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u539f\u59cb DPO \u53ea\u5728\u54cd\u5e94\u7ea7\u522b\u4f18\u5316\u504f\u597d\uff0c\u96be\u4ee5\u7cbe\u7ec6\u533a\u5206\u590d\u6742\u63a8\u7406\u4e2d\u7684\u6b63\u786e\u4e0e\u9519\u8bef\u6b65\u9aa4\uff0c\u5bb9\u6613\u5c06\u90e8\u5206\u6b63\u786e\u7684\u54cd\u5e94\u4e5f\u6807\u8bb0\u4e3a\u8d1f\u4f8b\u3002\u4e3a\u6b64\uff0c\u540e\u7eed\u63d0\u51fa\u4e86 step-DPO\u3001token-DPO \u7b49\u7ec6\u7c92\u5ea6\u65b9\u6cd5<\/span><span>\uff08\u8be6\u89c1 \u00a74.2.2\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6570\u636e\u5206\u5e03\u504f\u79fb<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1aDPO \u901a\u5e38\u5728\u79bb\u7ebf\u573a\u666f\u8bad\u7ec3\uff0c\u5148\u4f7f\u7528\u53c2\u8003\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;d15368fc-161e-470c-940c-4b5be7060509&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_{ref}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99lhicghg8\" data-pm-slice=\"0 0 []\"><span><span>\u03c0<\/span><\/span><sub><span>ref<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6536\u96c6\u4e00\u6279\u56fa\u5b9a\u7684\u504f\u597d\u6570\u636e\u96c6\uff0c\u518d\u7528 DPO
\u8bad\u7ec3\u7b56\u7565\u6a21\u578b \u03c0<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;c474263a-7d9f-484a-b699-0c7a88cf8da6&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_\\phinn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\" data-mpa-action-id=\"m99lh1dp17pl\" data-pm-slice=\"0 0 []\"><sub><span><span>\u03c6<\/span><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u3002\u8be5\u65b9\u6cd5\u867d\u5177\u6709\u8f83\u9ad8\u7684\u8bad\u7ec3\u6548\u7387\uff0c\u4f46\u5b8c\u5168\u4f9d\u8d56\u9759\u6001\u79bb\u7ebf\u6570\u636e\u53ef\u80fd\u9650\u5236\u6a21\u578b\u7684\u6301\u7eed\u5b66\u4e60\u80fd\u529b&nbsp;<\/span><span>[Chen \u7b49\uff0c2024a]<\/span><span>\u3002\u4e3a\u7f13\u89e3\u6b64\u95ee\u9898\uff0c\u6709\u7814\u7a76\u5c06 DPO \u62d3\u5c55\u5230\u5728\u7ebf\u5b66\u4e60\u6846\u67b6\u3002\u5177\u4f53\u505a\u6cd5\u662f\uff1a\u6bcf\u8f6e\u5148\u6536\u96c6\u4e00\u6279\u504f\u597d\u6570\u636e\uff0c\u4f7f\u7528 DPO \u8bad\u7ec3\u7b56\u7565\u6a21\u578b\uff0c\u7136\u540e\u5c06\u65b0\u8bad\u7ec3\u5f97\u5230\u7684\u6a21\u578b \u03c0<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;011e9a52-ac5e-4d15-b0a5-d3340caf0394&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_\\phin&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\" data-mpa-action-id=\"m99lixobrzl\" data-pm-slice=\"0 0 []\"><sub><span><span>\u03c6<\/span><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4f5c\u4e3a\u4e0b\u4e00\u8f6e\u6570\u636e\u6536\u96c6\u7684\u53c2\u8003\u6a21\u578b \u03c0<\/span><\/span><span style=\"font-size: 15px\"><span
data-meta-block-props=\"{&quot;blockId&quot;:&quot;e76c2c77-b2b0-4c85-8126-14707fd410ed&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_{ref}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99li6gv23my\" data-pm-slice=\"0 0 []\"><sub><span>ref<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u5b9e\u73b0\u7b56\u7565\u7684\u6301\u7eed\u8fed\u4ee3\u4f18\u5316\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6b63\u6837\u672c\u88ab\u6291\u5236<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;DPO \u5728\u8bad\u7ec3\u4e2d\u4e0d\u4ec5\u4f1a\u964d\u4f4e\u8d1f\u6837\u672c\u6982\u7387\uff0c\u4e5f\u53ef\u80fd\u8bef\u4f24\u6b63\u6837\u672c\uff0c\u5c24\u5176\u5f53\u6b63\u8d1f\u5dee\u5f02\u4e0d\u663e\u8457\u65f6\u3002\u4e3a\u89e3\u51b3\u8be5\u95ee\u9898\uff0c\u7814\u7a76\u8005\u5f15\u5165\u6b63\u5219\u5316\u9879\u4ee5\u5f3a\u5316\u5bf9\u6b63\u8d1f\u6837\u672c\u8d28\u91cf\u5dee\u5f02\u7684\u5efa\u6a21<\/span><span>&nbsp;[Azar \u7b49\uff0c2023\uff1bLe \u7b49\uff0c2024]<\/span><span>\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5956\u52b1\u4fe1\u53f7\u5229\u7528\u4e0d\u8db3&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>DPO 
\u672a\u5bf9\u504f\u597d\u7a0b\u5ea6\u8fdb\u884c\u663e\u5f0f\u5efa\u6a21\uff0c\u5728\u5956\u52b1\u6570\u503c\u53ef\u7528\u7684\u60c5\u51b5\u4e0b\uff0c\u4ec5\u901a\u8fc7\u6bd4\u8f83\u5956\u52b1\u9ad8\u4f4e\u6784\u9020\u504f\u597d\u5bf9\uff0c\u800c\u672a\u76f4\u63a5\u5229\u7528\u5956\u52b1\u4fe1\u53f7\u672c\u8eab\uff0c\u5bfc\u81f4\u4fe1\u606f\u5229\u7528\u4e0d\u8db3\u3002\u540c\u65f6\uff0c\u5bf9\u504f\u597d\u5bf9\u6570\u636e\u7684\u4f9d\u8d56\u4e5f\u63d0\u9ad8\u4e86\u8bad\u7ec3\u6570\u636e\u7684\u6784\u9020\u6210\u672c\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0cOREO<\/span><span>&nbsp;[Wang \u7b49\uff0c2024b]&nbsp;<\/span><span>\u63d0\u51fa\u4e00\u79cd\u5168\u65b0\u7684\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\uff0c\u4ec5\u4f9d\u8d56\u5956\u52b1\u4fe1\u53f7\u8fdb\u884c\u4f18\u5316\uff0c\u5b8c\u5168\u65e0\u9700\u504f\u597d\u5bf9\u6570\u636e\u3002<\/span><\/span><\/p>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 8px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1 DPO \u7b80\u5316\u4e86\u8bad\u7ec3\u6d41\u7a0b\uff0c\u4f46\u5176\u6cdb\u5316\u80fd\u529b\u4ecd\u4e0d\u5982 PPO<\/span><span>&nbsp;[Li \u7b49\uff0c2023c]<\/span><span>\uff0c\u751a\u81f3\u67d0\u4e9b\u4efb\u52a1\u4e2d\u4e0d\u5982\u76f4\u63a5 SFT<\/span><span>&nbsp;[Yuan \u7b49\uff0c2024b\uff1bChen \u7b49\uff0c2024d]<\/span><span>\u3002\u56e0\u6b64\uff0c\u591a\u9879\u6269\u5c55\u65b9\u6cd5\u88ab\u63d0\u51fa\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 8px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>fDPO<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u5f15\u5165\u6563\u5ea6\u7ea6\u675f\uff0c\u589e\u5f3a\u504f\u597d\u8868\u8fbe\u80fd\u529b\u4e0e\u9c81\u68d2\u6027<\/span><span>&nbsp;[Wang 
\u7b49\uff0c2023a]<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 8px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>cDPO<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u63d0\u5347\u5728\u566a\u58f0\u53cd\u9988\u73af\u5883\u4e0b\u7684\u7a33\u5b9a\u6027&nbsp;<\/span><span>[Chowdhury \u7b49\uff0c2024]<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 8px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>KTO<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u57fa\u4e8e Kahneman-Tversky \u5fc3\u7406\u6a21\u578b\u7ed3\u5408\u4eba\u7c7b\u51b3\u7b56\u504f\u597d&nbsp;<\/span><span>[Ethayarajh \u7b49\uff0c2024]<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 8px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>GPO<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u7528\u51f8\u51fd\u6570\u65cf\u53c2\u6570\u5316\u635f\u5931\u51fd\u6570\uff0c\u7edf\u4e00\u504f\u597d\u5b66\u4e60\u6846\u67b6<\/span><span>&nbsp;[Tang \u7b49\uff0c2024]<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>ORPO<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\uff1a\u53bb\u9664\u53c2\u8003\u6a21\u578b\uff0c\u4ec5\u4f7f\u7528\u504f\u597d\u4fe1\u606f\u4f18\u5316\u7b56\u7565\uff0c\u8fdb\u4e00\u6b65\u7b80\u5316\u6d41\u7a0b<\/span><span>&nbsp;[Hong \u7b49\uff0c2024]<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.1.6 \u4ece PPO \u5230 
PRIME<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Rafailov \u7b49<\/span><span>[2024]<\/span><span>\u8fdb\u4e00\u6b65\u5206\u6790DPO\uff0c\u5e76\u5f15\u5165<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u9690\u5f0f\u5956\u52b1\uff08Implicit Reward\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u6982\u5ff5\uff0c\u5176\u516c\u5f0f\u5982\u4e0b\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.19074074074074074\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 189px;height: auto !important\" data-imgfileid=\"100227646\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-99cf3315498e3bef4efe15df5e519a1e.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;6590e95a-1661-4934-abc4-bc4a9c81ae0b&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r(x,y) = \\beta \\log \\frac{\\pi_\\theta(y|x)}{\\pi_{ref}(y|x)}. 
\\ \\ \\ \\ (18)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>Rafailov \u7b49<\/span><span>[2024]<\/span><span>\u8ba4\u4e3a\uff0cDPO \u6240\u8bad\u7ec3\u7684\u7b56\u7565\u6a21\u578b\u5b9e\u8d28\u4e0a\u5145\u5f53\u4e86\u4e00\u4e2a\u8bcd\u5143\u7ea7\u5956\u52b1\u51fd\u6570\uff0c\u5176\u4e2d\u6bcf\u4e2a\u8bcd\u5143\u7684\u5956\u52b1\u6b63\u662f\u7531\u8be5\u516c\u5f0f\u5b9a\u4e49\u7684\u9690\u5f0f\u5956\u52b1\u3002\u8fd9\u4e00\u673a\u5236\u7684\u6709\u6548\u6027\u5df2\u5728\u591a\u9879\u5de5\u4f5c\u4e2d\u5f97\u5230\u9a8c\u8bc1<\/span><span>&nbsp;[Zhong \u7b49\uff0c2024\uff1bChen \u7b49\uff0c2024a]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Yuan \u7b49<\/span><span>&nbsp;[2024c]&nbsp;<\/span><span>\u8bc1\u660e\uff0c\u82e5\u5c06\u7ed3\u679c\u5956\u52b1\u51fd\u6570\u5b9a\u4e49\u4e3a<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;bfa47fca-da95-4eef-a03e-3c37e92cf14a&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\theta(y) = \\beta \\log \\frac{\\pi_\\theta(y|x)}{\\pi_{ref}(y|x)}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.2603550295857988\" data-type=\"png\" data-w=\"676\" style=\"vertical-align: baseline;width: 130px;height: auto !important\" width=\"130\" data-width=\"130px\" data-imgfileid=\"100227647\" 
src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-45ae6c1471937f61a473066c1c4b2d9b.png\" \/><\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\" data-mpa-action-id=\"m99luulp1nx5\" data-pm-slice=\"0 0 []\"><span><span>\uff0c\u5219\u6240\u5f97\u7684 ORM \u53ef\u76f4\u63a5\u7528\u4e8e\u8ba1\u7b97\u8bcd\u5143\u7ea7\u5956\u52b1\u3002\u6362\u53e5\u8bdd\u8bf4\uff0c\u6309\u6b64\u683c\u5f0f\u8bad\u7ec3\u51fa\u7684 ORM \u672c\u8d28\u4e0a\u4e5f\u53ef\u4f5c\u4e3a\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRM\uff09<\/span><span>\u4f7f\u7528\u3002\u5177\u4f53\u800c\u8a00\uff0cPRIME<\/span><span>&nbsp;[Cui \u7b49\uff0c2025]&nbsp;<\/span><span>\u5305\u542b\u56db\u4e2a\u6838\u5fc3\u7ec4\u4ef6\uff1a\u7b56\u7565\u6a21\u578b \u03c0<\/span><\/span><sub><span>\u03a6<\/span><\/sub><\/span><span style=\"font-size: 15px\"><span>\u3001\u7ed3\u679c\u5956\u52b1\u9a8c\u8bc1\u5668&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;ca8ca7dd-8017-4f23-85f6-90c01eacdea4&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_on&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99ltqxb154c\" data-pm-slice=\"0 0 []\"><span><span>r<\/span><\/span><sub><span>o<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u3001\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;829cce72-3a6b-4662-8115-9d1a0eb9232b&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_\\thetann&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>\u03c0<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;fdf4239a-c91b-46d9-9247-8b78739f50f0&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\theta(y)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99lrr3i1kto\" data-pm-slice=\"0 0 []\"><sub><span>\u03b8<\/span><\/sub><\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u53ca\u5176\u5bf9\u5e94\u7684\u53c2\u8003\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;954c3c06-b694-4070-a047-4ac882d328c5&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\pi_{ref}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99ltzpz1se\" data-pm-slice=\"0 0 []\"><span><span>\u03c0<\/span><\/span><sub><span>ref<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u3002\u5728\u751f\u6210\u54cd\u5e94&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;3c8065c9-ddd2-4e67-a426-7f9bfa14f294&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;yn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>y<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u540e\uff0cPRIME\u9996\u5148\u83b7\u53d6\u7ed3\u679c\u7ea7\u5956\u52b1&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;65d149aa-39e5-4a0d-adf2-4b13a17e769a&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_o(y)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99lv0x71e74\" data-pm-slice=\"0 0 []\"><span><span>r<\/span><\/span><sub><span>o<\/span><\/sub><span><span>(y)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff0c\u5e76\u901a\u8fc7\u4ea4\u53c9\u71b5\u635f\u5931\u8bad\u7ec3&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;fdf4239a-c91b-46d9-9247-8b78739f50f0&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\theta(y)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99lrr3i1kto\" data-pm-slice=\"0 0 []\"><span><span>r<\/span><\/span><sub><span>\u03b8<\/span><\/sub><span><span>(y)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.053703703703703705\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 429px;height: auto !important\" data-imgfileid=\"100227648\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-f47db6f2b74a1624380e54871a1d8ecc.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;12725b00-3184-453a-bbd3-6f2f453eb0cc&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_o(y) \\cdot \\log \\sigma(r_\\theta(y)) + (1 - r_o(y)) \\cdot \\log(1 - \\sigma(r_\\theta(y))),\\ \\ \\ \\ (19)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\" data-mpa-action-id=\"m99lz3g91tcg\" data-pm-slice=\"0 0 []\"><span style=\"font-size: 15px\"><span>\u5176\u4e2d&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;fdf4239a-c91b-46d9-9247-8b78739f50f0&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\theta(y)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99lrr3i1kto\" data-pm-slice=\"2 3 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;font-size: 
15px;&quot;,&quot;mpa-font-style&quot;:&quot;m96iv76uz1r&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;},&quot;node&quot;,{&quot;tagName&quot;:&quot;span&quot;,&quot;attributes&quot;:{&quot;data-meta-block-props&quot;:&quot;{&quot;blockId&quot;:&quot;fdf4239a-c91b-46d9-9247-8b78739f50f0&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\\\theta(y)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span><span>r<\/span><\/span><sub><span>\u03b8<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\" data-mpa-action-id=\"m99m54fyodu\" data-pm-slice=\"0 0 []\"><span><span>(y) \u88ab\u4f18\u5316\u4ee5\u903c\u8fd1\u771f\u5b9e\u7684\u7ed3\u679c\u5956\u52b1\u3002\u8be5\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\uff0c\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b \u03c0<\/span><\/span><sub><span>\u03b8<\/span><\/sub><span><span>&nbsp;\u4e5f\u540c\u6b65\u66f4\u65b0\uff0c\u5e76\u53ef\u7528\u4e8e\u4e3a\u6bcf\u4e2a\u8bcd<\/span><\/span><span data-mpa-action-id=\"m99m69d3krm\" data-pm-slice=\"0 0 []\"><span><span>\u5143 y<\/span><\/span><sub><span>t<\/span><\/sub><span><span>&nbsp;\u63d0\u4f9b\u8bcd\u5143\u7ea7\u5956\u52b1\uff1a<\/span><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.16111111111111112\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 248px;height: auto !important\" data-imgfileid=\"100227649\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-9bb512e32fc9868e5f4660243f46aa17.png\" \/><\/section>\n<p style=\"margin-left: 
8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;29f45e9d-cfbe-44ae-bd97-bce0675b8750&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r_\\theta(y_t) = \\beta \\log \\frac{\\pi_\\theta(y_t|x, y &lt; t)}{\\pi_\\text{ref}(y_t|x, y &lt; t)},\\ \\ \\ \\ (20)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd9\u6b63\u662f\u9690\u5f0f\u5956\u52b1\u7684\u8ba1\u7b97\u5f62\u5f0f\u3002\u57fa\u4e8e\u8bad\u7ec3\u597d\u7684&nbsp;<\/span><\/span><span style=\"font-size: 15px\" data-mpa-action-id=\"m99luulp1nx5\" data-pm-slice=\"2 3 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span><span>\u03c0<\/span><\/span><sub><span data-pm-slice=\"0 0 []\">\u03b8<\/span><\/sub><\/span><span style=\"font-size: 15px\"><span>\uff0cPRIME \u53ef\u4e3a\u7b56\u7565\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\" data-mpa-action-id=\"m99luulp1nx5\" data-pm-slice=\"2 3 [&quot;para&quot;,{&quot;tagName&quot;:&quot;p&quot;,&quot;attributes&quot;:{&quot;style&quot;:&quot;margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;&quot;},&quot;namespaceURI&quot;:&quot;http:\/\/www.w3.org\/1999\/xhtml&quot;}]\"><span><span>\u03c0<\/span><\/span><sub><span>\u03a6&nbsp;<\/span><\/sub><\/span><span style=\"font-size: 15px\"><span>\u63d0\u4f9b\u7cbe\u7ec6\u7684\u8bcd\u5143\u7ea7\u5956\u52b1\uff0c\u4ece\u800c\u4e0e\u5982 RLOO 
\u7b49\u591a\u79cd\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u65e0\u7f1d\u5bf9\u63a5<\/span><span>\uff08\u5982\u539f\u8bba\u6587\u6240\u793a\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>PRIME \u7684\u6838\u5fc3\u601d\u60f3\u662f\u5c06\u6574\u4f53\u7ed3\u679c\u5956\u52b1\u5206\u89e3\u5230\u6bcf\u4e2a\u8bcd\u5143\uff0c\u501f\u52a9\u5927\u89c4\u6a21\u91c7\u6837\u8fc7\u7a0b\u5b66\u4e60\u8bcd\u5143\u7ea7\u5956\u52b1\u3002\u5bf9\u6700\u7ec8\u7ed3\u679c\u8d21\u732e\u8f83\u5927\u7684\u8bcd\u5143\u5c06\u83b7\u5f97\u66f4\u9ad8\u5956\u52b1\u3002\u8be5\u65b9\u6cd5\u65e0\u9700\u4eba\u5de5\u6807\u6ce8\uff0c\u53ef\u540c\u65f6\u8bad\u7ec3\u7b56\u7565\u6a21\u578b\u4e0e\u5956\u52b1\u6a21\u578b\uff0c\u65e2\u907f\u514d\u4e86\u5956\u52b1\u6b3a\u9a97\u95ee\u9898\uff0c\u4e5f\u63d0\u5347\u4e86\u5956\u52b1\u6a21\u578b\u7684\u6cdb\u5316\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">4.2 \u63a8\u7406\u5668\u4f18\u5316<\/span><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.2.1 \u884c\u4e3a\u514b\u9686<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br 
\/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u6536\u96c6\u63a8\u7406\u8fc7\u7a0b\u6570\u636e\u540e\uff0c\u6700\u76f4\u63a5\u7684\u4f18\u5316\u65b9\u5f0f\u662f\u884c\u4e3a\u514b\u9686<\/span><span>\uff08Behavior Cloning, BC\uff09<\/span><span>\uff0c\u5373\u76d1\u7763\u5fae\u8c03<\/span><span>\uff08Supervised Fine-Tuning, SFT\uff09<\/span><span>\u3002\u7136\u800c\uff0c\u4ece\u7406\u8bba\u4e0a\u8bb2\uff0cBC\u53ea\u80fd\u5728\u6b63\u786e\u7684\u6570\u636e\u4e0a\u8fdb\u884c\u3002Yuan \u7b49<\/span><span>[2023a]<\/span><span>\u548c Tong \u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa\u4e86\u62d2\u7edd\u5fae\u8c03<\/span><span>\uff08Rejection Fine-Tuning\uff09<\/span><span>\uff0c\u901a\u8fc7\u7b54\u6848\u6807\u7b7e\u8fc7\u6ee4\u9519\u8bef\u7684\u63a8\u7406\u8f68\u8ff9\uff0c\u4ec5\u5728\u6b63\u786e\u8f68\u8ff9\u4e0a\u8fdb\u884c\u5fae\u8c03\u3002\u5c3d\u7ba1\u8fd9\u79cd\u65b9\u6cd5\u80fd\u4fdd\u8bc1\u8bad\u7ec3\u6570\u636e\u7684\u8d28\u91cf\uff0c\u4f46\u4f1a\u9020\u6210\u5927\u91cf\u6570\u636e\u6d6a\u8d39\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u63d0\u5347\u6b63\u786e\u6837\u672c\u7684\u6570\u91cf\uff0cZelikman \u7b49<\/span><span>[2022]<\/span><span>\u63d0\u51fa\u5728\u9519\u8bef\u89e3\u7684\u57fa\u7840\u4e0a\u5f15\u5165\u201c\u5408\u7406\u5316<\/span><span>\uff08Rationalization\uff09<\/span><span>\u201d\u6b65\u9aa4\u6765\u91cd\u751f\u6210\u63a8\u7406\u8fc7\u7a0b\u3002Zhang \u7b49<\/span><span>[2023a]<\/span><span>\u63d0\u51faHIR<\/span><span>\uff08Hindsight Instruction 
Relabeling\uff09<\/span><span>\uff0c\u901a\u8fc7\u91cd\u6807\u6307\u4ee4\u4f7f\u9519\u8bef\u89e3\u53ef\u7528\uff0c\u4f8b\u5982\u5c06\u201c\u751f\u6210\u6b63\u786e\u7b54\u6848\u201d\u6539\u4e3a\u201c\u751f\u6210\u9519\u8bef\u7b54\u6848\u201d\uff0c\u4ece\u800c\u65e0\u9700\u5f15\u5165\u989d\u5916\u53c2\u6570\u5373\u53ef\u590d\u7528\u5931\u8d25\u6837\u672c\u3002\u540c\u65f6\uff0cZhang \u7b49<\/span><span>[2024c]<\/span><span>\u548c Wang \u7b49<\/span><span>[2024k]<\/span><span>\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u63d0\u9ad8\u53d1\u73b0\u6b63\u786e\u8f68\u8ff9\u7684\u6548\u7387\u3002Chen \u7b49<\/span><span>[2024e]<\/span><span>\u5219\u901a\u8fc7\u6784\u9020\u9006\u5411\u95ee\u9898\u5e76\u5bf9\u5176\u8fdb\u884cSFT\uff0c\u4f7f\u6a21\u578b\u5177\u5907\u9006\u5411\u63a8\u7406\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1\u4e0a\u8ff0\u65b9\u6cd5\u63d0\u5347\u4e86\u6709\u6548\u6837\u672c\u6bd4\u4f8b\uff0c\u6216\u901a\u8fc7\u4fee\u6539\u6307\u4ee4\u5229\u7528\u9519\u8bef\u6570\u636e\uff0c\u4f46\u5728\u6700\u5927\u5316\u6570\u636e\u5229\u7528\u7387\u4e0e\u6316\u6398\u8d1f\u6837\u672c\u65b9\u9762\u4ecd\u5b58\u5728\u5c40\u9650\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.24591947769314473\" data-type=\"png\" data-w=\"919\" style=\"height: auto !important\" data-width=\"919\" data-height=\"226\" data-imgfileid=\"100227505\" 
src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-378bc1852dd8e9dba977ff167e36c0ae.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jil2f195h\" data-pm-slice=\"0 0 []\"><span>\u56fe 6\uff1a\u504f\u597d\u4f18\u5316\u7684\u4e09\u4e2a\u7c92\u5ea6\uff1a\u89e3\u51b3\u65b9\u6848\u7ea7\u3001\u6b65\u9aa4\u7ea7\u548c\u8bcd\u5143\u7ea7\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.2.2 \u504f\u597d\u4f18\u5316<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u504f\u597d\u4f18\u5316<\/span><span>\uff08Preference Optimization\uff09<\/span><span>\u662f\u63d0\u5347\u5927\u8bed\u8a00\u6a21\u578b\u63a8\u7406\u80fd\u529b\u7684\u4e3b\u6d41\u65b9\u6cd5\u3002\u5176\u6838\u5fc3\u601d\u60f3\u662f\uff1a\u63d0\u5347\u9ad8\u8d28\u91cf\u601d\u7ef4\u94fe<\/span><span>\uff08CoT\uff09<\/span><span>\u7684\u6982\u7387\uff0c\u540c\u65f6\u538b\u4f4e\u52a3\u8d28\u601d\u7ef4\u94fe\u7684\u6982\u7387\uff0c\u4ece\u800c\u589e\u5f3a\u76ee\u6807\u6a21\u578b\u7684\u63a8\u7406\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u65e9\u671f\u65b9\u6cd5\u5982 RRHF<\/span><span>&nbsp;[Yuan 
\u7b49\uff0c2023b]&nbsp;<\/span><span>\u4ece\u6392\u5e8f\u89d2\u5ea6\u8fdb\u884c\u504f\u597d\u5b66\u4e60\u3002\u8be5\u65b9\u6cd5\u4f7f\u7528\u5956\u52b1\u6a21\u578b\u5bf9\u4e0d\u540c\u6765\u6e90\u91c7\u6837\u7684\u54cd\u5e94&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;4832d7d3-6df8-4abb-98cb-04a2811ed4f9&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y_{i_k}nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span style=\"font-size:12px\" data-pm-slice=\"0 0 []\"><sub><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.7619047619047619\" data-type=\"png\" data-w=\"126\" style=\"vertical-align: baseline;width: 23px;height: auto !important\" width=\"28\" data-width=\"28px\" data-imgfileid=\"100227651\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-39a28ddfadcc9f00bf4045473e45f08e.png\" \/><\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u8fdb\u884c\u8bc4\u5206\u6392\u5e8f\uff0c\u6784\u5efa\u504f\u597d\u5bf9\uff0c\u5e76\u901a\u8fc7\u6392\u5e8f\u635f\u5931\u4f18\u5316\u6a21\u578b\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-bottom: 0px;margin-left: 8px;margin-right: 8px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.35\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 237px;height: auto !important\" data-imgfileid=\"100227650\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-14d908e8f415110e99c515016649e8a3.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;2cd8f90c-735e-49d5-9838-550293ad0bd6&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\begin{gathered}np_i=\\frac{\\sum_t\\log\\pi_\\phi(y_{i,t}|x,y_{i,&lt;t})}{||y_i||}, \\\\nL_{rank}=\\sum_{r_{i}&lt;r_{j}}max(0,p_{i}-p_{j}).n\\end{gathered}\\ \\ \\ \\ (21)nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0c\u4ee5 DPO<\/span><span>&nbsp;[Rafailov \u7b49\uff0c2023]&nbsp;<\/span><span>\u4e3a\u4ee3\u8868\u7684\u504f\u597d\u4f18\u5316\u65b9\u6cd5\u8fdb\u4e00\u6b65\u7b80\u5316\u4e86 RLHF \u6d41\u7a0b\uff0c\u7a81\u7834\u4e86 SFT \u7684\u9650\u5236\uff0c\u5e76\u56e0\u5176\u5b9e\u73b0\u7b80\u4fbf\u800c\u5e7f\u6cdb\u5e94\u7528\u4e8e\u5404\u7c7b\u4efb\u52a1\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e0b\u6587\u5c06\u6309\u504f\u597d\u6570\u636e\u7c92\u5ea6\uff0c\u5c06\u73b0\u6709\u7814\u7a76\u5212\u5206\u4e3a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u89e3\u51b3\u65b9\u6848\u7ea7\uff08solution-level\uff09\u3001\u6b65\u9aa4\u7ea7\uff08step-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\uff08token-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4f18\u5316\u4e09\u7c7b\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 
0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u89e3\u51b3\u65b9\u6848\u7ea7\u504f\u597d\u4f18\u5316<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8be5\u5c42\u7ea7\u504f\u597d\u6570\u636e\u6700\u6613\u83b7\u53d6\uff0c\u56e0\u800c\u65e9\u671f\u7814\u7a76\u591a\u96c6\u4e2d\u4e8e\u6b64\u3002Pang \u7b49<\/span><span>[2024]<\/span><span>\u4e0e Jiang \u7b49<\/span><span>[2024a]<\/span><span>\u57fa\u4e8e\u7b54\u6848\u6807\u7b7e\u5c06\u89e3\u5212\u5206\u4e3a\u201c\u6b63\u786e\u201d\u4e0e\u201c\u9519\u8bef\u201d\u7ec4\uff0c\u6784\u9020\u504f\u597d\u5bf9\u8fdb\u884c\u4f18\u5316\u3002\u5728\u7b54\u6848\u6807\u7b7e\u7f3a\u5931\u7684\u81ea\u6211\u8fdb\u5316\u573a\u666f\u4e2d\uff0c\u53ef\u501f\u52a9 LLM-as-a-Judge<\/span><span>&nbsp;[Gu \u7b49\uff0c2024]<\/span><span>&nbsp;\u6216\u9884\u8bad\u7ec3\u5956\u52b1\u6a21\u578b<\/span><span>&nbsp;[Yu \u7b49\uff0c2024a; Ouyang \u7b49\uff0c2022]<\/span><span>\u751f\u6210\u504f\u597d\u6570\u636e\u3002\u4f8b\u5982\uff0cYuan \u7b49<\/span><span>[2024d]<\/span><span>\u5229\u7528\u6a21\u578b\u81ea\u8bc4\u80fd\u529b\u5bf9\u5176\u751f\u6210\u7684\u89e3\u6253\u5206\u3002\u4f46\u81ea\u8bc4\u80fd\u529b\u6709\u9650\uff0c\u5956\u52b1\u51fd\u6570\u6cdb\u5316\u6027\u5f31\uff0c\u5bfc\u81f4\u8bc4\u4f30\u6613\u53d7\u566a\u58f0\u5e72\u6270\u3002Wang 
\u7b49<\/span><span>[2024c]<\/span><span>\u63d0\u51fa\u201c\u57fa\u4e8e\u4e0d\u786e\u5b9a\u6027\u7684\u504f\u597d\u4f18\u5316\u201d\u6846\u67b6\uff0c\u4f7f\u7528\u8d1d\u53f6\u65af\u795e\u7ecf\u7f51\u7edc\u91cf\u5316\u6bcf\u4e2a\u504f\u597d\u5bf9\u7684\u4e0d\u786e\u5b9a\u6027\uff0c\u5e76\u5c06\u5176\u878d\u5165 DPO \u8bad\u7ec3\uff0c\u63d0\u5347\u9c81\u68d2\u6027\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6b65\u9aa4\u7ea7\u504f\u597d\u4f18\u5316<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u76f8\u8f83\u4e8e\u7c97\u7c92\u5ea6\u7684\u89e3\u7ea7\u4f18\u5316\uff0c\u6b65\u9aa4\u7ea7\u504f\u597d\u4f18\u5316\u53ef\u66f4\u7cbe\u51c6\u5730\u5f15\u5bfc\u6a21\u578b\u5b66\u4e60\u3002\u4f8b\u5982\uff0c\u5728\u4e00\u6761\u9519\u8bef\u89e3\u4e2d\uff0c\u524d\u534a\u90e8\u5206\u63a8\u7406\u6b63\u786e\uff0c\u4ec5\u540e\u7eed\u51fa\u9519\u3002\u5982\u679c\u76f4\u63a5\u6309\u89e3\u7ea7\u4f18\u5316\uff0c\u53ef\u80fd\u4f1a\u8bef\u4f24\u524d\u9762\u6b63\u786e\u7684\u90e8\u5206\u3002\u4e3a\u89e3\u51b3\u8fd9\u4e00\u95ee\u9898\uff0c\u7814\u7a76\u4eba\u5458\u63a2\u7d22\u4e86\u6b65\u9aa4\u7ea7\u504f\u597d\u4f18\u5316\u3002\u76f8\u5173\u5de5\u4f5c\u53ef\u5206\u4e3a\u4e24\u7c7b\uff1a\u4e3b\u52a8\u6784\u5efa\u548c\u6811\u641c\u7d22\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 
1.75em\"><span style=\"font-size: 15px\"><span>\u4e3b\u52a8\u6784\u5efa\u65b9\u6cd5<\/span><span>\uff08active construction approach\uff09<\/span><span>\u7684\u6838\u5fc3\u601d\u60f3\u662f\uff1a\u5728\u5177\u6709\u76f8\u540c\u524d\u7f00\u7684\u63a8\u7406\u8f68\u8ff9\u4e2d\uff0c\u6709\u9488\u5bf9\u6027\u5730\u91c7\u6837\u6b63\u786e\u6216\u9519\u8bef\u7684\u5b50\u8f68\u8ff9\u3002Hwang \u7b49\u4eba<\/span><span>&nbsp;[2024]<\/span><span>&nbsp;\u9996\u5148\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u91c7\u6837\u5b9a\u4f4d\u7f3a\u9677\u8f68\u8ff9&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;26435d92-9814-461f-a2be-4e479c60c2b9&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y^-&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99mhqzriki\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sup><span>&#8211;<\/span><\/sup><span><span>&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u4e2d\u7684\u9996\u4e2a\u9519\u8bef\u6b65\u9aa4\u3002\u4ece\u8be5\u6b65\u9aa4\u8d77\uff0c\u5c06\u6bcf\u4e2a\u6b65\u9aa4\u4e0e\u5176\u524d\u7f6e\u4e0a\u4e0b\u6587\u62fc\u63a5\uff0c\u5e76\u5bf9\u5176\u8fdb\u884c\u591a\u8f6e\u91c7\u6837\u3002\u82e5\u67d0\u4e00\u6b65\u9aa4\u751f\u6210\u7684\u6240\u6709\u63a8\u7406\u8fc7\u7a0b\u5747\u5931\u8d25\uff0c\u5219\u5c06\u8be5\u6b65\u9aa4\u5224\u5b9a\u4e3a\u9519\u8bef\u3002\u63a5\u7740\uff0c\u4ee5\u6b64\u524d\u7684\u6b65\u9aa4\u4e3a\u4e0a\u4e0b\u6587\uff0c\u6784\u9020\u51fa\u4e00\u6761\u65b0\u7684\u6b63\u786e\u8f68\u8ff9&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;0ac8eae0-f850-462e-a913-74df0f4af622&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y^+nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99mi1oj183\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sup><span>+<\/span><\/sup><span><span>&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u3002\u7531&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;ca31785a-8448-4fe8-977b-3d79903e9819&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y^+nn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99mi9w8kfi\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sup><span>+<\/span><\/sup><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u548c&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;0064e3f9-ba3a-4664-b3c7-f64bc9d407ad&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y^-&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99miijp1ukn\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sup><span>&#8211;<\/span><\/sup><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u6784\u6210\u7684\u504f\u597d\u5bf9\u5177\u6709\u76f8\u540c\u524d\u7f00\uff0c\u53ef\u7528\u4e8e\u8bad\u7ec3\u3002\u968f\u540e\uff0c\u91c7\u7528\u5982 DPO 
\u7b49\u504f\u597d\u5bf9\u9f50\u7b97\u6cd5\u5bf9\u6a21\u578b\u8fdb\u884c\u4f18\u5316\uff0c\u4f7f\u5176\u805a\u7126\u4e8e\u6539\u8fdb\u51b3\u5b9a\u6b63\u8bef\u7684\u8f68\u8ff9\u540e\u7f00\u3002Lai \u7b49\u4eba<\/span><span>&nbsp;[2024]<\/span><span>&nbsp;\u63d0\u51fa\u7c7b\u4f3c\u7b56\u7565\uff0c\u4f7f\u7528 GPT-4 \u68c0\u6d4b\u9519\u8bef\u6b65\u9aa4\uff0c\u5e76\u7ed3\u5408 DPO \u5b9e\u73b0\u6b65\u9aa4\u7ea7\u504f\u597d\u4f18\u5316<\/span><span>\uff08Step-DPO\uff09<\/span><span>\u3002\u76f8\u6bd4\u4e4b\u4e0b\uff0cLu \u7b49\u4eba<\/span><span>&nbsp;[2024c]&nbsp;<\/span><span>\u4ece\u4e00\u6761\u6b63\u786e\u8f68\u8ff9\u51fa\u53d1\uff0c\u901a\u8fc7\u8c03\u9ad8\u91c7\u6837\u6e29\u5ea6\u8bf1\u5bfc\u751f\u6210\u9519\u8bef\u7684\u540e\u7eed\u6b65\u9aa4\uff0c\u4ece\u800c\u6784\u5efa\u5931\u8d25\u8f68\u8ff9\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6811\u641c\u7d22\u65b9\u6cd5<\/span><span>\uff08Tree search-based methods\uff09<\/span><span>\u5219\u76f4\u63a5\u4ece\u641c\u7d22\u6811\u4e2d\u63d0\u53d6\u504f\u597d\u5bf9\u3002Zhang \u7b49\u4eba<\/span><span>&nbsp;[2024h]&nbsp;<\/span><span>\u4f7f\u7528\u601d\u7ef4\u6811<\/span><span>\uff08Tree-of-Thought, ToT\uff09[Yao 
\u7b49\uff0c2023]<\/span><span>\u8fdb\u884c\u641c\u7d22\uff0c\u5e76\u5728\u8fc7\u7a0b\u4e2d\u901a\u8fc7\u81ea\u6211\u8bc4\u4f30\u5bf9\u8282\u70b9\u8fdb\u884c\u6253\u5206\u3002\u4e00\u65e6\u627e\u5230\u6b63\u786e\u7684\u63a8\u7406\u8def\u5f84\uff0c\u5373\u53ef\u57fa\u4e8e\u8be5\u8def\u5f84\u4e0a\u7684\u8282\u70b9\u6784\u9020\u504f\u597d\u5bf9\u3002\u5728\u5e7f\u5ea6\u4f18\u5148\u641c\u7d22\u4e2d\uff0c\u82e5\u67d0\u8282\u70b9\u5728\u4ece\u6b63\u786e\u8def\u5f84\u5ef6\u5c55\u65f6\u88ab\u526a\u679d\uff0c\u5219\u5176\u5bf9\u5e94\u6b65\u9aa4\u53ef\u89c6\u4e3a\u8d1f\u4f8b\uff0c\u4e0e\u6b63\u786e\u8def\u5f84\u8282\u70b9\u6784\u6210\u504f\u597d\u5bf9\u3002\u76f8\u8f83 ToT\uff0c\u8bb8\u591a\u5de5\u4f5c\u66f4\u504f\u597d\u4f7f\u7528 MCTS \u8fdb\u884c\u641c\u7d22\uff0c\u56e0\u5176\u80fd\u66f4\u597d\u5730\u5e73\u8861\u63a2\u7d22\u4e0e\u5229\u7528\u3002Xie \u7b49\u4eba<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u548c Chen \u7b49\u4eba<\/span><span>&nbsp;[2024c]&nbsp;<\/span><span>\u4fbf\u5728 MCTS \u57fa\u7840\u4e0a\u6784\u9020\u504f\u597d\u5bf9\u3002\u524d\u8005\u9009\u62e9\u540c\u4e00\u5c42\u7ea7\u4e2d Q \u503c\u6700\u9ad8\u4e0e\u6700\u4f4e\u7684\u8282\u70b9\u7ec4\u6210\u504f\u597d\u5bf9\uff1b\u540e\u8005\u5219\u4ece\u540c\u4e00\u7236\u8282\u70b9\u7684\u5b50\u8282\u70b9\u4e2d\u9009\u53d6 Q \u503c\u5dee\u5f02\u8f83\u5927\u7684\u8282\u70b9\u5bf9\uff0c\u4f5c\u4e3a\u4f18\u5316\u76ee\u6807\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\u504f\u597d\u4f18\u5316<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 
0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd1\u671f\u7814\u7a76\u5f00\u59cb\u63a2\u7d22\u8bcd\u5143\u7ea7\u504f\u597d\u4f18\u5316\uff0c\u4ee5\u652f\u6301\u66f4\u7ec6\u7c92\u5ea6\u7684\u63a8\u7406\u80fd\u529b\u63d0\u5347\u3002\u8be5\u65b9\u6cd5\u7684\u6838\u5fc3\u6311\u6218\u5728\u4e8e\u83b7\u53d6\u8bcd\u5143\u7ea7\u504f\u597d\u5bf9\u3002Rafailov \u7b49\u4eba<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u4e0e Zhong \u7b49\u4eba&nbsp;<\/span><span>[2024]&nbsp;<\/span><span>\u6307\u51fa\uff0c\u901a\u8fc7\u76f4\u63a5\u504f\u597d\u4f18\u5316<\/span><span>\uff08DPO\uff09<\/span><span>\u8bad\u7ec3\u7684\u7b56\u7565\u6a21\u578b\u53ef\u4ee5\u9690\u5f0f\u5b66\u4e60\u8bcd\u5143\u7ea7\u5956\u52b1\u4fe1\u53f7\uff0c\u5f62\u5f0f\u4e3a\u201c\u9690\u5f0f\u5956\u52b1\u201d\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.30043859649122806\" data-s=\"300,640\" data-type=\"png\" data-w=\"912\" style=\"width: 159px;height: auto !important\" data-imgfileid=\"100227652\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-0de525044b73725fd3bbae5d1ee6263f.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;4b24eece-843d-42d3-9fb0-46ed6d08d79a&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;n\\beta \\log \\frac {\\pi _{dpo}( y_t| x, y_{&lt; t}) }{\\pi _{ref}( y_t| x, y_{&lt; t}) }n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 
1.75em\"><span style=\"font-size: 15px\"><span>\u8fd9\u4e00\u53d1\u73b0\u4e3a\u6784\u5efa\u8bcd\u5143\u7ea7 DPO \u7b97\u6cd5\u63d0\u4f9b\u4e86\u7406\u8bba\u57fa\u7840\u3002Yang \u7b49\u4eba<\/span><span>&nbsp;[2024b]&nbsp;<\/span><span>\u5728\u6b64\u57fa\u7840\u4e0a\u8fdb\u4e00\u6b65\u5bf9\u9690\u5f0f\u5956\u52b1\u8fdb\u884c\u4e86\u6539\u8fdb\uff0c\u4ee5\u63d0\u5347\u4f18\u5316\u6548\u679c\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\" data-mpa-action-id=\"m99mlm5ech9\" data-pm-slice=\"0 0 []\"><span><span>\u4f5c\u4e3a\u8865\u5145\u65b9\u6cd5\uff0cLin \u7b49\u4eba<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u63d0\u51fa cDPO \u7b97\u6cd5\uff0c\u4ece\u53e6\u4e00\u4e2a\u89c6\u89d2\u6807\u6ce8\u8bcd\u5143\u7ea7\u91cd\u8981\u6027\u3002\u5176\u65b9\u6cd5\u662f\u5728\u6b63\u786e\u4e0e\u9519\u8bef\u89e3\u4e0a\u5206\u522b\u5fae\u8c03\u4e24\u4e2a\u8bed\u8a00\u6a21\u578b\uff0c\u5e76\u8ba1\u7b97\u4e24\u8005\u5728\u6bcf\u4e2a\u8bcd\u5143\u4e0a\u7684\u6982\u7387\u5dee\u5f02\uff0c\u4ece\u800c\u4f30\u8ba1\u8be5\u8bcd\u5143\u5728\u9519\u8bef\u63a8\u7406\u4e2d\u6240\u627f\u62c5\u7684\u8d23\u4efb\u3002\u5bf9\u4e8e\u5dee\u5f02\u5206\u6570\u8f83\u4f4e\u7684\u8bcd\u5143 s<\/span><\/span><sub><span>t<\/span><\/sub><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff0c\u610f\u5473\u7740\u5176\u5bf9\u63a8\u7406\u5931\u8d25\u5f71\u54cd\u66f4\u5927\uff0c\u53ef\u636e\u6b64\u5bf9\u5173\u952e\u8bcd\u5143\u8fdb\u884c\u52a0\u6743\u4f18\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1\u57fa\u4e8e 
DPO \u7684\u65b9\u6cd5\u56e0\u5b9e\u73b0\u7b80\u5355\u800c\u5f97\u5230\u5e7f\u6cdb\u5e94\u7528\uff0c\u4f46\u5176\u5728\u63a8\u7406\u589e\u5f3a\u65b9\u9762\u4ecd\u5b58\u5728\u5c40<\/span><span>\u9650\uff0c\u5982 4.1.5 \u8282\u6240\u8ff0\u3002\u503c\u5f97\u5173\u6ce8\u7684\u662f\uff0cO1 \u535a\u5ba2 [OpenAI, 2024b] \u4e0e R1 \u62a5\u544a [DeepSeek-AI \u7b49\uff0c2025]&nbsp;<\/span><span>\u5747\u5f3a\u8c03\uff1a\u82e5\u8981\u5b9e\u73b0\u590d\u6742\u63a8\u7406\u80fd\u529b\u7684\u663e\u8457\u8dc3\u5347\uff0c\u6700\u7ec8\u4ecd\u53ef\u80fd\u9700\u8981\u5f15\u5165\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u6280\u672f\uff0c\u51f8\u663e\u66f4\u9ad8\u7ea7\u4f18\u5316\u6846\u67b6\u7684\u91cd\u8981\u6027\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.2.3 \u5f3a\u5316\u5b66\u4e60<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u65e0\u6a21\u578b\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\uff08Model-free Online Reinforcement Learning\uff09<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5bf9\u4e8e\u6570\u5b66\u63a8\u7406\u7b49\u4efb\u52a1\uff0c\u5176\u73af\u5883\u52a8\u6001\u5177\u6709\u786e\u5b9a\u6027\uff0c\u56e0\u4e3a\u63a8\u7406\u8fc7\u7a0b\u4e2d\u5e76\u4e0d\u6d89\u53ca\u5bf9\u5916\u90e8\u73af\u5883\u7684\u611f\u77e5\u6216\u4ea4\u4e92\u3002\u6bcf\u6267\u884c\u4e00\u4e2a\u52a8\u4f5c<\/span><span>\uff08\u5982\u751f\u6210\u4e00\u4e2a\u8bcd\u5143\u6216\u63a8\u7406\u6b65\u9aa4\uff09<\/span><span>\uff0c\u6a21\u578b\u7684\u63a8\u7406\u72b6\u6001\u4fbf\u4f1a\u81ea\u52a8\u66f4\u65b0\uff0c\u4f8b\u5982\u901a\u8fc7\u5c06\u65b0\u751f\u6210\u7684\u8bcd\u5143\u8ffd\u52a0\u81f3\u5df2\u6709\u4e0a\u4e0b\u6587\u4e2d\u5f62\u6210\u65b0\u7684\u63a8\u7406\u72b6\u6001\u3002\u968f\u7740\u5927\u8bed\u8a00\u6a21\u578b\u63a8\u7406\u6548\u7387\u7684\u63d0\u5347\uff0c\u4ece\u6a21\u578b\u4e2d\u91c7\u6837\u751f\u6210\u5b8c\u6574\u63a8\u7406\u8def\u5f84\u5df2\u53d8\u5f97\u9ad8\u6548\u4e14\u4f4e\u6210\u672c\u3002\u56e0\u6b64\uff0c\u5728\u6b64\u7c7b\u4efb\u52a1\u4e2d\uff0c\u901a\u5e38\u65e0\u9700\u6784\u5efa\u73af\u5883\u6a21\u578b\uff0c\u4ec5\u4f9d\u8d56\u65e0\u6a21\u578b<\/span><span>\uff08model-free\uff09<\/span><span>\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u5373\u53ef\u5b9e\u73b0\u6709\u6548\u4f18\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u201c\u65e0\u6a21\u578b\u201d\u662f\u6307\u4e0d\u5bf9\u73af\u5883\u8fdb\u884c\u663e\u5f0f\u5efa\u6a21\uff0c\u800c\u662f\u901a\u8fc7\u7b56\u7565\u76f4\u63a5\u4e0e\u73af\u5883\u4ea4\u4e92\u6765\u5b66\u4e60\uff1b\u201c\u5728\u7ebf\u201d\u5219\u610f\u5473\u7740\u8bad\u7ec3\u6570\u636e\u6765\u6e90\u4e8e\u5f53\u524d\u7b56\u7565\u4e0e\u73af\u5883\u7684\u5b9e\u65f6\u4ea4\u4e92\uff0c\u800c\u975e\u4f9d\u8d56\u56fa\u5b9a\u7684\u5386\u53f2\u6570\u636e\u96c6<\/span><span>\uff08\u5373\u201c\u79bb\u7ebf\u201d\u5b66\u4e60\uff09<\/span><span>\u3002\u5728\u7ebf\u4e0e\u79bb\u7ebf\u5b66\u4e60\u65b9\u5f0f\u7684\u5dee\u5f02\uff0c\u4f1a\u663e\u8457\u5f71\u54cd\u5f3a\u5316\u5b66\u4e60\u8fc7\u7a0b\u4e2d\u7684\u6570\u636e\u5206\u5e03\u504f\u79fb\u95ee\u9898\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u5927\u8bed\u8a00\u6a21\u578b<\/span><span>\uff08LLM\uff09<\/span><span>\u8bad\u7ec3\u4e2d\uff0c\u5e38\u7528\u7684\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u5305\u62ecREINFORCE<\/span><span>&nbsp;[Sutton \u7b49\uff0c1999]<\/span><span>\u3001PPO<\/span><span>&nbsp;[Schulman \u7b49\uff0c2017]<\/span><span>\u548cGRPO<\/span><span>&nbsp;[Shao \u7b49\uff0c2024]<\/span><span>\u3002Li \u7b49<\/span><span>[2023d]<\/span><span>\u548cAhmadian \u7b49<\/span><span>[2024]<\/span><span>\u53d1\u73b0\uff0c\u5728\u6ca1\u6709\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08ORM\uff09<\/span><span>\u548c\u4ef7\u503c\u6a21\u578b\u7684\u60c5\u51b5\u4e0b\uff0c\u76f4\u63a5\u5e94\u7528REINFORCE\u53d6\u5f97\u4e86\u826f\u597d\u6548\u679c\u3002Ylfeng 
\u7b49<\/span><span>[2024]<\/span><span>\u53d7\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08RLHF\uff09<\/span><span>\u7684\u542f\u53d1\uff0c\u4f7f\u7528PPO\u5728\u89e3\u51b3\u65b9\u6848\u5c42\u9762\u63d0\u5347\u4e86LLM\u7684\u63a8\u7406\u80fd\u529b\u3002Zhang \u7b49<\/span><span>[2024j]<\/span><span>\u5b66\u4e60\u4e86\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRM\uff09<\/span><span>\uff0c\u5e76\u5728\u6b65\u9aa4\u5c42\u9762\u5229\u7528\u5176\u6307\u5bfcPPO\u8bad\u7ec3\u3002Zhong \u7b49<\/span><span>[2024]<\/span><span>\u5229\u7528\u76f4\u63a5\u504f\u597d\u4f18\u5316<\/span><span>\uff08DPO\uff09<\/span><span>\u7684\u9690\u5f0f\u5956\u52b1\uff0c\u5728\u8bcd\u5143\u5c42\u9762\u8fdb\u4e00\u6b65\u6307\u5bfcPPO\u8bad\u7ec3\u3002\u8bf8\u5982deepseek-math<\/span><span>&nbsp;[Shao \u7b49\uff0c2024]<\/span><span>\u3001qwen-math<\/span><span>&nbsp;[Yang \u7b49\uff0c2024a]<\/span><span>\u548cOpenR&nbsp;<\/span><span>[Wang \u7b49\uff0c2024e]<\/span><span>\u7b49\u9879\u76ee\u91c7\u7528GRPO<\/span><span>&nbsp;[Shao \u7b49\uff0c2024]<\/span><span>\u8fdb\u884c\u8bad\u7ec3\uff0c\u8bad\u7ec3\u8fc7\u7a0b\u7531PRM\u6307\u5bfc\uff0c\u663e\u8457\u589e\u5f3a\u4e86LLM\u7684\u591a\u8df3\u63a8\u7406\u80fd\u529b\u3002\u5c3d\u7ba1\u5f53\u524d\u7684\u65e0\u6a21\u578b\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u53d6\u5f97\u4e86\u6210\u529f\uff0c\u4f46\u968f\u7740\u4efb\u52a1\u590d\u6742\u6027\u7684\u589e\u52a0\u548c\u63a8\u7406\u4efb\u52a1\u6269\u5c55\u5230\u66f4\u591a\u73b0\u5b9e\u573a\u666f\uff0c\u4ec5\u4f9d\u8d56\u65e0\u4ea4\u4e92\u7684\u73af\u5883\u662f\u4e0d\u591f\u7684\u3002\u5728\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u9884\u8ba1\u66f4\u4e3a\u591a\u6837\u5316\u7684\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\u5c06\u5728LLM\u7684\u540e\u7eed\u8bad\u7ec3\u4f18\u5316\u4e2d\u53d1\u6325\u5173\u952e\u4f5c\u7528\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br 
\/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.5592592592592592\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-width=\"1107\" data-height=\"619\" data-imgfileid=\"100227509\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-2487c93bd5d84424bc16247ab956ba3a.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jhpxqxq8\" data-pm-slice=\"0 0 []\"><span>\u56fe7\uff1a\u56db\u79cd\u5f3a\u5316\u5b66\u4e60\u8303\u5f0f\u7684\u6bd4\u8f83\uff1a\u65e0\u6a21\u578b\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u3001\u57fa\u4e8e\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60\u3001\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u548c\u5c42\u6b21\u5f3a\u5316\u5b66\u4e60\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\uff08Offline Reinforcement Learning\uff09<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u4f7f\u7528\u9759\u6001\u6570\u636e\u96c6\u800c\u975e\u901a\u8fc7\u7b56\u7565\u6a21\u578b\u4e0e\u73af\u5883\u4ea4\u4e92\u6536\u96c6\u7684\u8f68\u8ff9\u6570\u636e\u6765\u8bad\u7ec3\u7b56\u7565\u6a21\u578b<\/span><span>[Prudencio 
\u7b49\uff0c2022]<\/span><span>\u3002\u7531\u4e8e\u8bad\u7ec3\u5927\u8bed\u8a00\u6a21\u578b\u7684\u65f6\u95f4\u548c\u8ba1\u7b97\u6210\u672c\uff0c\u6279\u91cf\u8bad\u7ec3\u5728\u5927\u89c4\u6a21\u5fae\u8c03\u8fc7\u7a0b\u4e2d\u63d0\u4f9b\u4e86\u663e\u8457\u4f18\u52bf\u3002\u56e0\u6b64\uff0c\u8bb8\u591a\u7814\u7a76\uff0c\u7279\u522b\u662f\u5728\u5b66\u672f\u754c\uff0c\u91c7\u7528\u79bb\u7ebf\u8bad\u7ec3\u65b9\u6cd5\u6765\u8bad\u7ec3\u5927\u8bed\u8a00\u6a21\u578b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Snell \u7b49\u4eba<\/span><span>&nbsp;[2022]&nbsp;<\/span><span>\u5bf9\u73b0\u6709\u7684\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5 IQL \u8fdb\u884c\u4e86\u6539\u8fdb\uff0c\u5e76\u5c06\u5176\u5e94\u7528\u4e8e\u81ea\u7136\u8bed\u8a00\u751f\u6210\u4efb\u52a1\uff0c\u63d0\u51fa\u4e86 ILQL \u7b97\u6cd5\u3002\u5f53\u524d\u6700\u5e38\u7528\u7684\u79bb\u7ebf\u8bad\u7ec3\u65b9\u6cd5\u662f DPO&nbsp;<\/span><span>[Rafailov \u7b49\uff0c2023]<\/span><span>\uff0c\u5176\u57fa\u672c\u6d41\u7a0b\u662f\u5148\u6536\u96c6\u5927\u91cf\u504f\u597d\u6570\u636e\uff0c\u518d\u5728\u6b64\u57fa\u7840\u4e0a\u8fdb\u884c\u504f\u597d\u5b66\u4e60\u3002\u8be5\u65b9\u6cd5\u4e0d\u4ec5\u7701\u53bb\u4e86\u5956\u52b1\u5efa\u6a21\u7684\u6b65\u9aa4\uff0c\u4e5f\u6781\u5927\u63a8\u52a8\u4e86 DPO \u7684\u5e7f\u6cdb\u5e94\u7528\u3002\u4e3a\u514b\u670d DPO \u4ec5\u4f9d\u8d56\u504f\u597d\u4fe1\u606f\u3001\u65e0\u6cd5\u5229\u7528\u5b9e\u9645\u5956\u52b1\u503c\u7684\u5c40\u9650\uff0cWang \u7b49\u4eba<\/span><span>&nbsp;[2024b]&nbsp;<\/span><span>\u57fa\u4e8e\u6700\u5927\u71b5\u5f3a\u5316\u5b66\u4e60<\/span><span>&nbsp;[Haarnoja \u7b49\uff0c2017]<\/span><span>&nbsp;\u63a8\u5bfc\u51fa\u4e00\u79cd\u65b0\u7684\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5 
OREO\uff0c\u6709\u6548\u5f25\u8865\u4e86\u4e0a\u8ff0\u4e0d\u8db3\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5c3d\u7ba1\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08\u5c24\u5176\u662f DPO\uff09<\/span><span>\u5728\u5f53\u524d\u7814\u7a76\u4e2d\u88ab\u5e7f\u6cdb\u91c7\u7528\uff0c\u4f46\u8be5\u65b9\u6cd5\u4ecd\u5b58\u5728\u4e00\u4e9b\u663e\u8457\u5c40\u9650\u3002\u5176\u6838\u5fc3\u95ee\u9898\u5728\u4e8e\u8bad\u7ec3\u6570\u636e\u5e76\u975e\u6e90\u81ea\u5f53\u524d\u7b56\u7565\u6a21\u578b\uff0c\u800c\u662f\u91c7\u6837\u81ea\u5148\u524d\u7684\u6b21\u4f18\u7b56\u7565\u3002\u968f\u7740\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u7b56\u7565\u6a21\u578b\u7684\u6301\u7eed\u4f18\u5316\uff0c\u884c\u4e3a\u7b56\u7565\u4e0e\u76ee\u6807\u7b56\u7565\u4e4b\u95f4\u7684\u504f\u5dee\u4e0d\u65ad\u6269\u5927\uff0c\u4e25\u91cd\u524a\u5f31\u4e86\u8bad\u7ec3\u6548\u679c\u3002\u5bf9\u6b64\uff0cChen \u7b49\u4eba<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u63d0\u51fa\uff0c\u53ef\u901a\u8fc7\u5c06\u79bb\u7ebf\u8bad\u7ec3\u8fc7\u6e21\u4e3a\u5728\u7ebf\u8bad\u7ec3\u7b56\u7565\u6765\u7f13\u89e3\u8be5\u95ee\u9898\u3002\u53e6\u4e00\u79cd\u5e38\u89c1\u505a\u6cd5\u662f\uff0c\u5148\u5229\u7528\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u5bf9\u5927\u8bed\u8a00\u6a21\u578b\u8fdb\u884c\u521d\u59cb\u5316<\/span><span>&nbsp;[Yang \u7b49\uff0c2024c\uff1bHe 
\u7b49\uff0c2024c]<\/span><span>\uff0c\u518d\u7ed3\u5408\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u8fdb\u4e00\u6b65\u63d0\u5347\u6027\u80fd\u3002\u76f8\u8f83\u4e8e\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\uff0c\u79bb\u7ebf\u65b9\u6cd5\u5177\u5907\u53ef\u9884\u5148\u6784\u9020\u8bad\u7ec3\u4fe1\u53f7\u7684\u4f18\u52bf\uff0c\u56e0\u800c\u5728\u5b58\u5728\u6807\u51c6\u7b54\u6848\u3001\u4f46\u96be\u4ee5\u901a\u8fc7\u5956\u52b1\u6a21\u578b\u7cbe\u786e\u8bc4\u4f30\u7684\u4efb\u52a1\u4e2d\u5c24\u4e3a\u9002\u7528&nbsp;<\/span><span>[Yang \u7b49\uff0c2024c]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u57fa\u4e8e\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60\uff08Model-based Reinforcement Learning\uff09<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5bf9\u4e8e\u6d89\u53ca\u4e0e\u5916\u90e8\u73af\u5883\u4ea4\u4e92\u7684\u4efb\u52a1<\/span><span>\uff08\u5982\u5bf9\u8bdd\u7cfb\u7edf\u548c\u89c6\u89c9\u5bfc\u822a\uff09<\/span><span>\uff0c\u73af\u5883\u5efa\u6a21\u662f\u5f3a\u5316\u5b66\u4e60\u4e2d\u7684\u5173\u952e\u73af\u8282<\/span><span>&nbsp;[Moerland \u7b49\uff0c2020]<\/span><span>\u3002\u901a\u8fc7\u6784\u5efa\u6a21\u62df\u73af\u5883<\/span><span>\uff08\u6216\u79f0\u201c\u4e16\u754c\u6a21\u578b\u201d\uff09[Zhu 
\u7b49\uff0c2024]<\/span><span>\uff0c\u7cfb\u7edf\u80fd\u591f\u5728\u8bad\u7ec3\u4e0e\u63a8\u7406\u8fc7\u7a0b\u4e2d\u63d0\u4f9b\u53cd\u9988\u4fe1\u53f7\u3001\u72b6\u6001\u8f6c\u79fb\u4ee5\u53ca\u5185\u90e8\u89c4\u5212\u80fd\u529b\uff0c\u4ece\u800c\u663e\u8457\u964d\u4f4e\u4ea4\u4e92\u6210\u672c\u3002\u6709\u6548\u7684\u4e16\u754c\u6a21\u578b\u5e94\u5177\u5907\u5145\u5206\u7684\u4efb\u52a1\u77e5\u8bc6\uff0c\u80fd\u591f\u9488\u5bf9\u7b56\u7565\u6a21\u578b\u7684\u52a8\u4f5c\u51c6\u786e\u9884\u6d4b\u72b6\u6001\u8f6c\u79fb\u548c\u5956\u52b1\u53cd\u9988\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5178\u578b\u5b9e\u4f8b\u5305\u62ec AlphaGo Zero<\/span><span>&nbsp;[Silver \u7b49\uff0c2017]<\/span><span>\uff0c\u5176\u901a\u8fc7\u5efa\u6a21\u5bf9\u624b\u5e76\u7ed3\u5408\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u6a21\u62df\u535a\u5f08\u72b6\u6001\uff0c\u7528\u4e8e\u4f18\u5316\u7b56\u7565\u5b66\u4e60\u3002\u7c7b\u4f3c\u5730\uff0cHao \u7b49&nbsp;<\/span><span>[2023]&nbsp;<\/span><span>\u8bc1\u660e\u4e86\u5927\u8bed\u8a00\u6a21\u578b\u53ef\u4f5c\u4e3a\u89c4\u5212\u4efb\u52a1\u4e2d\u7684\u4e16\u754c\u6a21\u578b\uff0cHe \u7b49<\/span><span>&nbsp;[2024c]&nbsp;<\/span><span>\u5219\u5728\u5bf9\u8bdd\u89c4\u5212\u4e2d\u5f15\u5165\u5927\u8bed\u8a00\u6a21\u578b\uff0c\u901a\u8fc7 MCTS \u6846\u67b6\u6a21\u62df\u7528\u6237\u4ea4\u4e92\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5c3d\u7ba1\u5df2\u6709\u521d\u6b65\u8fdb\u5c55\uff0c\u57fa\u4e8e\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60\u5728\u5927\u8bed\u8a00\u6a21\u578b\u4e3b\u5bfc\u7684\u590d\u6742\u63a8\u7406\u4efb\u52a1\u4e2d\u4ecd\u663e\u4e0d\u8db3\uff0c\u5c24\u5176\u662f\u5728\u6570\u5b66\u63a8\u7406\u7b49\u4e0d\u6d89\u53ca\u5916\u90e8\u73af\u5883\u52a8\u6001\u7684\u573a\u666f\u4e2d\u5e94\u7528\u53d7\u9650\u3002\u7136\u800c\uff0c\u968f\u7740\u7814\u7a76\u4e0d\u65ad\u5411\u66f4\u9ad8\u590d\u6742\u5ea6\u4efb\u52a1\u8fc8\u8fdb\uff0c\u4e16\u754c\u6a21\u578b\u4e0e\u57fa\u4e8e\u6a21\u578b\u7684\u5f3a\u5316\u5b66\u4e60\u5728\u5927\u8bed\u8a00\u6a21\u578b\u4e2d\u7684\u878d\u5408\u6709\u671b\u6210\u4e3a\u91cd\u8981\u53d1\u5c55\u65b9\u5411\uff0c\u4e3a\u63a8\u7406\u80fd\u529b\u7684\u8fdb\u4e00\u6b65\u63d0\u5347\u5f00\u8f9f\u65b0\u8def\u5f84\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5c42\u6b21\u5f3a\u5316\u5b66\u4e60\uff08Hierarchical Reinforcement Learning\uff09<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8bb8\u591a\u63a8\u7406\u4efb\u52a1\u53ef\u6709\u6548\u5efa\u6a21\u4e3a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u5c42\u6b21\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b\uff08Hierarchical MDPs\uff09<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\uff0c\u53cd\u6620\u4eba\u7c7b\u8ba4\u77e5\u7684\u5206\u5c42\u7279\u5f81\u3002\u4f8b\u5982\uff0c\u5728\u6570\u5b66\u63a8\u7406\u4e2d\uff0c\u5b66\u751f\u5f80\u5f80\u4e0d\u4f1a\u9010\u8bcd\u751f\u6210\u89e3\u7b54\uff0c\u800c\u662f\u5148\u6784\u601d\u4e00\u7cfb\u5217\u63a8\u7406\u6b65\u9aa4\uff0c\u7136\u540e\u518d\u57fa\u4e8e\u8fd9\u4e9b\u6b65\u9aa4\u586b\u5145\u5177\u4f53\u5185\u5bb9\u3002\u8fd9\u4e00\u8fc7\u7a0b\u81ea\u7136\u5212\u5206\u4e3a\u4e24\u4e2a\u5c42\u7ea7\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u9ad8\u5c42\u6a21\u578b<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u751f\u6210\u62bd\u8c61\u7684\u63a8\u7406\u601d\u8def\uff0c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u4f4e\u5c42\u6a21\u578b<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u5728\u6b64\u57fa\u7840\u4e0a\u751f\u6210\u5bf9\u5e94\u7684\u8bcd\u5143\u5185\u5bb9\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Liu \u7b49<\/span><span>&nbsp;[2024b]&nbsp;<\/span><span>\u5c06\u63a8\u7406\u4efb\u52a1\u5f62\u5f0f\u5316\u4e3a\u5c42\u6b21 MDP\uff1a\u9ad8\u5c42\u6a21\u578b\u9996\u5148\u9009\u62e9\u63a8\u7406\u7b56\u7565<\/span><span>\uff08\u5982\u601d\u7ef4\u94fe CoT [Wei \u7b49\uff0c2022]\u3001\u7531\u6d45\u5165\u6df1\u63d0\u793a L2M [Zhou \u7b49\uff0c2022]\u3001\u7f16\u7a0b\u5f0f\u63a8\u7406 PoT [Chen \u7b49\uff0c2022]\uff09<\/span><span>\uff0c\u518d\u751f\u6210\u5177\u4f53\u63a8\u7406\u8fc7\u7a0b\uff1b\u82e5\u63a8\u7406\u5931\u8d25\uff0c\u5219\u8fed\u4ee3\u9009\u62e9\u65b0\u7684\u7b56\u7565\u3002SMART<\/span><span>&nbsp;[Liu \u7b49\uff0c2024b]&nbsp;<\/span><span>\u91c7\u7528\u7b56\u7565\u68af\u5ea6\u65b9\u6cd5<\/span><span>&nbsp;[Lee 
\u7b49\uff0c2024c]&nbsp;<\/span><span>\u4f18\u5316\u9ad8\u5c42\u51b3\u7b56\u8fc7\u7a0b\uff0c\u4f46\u672a\u6d89\u53ca\u4f4e\u5c42\u63a8\u7406\u7684\u4f18\u5316\u3002ReasonFlux<\/span><span>&nbsp;[Yang \u7b49\uff0c2025a]&nbsp;<\/span><span>\u5219\u6784\u5efa\u4e86\u4e00\u7cfb\u5217\u201c\u601d\u7ef4\u6a21\u677f\u201d\uff0c\u901a\u8fc7\u9ad8\u5c42\u89c4\u5212\u751f\u6210\u63a8\u7406\u610f\u56fe\u5e8f\u5217\uff0c\u518d\u5728\u5177\u4f53\u4efb\u52a1\u4e0a\u4e0b\u6587\u4e2d\u5bf9\u6bcf\u4e00\u9879\u610f\u56fe\u8fdb\u884c\u5b9e\u4f8b\u5316\uff0c\u6700\u7ec8\u5f62\u6210\u5b8c\u6574\u3001\u8fde\u8d2f\u7684\u63a8\u7406\u8def\u5f84\u3002\u8fd9\u79cd\u7ed3\u6784\u5316\u65b9\u6cd5\u6709\u6548\u5730\u5c06\u590d\u6742\u4efb\u52a1\u89e3\u6784\u4e3a\u62bd\u8c61\u9ad8\u5c42\u8ba1\u5212\u4e0e\u5bf9\u5e94\u7684\u53ef\u6267\u884c\u63a8\u7406\u5b50\u4efb\u52a1\u3002\u7c7b\u4f3c\u5730\uff0cZhou \u7b49&nbsp;<\/span><span>[2024]&nbsp;<\/span><span>\u63d0\u51fa&nbsp;<\/span><\/span><strong style=\"font-size: 15px\"><span><span>ArCHer<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u6846\u67b6\u2014\u2014\u4e00\u4e2a\u7528\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u7684\u5c42\u6b21\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u3002\u5728\u9ad8\u5c42\uff0cArCHer \u4f7f\u7528\u57fa\u4e8e\u503c\u51fd\u6570\u7684\u79bb\u7ebf\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5 IQL<\/span><span>&nbsp;[Kostrikov \u7b49\uff0c2021]<\/span><span>&nbsp;\u5b66\u4e60\u8bdd\u8bed\u7ea7 Q-\u51fd\u6570\u4e0e V-\u51fd\u6570\uff0c\u4ee5\u7ed3\u679c\u5956\u52b1\u8bc4\u4f30\u54cd\u5e94\u8d28\u91cf\uff1b\u5728\u4f4e\u5c42\uff0c\u5219\u4f7f\u7528 REINFORCE<\/span><span>&nbsp;[Sutton \u7b49\uff0c1999]&nbsp;<\/span><span>\u4f18\u5316\u8bcd\u5143\u7ea7 MDP\uff0c\u4f4e\u5c42\u5956\u52b1\u7531\u9ad8\u5c42\u4f18\u52bf\u51fd\u6570\u63d0\u4f9b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p 
style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u901a\u8fc7\u5f15\u5165\u5c42\u6b21\u5b66\u4e60\u673a\u5236\uff0c\u5927\u8bed\u8a00\u6a21\u578b\u4e0d\u4ec5\u80fd\u591f\u5b9e\u73b0\u62bd\u8c61\u63a8\u7406\u6b65\u9aa4\u4e4b\u95f4\u7684\u8fde\u8d2f\u6027\uff0c\u8fd8\u80fd\u6446\u8131\u9010\u8bcd\u56de\u5fc6\u7684\u6a21\u5f0f\uff0c\u5b66\u4f1a\u66f4\u5177\u7ed3\u6784\u5316\u7684\u63a8\u7406\u7b56\u7565\uff0c\u4ece\u800c\u663e\u8457\u63d0\u5347\u5176\u5728\u590d\u6742\u63a8\u7406\u4efb\u52a1\u4e2d\u7684\u8868\u73b0\u80fd\u529b\u3002<\/span><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">4.3 \u8bc4\u4f30\u5668\u4f18\u5316<\/span><\/span><\/h2>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.3.1 \u8bad\u7ec3\u6570\u636e\u6784\u5efa<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u672c\u8282\u4e2d\uff0c\u6211\u4eec\u9996\u5148\u4ecb\u7ecd\u4f18\u5316\u8bc4\u4f30\u5668\u7684\u6570\u636e\u6784\u5efa\u65b9\u6cd5\uff0c\u5305\u62ec<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u7ed3\u679c\u7ea7\uff08outcome-level\uff09\u3001\u6b65\u9aa4\u7ea7\uff08step-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u548c<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7\uff08token-level\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u6570\u636e\u6784\u5efa\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7ed3\u679c\u7ea7&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u7ed3\u679c\u7ea7\u5956\u52b1\u6784\u5efa\u76f8\u5bf9\u76f4\u63a5\u3002\u65e9\u671fRLHF\u65b9\u6cd5\u4f9d\u8d56\u4eba\u5de5\u6807\u6ce8\u7684\u504f\u597d\u6570\u636e\u6765\u8bad\u7ec3\u5956\u52b1\u6a21\u578b\uff0c\u4f46\u9ad8\u6602\u7684\u4eba\u5de5\u6210\u672c\u63a8\u52a8\u4e86\u81ea\u52a8\u6807\u6ce8\u65b9\u6cd5\u7684\u53d1\u5c55\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6700\u7b80\u5355\u7684\u81ea\u52a8\u65b9\u6cd5\u662f\u5229\u7528\u7b54\u6848\u6807\u7b7e\u5c06\u89e3\u51b3\u65b9\u6848\u5212\u5206\u4e3a\u6b63\u786e\u4e0e\u9519\u8bef\uff0c\u5e76\u6784\u9020\u504f\u597d\u5bf9\uff0c\u57fa\u4e8eDPO\u65b9\u6cd5\u8bad\u7ec3\u5956\u52b1\u6a21\u578b<\/span><span>&nbsp;[Hosseini \u7b49\uff0c2024]<\/span><span>\u3002\u4e5f\u53ef\u4f7f\u7528\u66f4\u5f3a\u7684\u5927\u6a21\u578b\u8bc4\u4f30\u63a8\u7406\u6b63\u786e\u6027\uff0c\u5982 Lee \u7b49&nbsp;<\/span><span>[2024a]&nbsp;<\/span><span>\u5229\u7528\u66f4\u5f3a\u7684LLM\u5bf9\u54cd\u5e94\u6253\u5206<\/span><span>\uff080-10\u5206\uff09<\/span><span>\uff0c\u518d\u57fa\u4e8e\u8be5\u6570\u636e\u8bad\u7ec3\u5956\u52b1\u6a21\u578b\u3002\u6b64\u5916\uff0cMu 
\u7b49[2024]\u63d0\u51fa\u57fa\u4e8e\u89c4\u5219\u7684\u5956\u52b1\u673a\u5236\uff0c\u5c06\u671f\u671b\u884c\u4e3a\u62c6\u89e3\u4e3a\u5177\u4f53\u89c4\u5219\u5e76\u5206\u914d\u5f97\u5206\uff0c\u6700\u7ec8\u4e0e\u4f20\u7edfRLHF\u5956\u52b1\u7ed3\u5408\uff0c\u901a\u8fc7PPO\u4f18\u5316\u6a21\u578b\u3002\u7c7b\u4f3c\u5730\uff0cDeepSeek-AI \u7b49<\/span><span>&nbsp;[2025]&nbsp;<\/span><span>\u4e3a\u63a8\u7406\u4efb\u52a1\u8bbe\u8ba1\u4e86\u7ed3\u5408\u51c6\u786e\u6027\u4e0e\u683c\u5f0f\u89c4\u8303\u7684\u89c4\u5219\u5956\u52b1\u4f53\u7cfb\uff0c\u6784\u5efa\u4e86\u66f4\u5168\u9762\u7684\u8bad\u7ec3\u4fe1\u53f7\u3002<\/span><\/span><code style=\"font-size: 15px\"><span><br \/><\/span><\/code><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><code style=\"font-size: 15px\"><span><br \/><\/span><\/code><span style=\"font-size: 15px\"><span>\u8fd9\u4e9b\u81ea\u52a8\u65b9\u6cd5\u4e0d\u4ec5\u964d\u4f4e\u4e86\u5bf9\u4eba\u5de5\u6807\u6ce8\u7684\u4f9d\u8d56\uff0c\u8fd8\u63d0\u5347\u4e86\u5956\u52b1\u6a21\u578b\u8bad\u7ec3\u7684\u6548\u7387\u4e0e\u53ef\u6269\u5c55\u6027\uff0c\u63a8\u52a8\u4e86RLHF\u5728\u590d\u6742\u63a8\u7406\u4efb\u52a1\u4e2d\u7684\u5e94\u7528\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.2972222222222222\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-width=\"1185\" data-height=\"352\" data-imgfileid=\"100227508\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-8be8829a14d2b000c48f3a1e03b39262.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em;text-align: center\"><span style=\"font-size: 13px\" data-mpa-action-id=\"m96jgufu5x5\" data-pm-slice=\"0 0 
[]\"><span>\u56fe8\uff1a\u6784\u5efa\u6b65\u9aa4\u7ea7\u5956\u52b1\u8bad\u7ec3\u4fe1\u53f7\u7684\u56db\u79cd\u65b9\u6cd5\u7684\u8bf4\u660e\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6b65\u9aa4\u7ea7&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4e3a\u83b7\u53d6\u6b65\u9aa4\u7ea7\u8bc4\u4f30\u4fe1\u53f7\uff0cOpenAI \u53d1\u5e03\u4e86\u8fc7\u7a0b\u5956\u52b1\u6570\u636e\u96c6 PRM800K&nbsp;<\/span><span>[Lightman \u7b49\uff0c2023]<\/span><span>\u3002\u7136\u800c\uff0c\u968f\u7740\u63a8\u7406\u4efb\u52a1\u6301\u7eed\u6d8c\u73b0\u4e14\u8bc4\u4f30\u5668\u9700\u5177\u5907\u826f\u597d\u6cdb\u5316\u80fd\u529b\uff0cPRM \u6570\u636e\u7684\u6269\u5c55\u663e\u5f97\u5fc5\u8981\u3002\u624b\u52a8\u6807\u6ce8\u4ee3\u4ef7\u9ad8\u6602\u3001\u96be\u4ee5\u6269\u5c55\uff0c\u800c\u57fa\u4e8eLLM\u7684\u8bc4\u5224\u65b9\u6cd5\u867d\u6613\u5b9e\u73b0\uff0c\u4f46\u5b58\u5728\u4e0d\u7a33\u5b9a\u6027\u4e0e\u566a\u58f0\u95ee\u9898<\/span><span>&nbsp;[Zheng \u7b49\uff0c2023\uff1bYe \u7b49\uff0c2025]<\/span><span>\uff0c\u56e0\u6b64\u66f4\u9ad8\u6548\u7684\u81ea\u52a8\u6807\u6ce8\u65b9\u6cd5\u6210\u4e3a\u7814\u7a76\u91cd\u70b9\u3002\u5f53\u524d\u81ea\u52a8\u6807\u6ce8\u65b9\u6cd5\u4e3b\u8981\u53ef\u5206\u4e3a\u4e09\u7c7b\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7b2c\u4e00\u7c7b\uff1a\u4f30\u8ba1\u6b65\u9aa4\u6b63\u786e\u6027\u3002<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp; Wang \u7b49<\/span><span>[2024g,m]<\/span><span>\u548c Jiao 
\u7b49<\/span><span>[2024]<\/span><span>\u901a\u8fc7\u8499\u7279\u5361\u6d1b\u91c7\u6837\u4f30\u7b97\u6b65\u9aa4\u5956\u52b1\uff0c\u4ee5\u6b65\u9aa4Si\u7684N\u6b21\u5b8c\u6210\u7684\u6210\u529f\u7387\u4e3a\u5176\u5956\u52b1\u3002Luo \u7b49<\/span><span>[2024]<\/span><span>\u7ed3\u5408\u4e8c\u5206\u67e5\u627e\u548cMCTS\u8bc6\u522b\u9996\u4e2a\u9519\u8bef\u6b65\u9aa4\uff0c\u63d0\u9ad8\u91c7\u6837\u6548\u7387\u3002Zhang \u7b49<\/span><span>[2024f]<\/span><span>\u3001Xia \u7b49<\/span><span>[2024]<\/span><span>\u548c Gao \u7b49<\/span><span>[2024a]<\/span><span>\u5219\u76f4\u63a5\u91c7\u7528LLM\u8bc4\u4f30\u6b65\u9aa4\u6b63\u786e\u6027\u3002Zhang \u7b49<\/span><span>[2025b]<\/span><span>\u6307\u51faMC\u91c7\u6837\u5b58\u5728\u8f83\u5927\u566a\u58f0\uff0c\u63d0\u51fa\u5171\u8bc6\u8fc7\u6ee4\u673a\u5236\uff0c\u5c06MC\u4f30\u8ba1\u4e0eLLM\u9a8c\u8bc1\u7ed3\u5408\uff0c\u4ee5\u63d0\u5347\u6570\u636e\u51c6\u786e\u6027\u3002\u4e0e\u6b64\u4e0d\u540c\uff0cChen \u7b49<\/span><span>[2024g]<\/span><span>\u5c06\u95ee\u9898\u5206\u89e3\u4e3a\u5b50\u95ee\u9898\uff0c\u5e76\u4ece\u6807\u51c6\u89e3\u4e2d\u63d0\u53d6\u4e2d\u95f4\u7ed3\u679c\uff0c\u5c06\u5176\u4e0e\u6a21\u578b\u751f\u6210\u7ed3\u679c\u6bd4\u5bf9\u8bc4\u4f30\u6b65\u9aa4\u6b63\u786e\u6027\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7b2c\u4e8c\u7c7b\uff1a\u57fa\u4e8e\u6807\u7b7e\u751f\u6210\u6b65\u9aa4\u5185\u5bb9\u3002<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp; \u6b64\u7c7b\u65b9\u6cd5\u4e3b\u52a8\u5411\u6b63\u786e\u63a8\u7406\u8fc7\u7a0b\u6ce8\u5165\u9519\u8bef\uff0c\u4ee5\u6784\u5efa\u5305\u542b\u9519\u8bef\u6b65\u9aa4\u7684\u6570\u636e\u96c6\u3002Yan \u7b49<\/span><span>[2024]<\/span><span>\u901a\u8fc7\u9ad8\u6e29\u91c7\u6837\u751f\u6210\u9519\u8bef\uff0c\u5e76\u57fa\u4e8e\u6b63\u786e\u89e3\u751f\u6210\u53cd\u601d\u4e0e\u4fee\u6b63\u3002Xi 
\u7b49<\/span><span>[2024]<\/span><span>\u5219\u4e3b\u52a8\u63d2\u5165\u9519\u8bef\u5e76\u5f15\u5bfc\u6a21\u578b\u751f\u6210\u53cd\u601d\uff0c\u6784\u9020\u9ad8\u8d28\u91cf\u7684\u4fee\u6b63\u6570\u636e\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7b2c\u4e09\u7c7b\uff1a\u901a\u8fc7\u7f6e\u4fe1\u5ea6\u53d8\u5316\u8bc4\u4f30\u6b65\u9aa4\u8d28\u91cf\u3002<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8be5\u7c7b\u65b9\u6cd5\u57fa\u4e8e\u4ee5\u4e0b\u5047\u8bbe\uff1a\u4f18\u8d28\u63a8\u7406\u6b65\u9aa4\u63d0\u5347\u63a8\u7406\u7f6e\u4fe1\u5ea6\uff0c\u52a3\u8d28\u6b65\u9aa4\u5219\u964d\u4f4e\u4e4b\u3002Lu \u7b49<\/span><span>[2024a]<\/span><span>\u63d0\u51fa\u5229\u7528\u7ed3\u679c\u76d1\u7763\u9a8c\u8bc1\u5668\u8bc4\u4f30\u76f8\u90bb\u6b65\u9aa4\u4e4b\u95f4\u7f6e\u4fe1\u5ea6\u53d8\u5316\uff0c\u4ee5\u6b64\u6807\u6ce8\u6b65\u9aa4\u6b63\u786e\u6027\uff0c\u540c\u65f6\u907f\u514d\u5927\u89c4\u6a21\u91c7\u6837\u4ee5\u964d\u4f4e\u8ba1\u7b97\u5f00\u9500\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u8bcd\u5143\u7ea7&nbsp;<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4e3a\u83b7\u53d6\u66f4\u9ad8\u7ec6\u7c92\u5ea6\u7684\u5956\u52b1\u4fe1\u53f7\uff0c\u9700\u81ea\u52a8\u8bc4\u4f30\u5404\u8bcd\u5143\u7684\u91cd\u8981\u6027\u3002Chen 
\u7b49<\/span><span>[2024h]<\/span><span>\u8bad\u7ec3\u4e00\u4e2a\u53ef\u91cd\u5199\u539f\u59cb\u89e3\u7684\u751f\u6210\u5f0f\u5956\u52b1\u6a21\u578b\uff0c\u8f93\u5165\u91cd\u5199\u7ed3\u679c\u540e\uff0c\u539f\u59cb\u89e3\u4e2d\u6bcf\u4e2a\u8bcd\u5143\u7684\u9884\u6d4b\u6982\u7387\u5373\u4e3a\u5176\u5956\u52b1\u3002\u8be5\u65b9\u6cd5\u5047\u8bbe\uff1a\u9519\u8bef\u8bcd\u5143\u66f4\u53ef\u80fd\u88ab\u4fee\u6539\uff0c\u5176\u6982\u7387\u4e0b\u964d\uff1b\u800c\u6b63\u786e\u8bcd\u5143\u9884\u6d4b\u4e00\u81f4\uff0c\u6982\u7387\u66f4\u9ad8\u3002Yoon \u7b49<\/span><span>[2024]<\/span><span>\u91c7\u7528\u7c7b\u4f3c\u7b56\u7565\uff0c\u5229\u7528\u5f3aLLM\u5bf9\u9519\u8bef\u89e3 &nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;28911f35-9052-4e0e-83ad-cac69e0def78&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y_rn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99y0v7meu4\" data-pm-slice=\"0 0 []\"><span><span>y<\/span><\/span><sub><span>r&nbsp;<\/span><\/sub><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u8fdb\u884c\u201c\u6dfb\u52a0\u3001\u5220\u9664\u3001\u66ff\u6362\u201d\u4e09\u79cd\u64cd\u4f5c\u7684\u8fed\u4ee3\u4fee\u6b63\uff0c\u5e76\u901a\u8fc7\u4e0e\u4fee\u6539\u524d&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;5c01998e-307b-470c-8191-7f9a8aa3fb43&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y_mn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99y16ba1bh3\" data-pm-slice=\"0 0 
[]\"><span><span>y<\/span><\/span><sub><span>m<\/span><\/sub><span><span>&nbsp;<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u7684\u5bf9\u6bd4\uff0c\u4e3a\u6bcf\u4e2a\u8bcd\u5143\u6807\u6ce8\u5956\u52b1\u3002Rafailov \u7b49<\/span><span>[2024]<\/span><span>\u3001Zhong \u7b49<\/span><span>[2024]<\/span><span>\u4eceDPO\u6846\u67b6\u5bfc\u51fa\u9690\u5f0f\u5956\u52b1\uff0c\u5f62\u5f0f\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.30620985010706636\" data-s=\"300,640\" data-type=\"png\" data-w=\"934\" style=\"width: 146px;height: auto !important\" data-imgfileid=\"100227663\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-3a5cf3fb189f8b78aae3d9e5a5dfed3c.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;033b596b-27fc-4aba-93f9-6acabb3f8c66&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\beta\\log\\frac{\\pi_{dpo}(y_t|x,y_{&lt;t})}{\\pi_{ref}(y_t|x,y_{&lt;t})}.n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u4fe1\u53f7\u53ef\u7528\u4e8e\u8bcd\u5143\u7ea7\u6807\u6ce8\u3002Yang 
\u7b49<\/span><span>[2024b]<\/span><span>\u5c06\u5176\u4f5c\u4e3a\u521d\u59cb\u8bc4\u5206\u6807\u51c6\uff0c\u5bf9\u6b63\u786e\u63a8\u7406\u4e2d\u524dk%\u7684\u8bcd\u5143\u8d4b\u4e88\u5956\u52b11\uff0c\u5176\u4f59\u4e3a0\uff1b\u5bf9\u9519\u8bef\u63a8\u7406\u4e2d\u540ek%\u7684\u8bcd\u5143\u8d4b\u503c\u4e3a-1\u3002OREAL<\/span><span>&nbsp;[Lyu \u7b49\uff0c2025]<\/span><span>\u8fdb\u4e00\u6b65\u5c06\u8bcd\u5143\u7ea7\u5956\u52b1\u603b\u548c\u4e0e\u6574\u4f53\u7ed3\u679c\u5956\u52b1\u5bf9\u9f50\uff0c\u4ece\u800c\u5b9e\u73b0\u8bcd\u5143\u7ea7\u5956\u52b1\u6a21\u578b\u7684\u5b66\u4e60\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">4.3.2 \u8bad\u7ec3\u683c\u5f0f<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u70b9\u5f0f\uff08Point-wise\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u5f53\u8bc4\u4f30\u7ed3\u679c\u4e3a\u6807\u91cf\u503c\u65f6\uff0c\u6700\u76f4\u63a5\u7684\u65b9\u5f0f\u662f\u901a\u8fc7\u76d1\u7763\u5b66\u4e60\u8bad\u7ec3\u8bc4\u4f30\u6a21\u578b\u3002\u4f8b\u5982\uff0cWang \u7b49<\/span><span>[2024g,m]<\/span><span>\u91c7\u6837\u5e76\u5b8c\u6210\u63a8\u7406\u6b65\u9aa4\uff0c\u4f7f\u7528\u5b8c\u6574\u8def\u5f84\u7684\u6210\u529f\u6982\u7387\u4f5c\u4e3a\u6bcf\u4e00\u6b65\u7684\u8bc4\u5206\uff0c\u4ece\u800c\u8bad\u7ec3\u6b65\u9aa4\u7ea7\u7684\u8fc7\u7a0b\u76d1\u7763\u9a8c\u8bc1\u5668<\/span><span>\uff08PSV\uff09<\/span><span>\u3002Lu 
\u7b49<\/span><span>[2024a]<\/span><span>\u5219\u5148\u57fa\u4e8e\u771f\u5b9e\u7b54\u6848\u6807\u6ce8\u6bcf\u4e2a\u63a8\u7406\u6b65\u9aa4\uff0c\u5e76\u8bad\u7ec3\u7ed3\u679c\u76d1\u7763\u9a8c\u8bc1\u5668<\/span><span>\uff08OSV\uff09<\/span><span>\u4ee5\u4f30\u8ba1\u6bcf\u4e00\u6b65\u901a\u5411\u6b63\u786e\u89e3\u7684\u6982\u7387\uff1b\u968f\u540e\uff0c\u901a\u8fc7\u8ba1\u7b97\u76f8\u90bb\u6b65\u9aa4\u95f4\u7684\u7f6e\u4fe1\u5ea6\u53d8\u5316\u751f\u6210\u6b65\u9aa4\u7ea7\u6807\u7b7e\uff0c\u7528\u4ee5\u8bad\u7ec3PSV\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5bf9\u5f0f\uff08Pair-wise\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>&nbsp;\u53d7 Bradley-Terry \u6a21\u578b<\/span><span>[Bradley and Terry, 1952]<\/span><span>\u542f\u53d1\uff0c\u591a\u9879\u7814\u7a76\u91c7\u7528\u504f\u597d\u5b66\u4e60\u8bad\u7ec3\u8bc4\u4f30\u5668\u3002\u8be5\u65b9\u6cd5\u6784\u9020\u504f\u597d\u5bf9&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;bfaeff02-dceb-4bad-8adb-dd048ef058db&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;(x, y^+, y^\u2212)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m99y3zpu1x8l\" data-pm-slice=\"0 0 []\"><span><span>(x, y<\/span><\/span><sup><span>+<\/span><\/sup><span data-mpa-action-id=\"m99y42mp1bht\" data-pm-slice=\"0 0 []\"><span><span>, y<\/span><\/span><sup><span>\u2212<\/span><\/sup><span><span>)<\/span><\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 
15px\"><span>&nbsp;\uff0c\u5e76\u57fa\u4e8e\u5982\u4e0b\u76ee\u6807\u4f18\u5316\u8bc4\u4f30\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;2cd93bb0-18e9-486c-adf7-4cb1f7b137b2&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;r(., .)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>r(., .)<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.062037037037037036\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 354px;height: auto !important\" data-imgfileid=\"100227664\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-858bae53f9c03fb6f89fbb1f10122de5.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;5b35a695-fd68-4ec9-9ca3-2b0bab6f8bae&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\max\\mathbb{E}_{(x,y^+,y^-)\\in D}\\log(\\sigma(r(x,y^+)-r(x,y^-))),n\\ \\ \\ \\ (22)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u8be5\u65b9\u6cd5\u65e0\u9700\u7cbe\u786e\u6807\u6ce8\u5206\u6570\uff0c\u4ec5\u4f9d\u8d56\u504f\u597d\u6570\u636e\u8bad\u7ec3\u3002\u4f8b\u5982\uff0cYu \u7b49<\/span><span>[2024b]<\/span><span>\u548c Hosseini \u7b49<\/span><span>[2024]<\/span><span>\u91c7\u7528 DPO \u4ece\u504f\u597d\u5bf9\u4e2d\u5b66\u4e60\u5956\u52b1\u51fd\u6570\uff0cLiang \u7b49<\/span><span>[2024]<\/span><span>\u57fa\u4e8e\u7b54\u6848\u6b63\u786e\u6027\u5bf9\u6765\u81ea\u591a\u4e2a\u6a21\u578b\u7684\u89e3\u8fdb\u884c\u504f\u597d\u5212\u5206\uff0c\u5e76\u4f7f\u7528 SimPO<\/span><span>&nbsp;[Meng \u7b49\uff0c2024b]<\/span><span>\u8bad\u7ec3\u8bc4\u4f30\u6a21\u578b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u514b\u670d\u73b0\u6709\u9a8c\u8bc1\u5668\u4ec5\u5728\u4e8c\u5143\u6807\u7b7e\u8def\u5f84\u4e0a\u8bad\u7ec3\u3001\u96be\u4ee5\u523b\u753b\u4e2d\u95f4\u6b65\u9aa4\u95f4\u76f8\u5bf9\u4f18\u52a3\u7684\u95ee\u9898\uff0cHe \u7b49<\/span><span>[2024b]<\/span><span>\u63d0\u51fa\u6811\u7ed3\u6784\u65b9\u6cd5\uff1a\u5bf9\u6bcf\u4e2a\u6811\u8282\u70b9\u91c7\u6837\u8865\u5168\u8def\u5f84\uff0c\u5e76\u4ee5\u5176\u5bfc\u5411\u6b63\u786e\u89e3\u7684\u6bd4\u4f8b\u4f5c\u4e3a\u5956\u52b1\uff0c\u8fdb\u4e00\u6b65\u901a\u8fc7\u5144\u5f1f\u8282\u70b9\u95f4\u7684\u5956\u52b1\u6bd4\u8f83\uff0c\u6784\u9020\u6b65\u9aa4\u7ea7\u504f\u597d\u5bf9\uff0c\u5e76\u4ee5\u6392\u5e8f\u635f\u5931\u8bad\u7ec3\u9a8c\u8bc1\u5668\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u6b64\u57fa\u7840\u4e0a\uff0cYuan 
\u7b49<\/span><span>[2023b]<\/span><span>\u63d0\u51fa\u5956\u52b1\u52a0\u6743\u504f\u597d\u5b66\u4e60<\/span><span>\uff08RRHF\uff09<\/span><span>\uff0c\u901a\u8fc7\u4ece\u591a\u4e2a\u6765\u6e90<\/span><span>\uff08\u5982\u6a21\u578b\u81ea\u8eab\u3001\u5176\u4ed6\u5927\u8bed\u8a00\u6a21\u578b\u3001\u4eba\u7c7b\u4e13\u5bb6\uff09<\/span><span>\u91c7\u6837\u54cd\u5e94\uff0c\u5e76\u6839\u636e\u4eba\u5de5\u504f\u597d\u6216\u6a21\u578b\u6253\u5206\u8fdb\u884c\u6392\u5e8f\uff0c\u5b9e\u73b0\u5bf9\u54cd\u5e94\u6761\u4ef6\u6982\u7387\u7684\u6392\u5e8f\u4f18\u5316\uff0c\u4ece\u800c\u63d0\u5347\u751f\u6210\u8d28\u91cf\u3002\u4e0a\u8ff0\u65b9\u6cd5\u5171\u540c\u63a8\u52a8\u4e86\u504f\u597d\u9a71\u52a8\u8bc4\u4f30\u4e0e\u4f18\u5316\u6846\u67b6\u7684\u9ad8\u6548\u53d1\u5c55\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u81ea\u56de\u5f52\uff08Autoregressive\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8fd1\u5e74\u6765\uff0c\u5145\u5206\u6316\u6398\u5927\u8bed\u8a00\u6a21\u578b\u81ea\u8eab\u7684\u751f\u6210\u80fd\u529b\uff0c\u6210\u4e3a\u63d0\u5347\u8bc4\u4f30\u5668\u9c81\u68d2\u6027\u4e0e\u53ef\u89e3\u91ca\u6027\u7684\u5173\u952e\u7b56\u7565\u3002\u76f8\u5173\u65b9\u6cd5\u4e3b\u8981\u6309\u53cd\u9988\u5f62\u5f0f\u53ef\u5206\u4e3a\u4e24\u7c7b\uff1a\u6982\u7387\u5206\u6570\u578b<\/span><span>\uff08probability scores\uff09<\/span><span>\u4e0e\u53e3\u5934\u6279\u8bc4\u578b<\/span><span>\uff08verbal critiques\uff09<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5728\u6982\u7387\u578b\u65b9\u6cd5\u4e2d\uff0c\u7814\u7a76\u8005\u4ece\u81ea\u7136\u8bed\u8a00\u53cd\u9988\u4e2d\u63d0\u53d6\u7279\u5b9a\u8bcd\u5143\u7684\u751f\u6210\u6982\u7387\u4f5c\u4e3a\u8bc4\u5206\u4f9d\u636e\u3002\u4f8b\u5982\uff0cZhang \u7b49<\/span><span>[2024f]<\/span><span>\u63d0\u793a\u6a21\u578b\u56de\u7b54\u201c\u7b54\u6848\u662f\u5426\u6b63\u786e<\/span><span>\uff08\u662f\/\u5426\uff09<\/span><span>\u201d\uff0c\u5e76\u5c06\u751f\u6210\u201c\u662f\u201d\u7684\u6982\u7387\u4f5c\u4e3a\u8bc4\u5206\u4f9d\u636e\uff0c\u5bf9\u5e94\u4f18\u5316\u76ee\u6807\u4e3a\u751f\u6210\u6b63\u786e\u7b54\u6848\u8bcd\u5143\u3002\u4e3a\u63d0\u5347\u89e3\u91ca\u6027\u4e0e\u7a33\u5065\u6027\uff0cZhang \u7b49<\/span><span>[2024f]<\/span><span>\u3001Ankner \u7b49<\/span><span>[2024b]<\/span><span>\u3001Gao \u7b49<\/span><span>[2024a]<\/span><span>\u5f15\u5165\u601d\u7ef4\u94fe<\/span><span>\uff08CoT\uff09<\/span><span>\u8f85\u52a9\u7b54\u6848\u751f\u6210\uff0c\u91c7\u7528\u201c\u4e24\u9636\u6bb5\u8bad\u7ec3\u201d\uff1a\u5148\u751f\u6210\u53ef\u89e3\u91ca\u601d\u7ef4\u8fc7\u7a0b\uff0c\u518d\u636e\u6b64\u8f93\u51fa\u7b54\u6848\u3002\u6b64\u5916\uff0cMahan \u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa CoT-GenRM-STaR\uff0c\u7ed3\u5408\u8bef\u5224\u6570\u636e\u4e0e DPO \u4f18\u5316\u751f\u6210\u5f0f\u5956\u52b1\u6a21\u578b\uff0c\u8fdb\u4e00\u6b65\u63a8\u52a8\u8be5\u65b9\u5411\u7814\u7a76\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u76f8\u6bd4\u4e4b\u4e0b\uff0c\u53e3\u5934\u53cd\u9988\u63d0\u4f9b\u66f4\u4e30\u5bcc\u7684\u4e0a\u4e0b\u6587\u4fe1\u606f\uff0c\u5982\u9519\u8bef\u4f4d\u7f6e\u4e0e\u539f\u56e0\uff0c\u4fbf\u4e8e\u6307\u5bfc\u540e\u7eed\u7ea0\u9519\u4e0e\u56de\u6eaf\u3002\u8be5\u7c7b\u80fd\u529b\u7684\u4f18\u5316\u8def\u5f84\u53ef\u5206\u4e3a\u884c\u4e3a\u514b\u9686<\/span><span>\uff08BC\uff09<\/span><span>\u4e0e\u5f3a\u5316\u5b66\u4e60<\/span><span>\uff08RL\uff09<\/span><span>\u3002BC \u662f\u6700\u76f4\u63a5\u7b56\u7565\uff0c\u5176\u6570\u636e\u6784\u5efa\u65b9\u5f0f\u8be6\u89c1\u7b2c 4.3.1 \u8282\u3002Xi \u7b49<\/span><span>[2024]<\/span><span>\u901a\u8fc7\u4eba\u4e3a\u6ce8\u5165\u566a\u58f0\u5e76\u5f15\u5bfc\u6a21\u578b\u751f\u6210\u6279\u8bc4\uff0c\u8fdb\u800c\u8bad\u7ec3\u8bc4\u4f30\u5668\u3002\u7531\u4e8e\u6784\u9020\u6570\u636e\u7684\u56f0\u96be\uff0c\u4e0d\u5c11\u7814\u7a76\u8f6c\u5411 RL \u65b9\u6cd5\u5b66\u4e60\u81ea\u8bc4\u80fd\u529b\u3002RL4F<\/span><span>&nbsp;[Aky\u00fcrek \u7b49\uff0c2023]&nbsp;<\/span><span>\u4e0e Retroformer<\/span><span>&nbsp;[Yao \u7b49\uff0c2024b]<\/span><span>\u5c06\u53cd\u9988\u751f\u6210\u5efa\u6a21\u4e3a RL \u4efb\u52a1\uff1a\u72b6\u6001\u4e3a\u5f53\u524d\u751f\u6210\u5185\u5bb9\u4e0e\u73af\u5883\u53cd\u9988\uff0c\u884c\u52a8\u4e3a\u751f\u6210\u5177\u4f53\u53cd\u9988\uff0c\u524d\u540e\u8d28\u91cf\u5dee\u5f02\u4f5c\u4e3a\u5956\u52b1\u4fe1\u53f7\u3002Xie \u7b49<\/span><span>[2025b]<\/span><span>\u5219\u91c7\u7528 BC \u521d\u59cb\u5316\u6a21\u578b\uff0c\u5e76\u901a\u8fc7 RL \u7cbe\u8c03\uff0c\u5c55\u73b0\u4e86\u4e24\u8005\u4e92\u8865\u6027\u3002\u6700\u524d\u6cbf\u7684\u5f00\u6e90\u5de5\u4f5c\uff0c\u5982 R1<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]&nbsp;<\/span><span>\u4e0e Kimi k1.5<\/span><span>&nbsp;[Team 
\u7b49\uff0c2025]<\/span><span>\uff0c\u5747\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u5f15\u5165\u53e3\u5934\u53cd\u9988\u81ea\u8bc4\u673a\u5236\uff0c\u5e76\u501f\u52a9\u57fa\u4e8e\u89c4\u5219\u7684\u5956\u52b1\u6846\u67b6\uff0c\u901a\u8fc7 RL \u8054\u5408\u4f18\u5316\u9010\u6b65\u63a8\u7406\u4e0e\u81ea\u6211\u8bc4\u4f30\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">4.4 \u540e\u5904\u7406\u5668\u4f18\u5316<\/span><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u540e\u5904\u7406\u9636\u6bb5\uff0c\u7814\u7a76\u91cd\u70b9\u5728\u4e8e\u63d0\u5347\u6a21\u578b\u7684\u7ea0\u9519\u80fd\u529b\u3002\u6839\u636e\u4f18\u5316\u65b9\u5f0f\u7684\u4e0d\u540c\uff0c\u8fd9\u4e9b\u65b9\u6cd5\u53ef\u5212\u5206\u4e3a\u4e24\u7c7b\uff1a<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u884c\u4e3a\u514b\u9686\uff08Behavior Cloning\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u4e0e<\/span><\/span><strong style=\"font-size: 15px\"><span><span>\u5f3a\u5316\u5b66\u4e60\uff08Reinforcement Learning\uff09<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u884c\u4e3a\u514b\u9686<\/span><\/span><\/strong><\/p>\n<p 
style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8be5\u7c7b\u65b9\u6cd5\u5305\u62ec\u4e24\u7c7b\u601d\u8def\uff1a\u589e\u5f3a\u6a21\u578b\u81ea\u8eab\u7684\u81ea\u6211\u7ea0\u9519\u80fd\u529b\uff0c\u6216\u8bad\u7ec3\u8f85\u52a9\u6a21\u578b\u534f\u52a9\u5b8c\u6210\u7ea0\u9519\u3002\u524d\u8005\u5982 Zhang \u7b49<\/span><span>[2024a]<\/span><span>\u3001An \u7b49<\/span><span>[2023]<\/span><span>\u3001Yan \u7b49<\/span><span>[2024]<\/span><span>\u3001Paul \u7b49<\/span><span>[2024]<\/span><span>\u3001Gao \u7b49<\/span><span>[2024c]<\/span><span>\u901a\u8fc7\u9519\u8bef\u91c7\u6837\u6280\u672f\uff0c\u5229\u7528\u66f4\u5f3a\u7684\u5916\u90e8\u6a21\u578b\u6216\u591a\u8f6e\u81ea\u751f\u6210\u6837\u672c\u751f\u6210\u7ea0\u9519\u6570\u636e\uff0c\u5e76\u636e\u6b64\u8fdb\u884c\u76d1\u7763\u5fae\u8c03<\/span><span>\uff08SFT\uff09<\/span><span>\uff0c\u63d0\u5347\u6a21\u578b\u7684\u81ea\u6211\u7ea0\u9519\u80fd\u529b\u3002Du \u7b49[2024]\u5219\u6784\u5efa\u6e10\u8fdb\u5f0f\u8bad\u7ec3\u96c6\uff0c\u5f3a\u5316\u6a21\u578b\u7684\u9010\u6b65\u6539\u8fdb\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u540e\u8005\u5219\u805a\u7126\u4e8e\u6784\u5efa\u4e13\u95e8\u7684\u8f85\u52a9\u6a21\u578b\u3002\u4f8b\u5982\uff0cWelleck \u7b49<\/span><span>[2023]<\/span><span>\u3001Zhang \u7b49<\/span><span>[2024i]<\/span><span>\u3001Wadhwa \u7b49<\/span><span>[2024]<\/span><span>\u8bad\u7ec3\u72ec\u7acb\u7684\u7cbe\u70bc\u5668\u6a21\u578b\u7528\u4e8e\u7b54\u6848\u4fee\u6b63\u3002Shridhar 
\u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa\u63d0\u95ee\u5668\u6a21\u578b\uff0c\u5224\u65ad\u662f\u5426\u9700\u8981\u7ea0\u9519\u5e76\u8f85\u52a9\u5b9e\u65bd\u7ea0\u9519\u64cd\u4f5c\u3002Wang \u7b49<\/span><span>[2024l]<\/span><span>\u5219\u5c06\u6a21\u578b\u5728\u53cd\u601d\u9636\u6bb5\u7684\u77e5\u8bc6\u6574\u7406\u6210\u4ee3\u7801\u672c\uff0c\u4ee5\u5b9e\u73b0\u77e5\u8bc6\u7684\u5b58\u50a8\u3001\u68c0\u7d22\u4e0e\u590d\u7528\uff0c\u8fdb\u4e00\u6b65\u63d0\u5347\u6a21\u578b\u7684\u89e3\u51b3\u95ee\u9898\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5f3a\u5316\u5b66\u4e60<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Kumar \u7b49<\/span><span>[2024]<\/span><span>\u6307\u51fa\uff0c\u57fa\u4e8e SFT 
\u7684\u65b9\u6cd5\u5728\u81ea\u6211\u7ea0\u9519\u5b66\u4e60\u4e2d\u5b58\u5728\u4e24\u5927\u6311\u6218\uff1a\u5176\u4e00\u662f\u5206\u5e03\u504f\u79fb\uff0c\u5373\u6a21\u578b\u80fd\u7ea0\u6b63\u521d\u59cb\u6a21\u578b\u7684\u9519\u8bef\uff0c\u4f46\u96be\u4ee5\u5e94\u5bf9\u81ea\u8eab\u751f\u6210\u7684\u65b0\u9519\u8bef\uff1b\u5176\u4e8c\u662f\u884c\u4e3a\u5d29\u6e83\uff0c\u5373\u6a21\u578b\u504f\u5411\u4f18\u5316\u521d\u59cb\u8f93\u51fa\uff0c\u5ffd\u7565\u771f\u6b63\u7684\u7ea0\u9519\u884c\u4e3a\u3002\u4e3a\u6b64\uff0c\u4ed6\u4eec\u63d0\u51fa\u4e00\u79cd\u7b56\u7565\u5185\u7684\u591a\u8f6e\u5f3a\u5316\u5b66\u4e60\u65b9\u6cd5\u3002\u7136\u800c\uff0c\u8be5\u65b9\u6cd5\u4ec5\u751f\u6210\u4e24\u8f6e\u89e3\u7b54<\/span><span>\uff08\u521d\u59cb\u89e3\u4e0e\u4fee\u8ba2\u89e3\uff09<\/span><span>\uff0c\u5e76\u672a\u5f15\u5165\u63a8\u7406\u8fc7\u7a0b\u4e2d\u7684\u53cd\u9988\u4fe1\u606f\uff0c\u56e0\u6b64\u4ecd\u96be\u4ee5\u5145\u5206\u5229\u7528\u5916\u90e8\u4fe1\u53f7\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e3a\u8fdb\u4e00\u6b65\u7a81\u7834\u8be5\u9650\u5236\uff0cGehring \u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa\u6574\u5408\u5916\u90e8\u6267\u884c\u53cd\u9988\u7684\u5f3a\u5316\u5b66\u4e60\u7b97\u6cd5\uff0c\u4f7f\u6a21\u578b\u80fd\u6709\u6548\u5438\u6536\u5916\u90e8\u53cd\u9988\u4fe1\u53f7\uff0c\u589e\u5f3a\u81ea\u6211\u4f18\u5316\u80fd\u529b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0cR1<\/span><span>&nbsp;[DeepSeek-AI 
\u7b49\uff0c2025]<\/span><span>\u3001Kimi k1.5<\/span><span>&nbsp;[Team \u7b49\uff0c2025]&nbsp;<\/span><span>\u53ca T1&nbsp;<\/span><span>[Hou \u7b49\uff0c2025]<\/span><span>\u7b49\u7814\u7a76\u5e76\u672a\u5728\u67b6\u6784\u4e0a\u660e\u786e\u533a\u5206\u63a8\u7406\u5668\u3001\u8bc4\u4f30\u5668\u4e0e\u540e\u5904\u7406\u5668\u6a21\u5757\u3002\u76f8\u53cd\uff0c\u8fd9\u4e9b\u80fd\u529b\u5728\u7edf\u4e00\u7684\u52a8\u4f5c\u7a7a\u95f4\u5185\uff0c\u5728\u76f8\u540c\u7684\u7ed3\u679c\u5956\u52b1\u4fe1\u53f7\u6307\u5bfc\u4e0b\u88ab\u534f\u540c\u4f18\u5316\uff0c\u4ece\u800c\u5b9e\u73b0\u7edf\u4e00\u7684\u63a8\u7406\u3001\u81ea\u8bc4\u4e0e\u7ea0\u9519\u5b66\u4e60\u3002<\/span><\/span><code style=\"font-size: 15px\"><span><br \/><\/span><\/code><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.2777777777777778\" data-type=\"png\" data-w=\"1080\" 
style=\"height: auto !important\" data-width=\"1226\" data-height=\"340\" data-imgfileid=\"100227510\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-5a953efa4e96794e5ce44fca38bf5e89.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.5em\" data-mpa-action-id=\"m96jfsnxr5e\" data-pm-slice=\"0 0 []\"><span style=\"font-size: 13px\"><strong><span><span>\u56fe 9\uff1a\u5bf9\u81ea\u6211\u8fdb\u5316\u7684\u76f4\u89c2\u7406\u89e3&nbsp;<\/span><\/span><\/strong><span><span>\u6bcf\u8f6e\u81ea\u6211\u8fdb\u5316\u7531\u4e24\u90e8\u5206\u7ec4\u6210\uff1a\u6570\u636e\u8fdb\u5316\u4e0e\u6a21\u578b\u8fdb\u5316\u3002\u7cfb\u7edf\u9996\u5148\u901a\u8fc7\u6570\u636e\u8fdb\u5316\uff08\u4e3b\u8981\u57fa\u4e8e\u641c\u7d22\uff09\u7a81\u7834\u539f\u6709\u80fd\u529b\u8fb9\u754c\uff0c\u83b7\u5f97\u66f4\u9ad8\u8d28\u91cf\u7684\u89e3\u7b54\u4f5c\u4e3a\u65b0\u4e00\u8f6e\u8bad\u7ec3\u6570\u636e\uff1b\u968f\u540e\uff0c\u7cfb\u7edf\u901a\u8fc7\u6a21\u578b\u8fdb\u5316\u4ece\u8fd9\u4e9b\u6570\u636e\u4e2d\u5b66\u4e60\uff0c\u4ece\u800c\u6269\u5c55\u5176\u80fd\u529b\u8fb9\u754c\uff0c\u5b9e\u73b0\u6027\u80fd\u7684\u6301\u7eed\u63d0\u5347\u3002<\/span><\/span><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\", Arial, sans-serif;letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 
3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span style=\", Arial, sans-serif\"><strong style=\"letter-spacing: 0.578px\"><span>5. \u81ea\u6211\u8fdb\u5316<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;, Arial, sans-serif\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5728\u201c\u6570\u636e\u8fdb\u5316\u201d\u90e8\u5206\uff0c\u6211\u4eec\u63a2\u8ba8\u4e86\u5982\u4f55\u901a\u8fc7\u4efb\u52a1\u8fdb\u5316\u4e0e\u601d\u7ef4\u94fe\u8fdb\u5316\u751f\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u8bad\u7ec3\u6570\u636e\uff1b\u5728\u201c\u6a21\u578b\u8fdb\u5316\u201d\u90e8\u5206\uff0c\u6211\u4eec\u7814\u7a76\u4e86\u63d0\u5347\u7cfb\u7edf\u5404\u6a21\u5757\u6027\u80fd\u7684\u65b9\u6cd5\u3002\u7136\u800c\uff0c\u4ec5\u4f9d\u8d56\u6570\u636e\u6216\u6a21\u578b\u5355\u4e00\u65b9\u5411\u7684\u8fdb\u5316\uff0c\u96be\u4ee5\u6784\u5efa\u51fa\u6027\u80fd\u5353\u8d8a\u7684\u63a8\u7406\u7cfb\u7edf\u3002\u6570\u636e\u8fdb\u5316\u867d\u53ef\u901a\u8fc7\u63a8\u7406\u9636\u6bb5\u7684\u8ba1\u7b97\u5f00\u9500\u63d0\u5347\u6a21\u578b\u6027\u80fd\uff0c\u5374\u53d7\u9650\u4e8e\u6a21\u578b\u672c\u8eab\u7684\u80fd\u529b\u4e0a\u9650\uff1b\u800c\u6a21\u578b\u8fdb\u5316\u82e5\u7f3a\u4e4f\u9ad8\u8d28\u91cf\u6570\u636e\u652f\u6301\uff0c\u4e5f\u65e0\u6cd5\u5b9e\u73b0\u6301\u7eed\u4f18\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u56e0\u6b64\uff0c\u672c\u8282\u5173\u6ce8\u201c\u81ea\u6211\u8fdb\u5316\u201d\uff0c\u5373\u5728\u5faa\u73af\u673a\u5236\u4e2d\u878d\u5408\u6570\u636e\u4e0e\u6a21\u578b\u8fdb\u5316\uff0c\u5b9e\u73b0\u7cfb\u7edf\u7684\u6301\u7eed\u589e\u5f3a\u3002\u5982\u56fe 9 
\u6240\u793a\uff0c\u6211\u4eec\u76f4\u89c2\u5c55\u793a\u4e86\u81ea\u6211\u8fdb\u5316\u7684\u8fd0\u4f5c\u673a\u5236\uff1a\u63a8\u7406\u7cfb\u7edf\u65e0\u9700\u4eba\u5de5\u4ecb\u5165\uff0c\u4f9d\u9760\u81ea\u8eab\u4e0d\u65ad\u751f\u6210\u6570\u636e\u5e76\u8fed\u4ee3\u4f18\u5316\u80fd\u529b\u3002\u7136\u800c\u5728\u5b9e\u8df5\u4e2d\uff0c\u81ea\u6211\u8fdb\u5316\u4ecd\u9762\u4e34\u82e5\u5e72\u5173\u952e\u6311\u6218\uff0c\u4f8b\u5982\u5982\u4f55\u4fdd\u8bc1\u7cfb\u7edf\u6027\u80fd\u6301\u7eed\u63d0\u5347\u3001\u5982\u4f55\u534f\u8c03\u6a21\u5757\u95f4\u7684\u534f\u540c\u8fdb\u5316\u3002\u63a5\u4e0b\u6765\uff0c\u6211\u4eec\u5c06\u4f9d\u6b21\u63a2\u8ba8\u5176\u6536\u655b\u6027\u7406\u8bba\u3001\u81ea\u6211\u8fdb\u5316\u7684\u89c4\u6a21\u6cd5\u5219\u3001\u81ea\u8fdb\u5316\u7b56\u7565\u4e0e\u6a21\u5f0f\u7684\u5b9e\u8df5\u5de5\u4f5c\uff0c\u6700\u540e\u4ece\u81ea\u6211\u8fdb\u5316\u7684\u89c6\u89d2\u91cd\u91ca\u5178\u578b\u7684\u7c7b O1 \u5de5\u4f5c\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">5.1 \u81ea\u6211\u8fdb\u5316\u7684\u7406\u8bba\u57fa\u7840<\/span><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 8px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u81ea\u6211\u8fdb\u5316\u8981\u6c42\u7cfb\u7edf\u5229\u7528\u81ea\u8eab\u751f\u6210\u7684\u6570\u636e\uff0c\u5728\u65e0\u5916\u90e8\u5e72\u9884\u7684\u524d\u63d0\u4e0b\u6301\u7eed\u63d0\u5347\u6027\u80fd&nbsp;<\/span><span>[Zelikman 
\u7b49\uff0c2022]<\/span><span>\u3002\u8fd9\u4e00\u201c\u81ea\u9a71\u52a8\u201d\u8bad\u7ec3\u8fc7\u7a0b\u7684\u7406\u8bba\u57fa\u7840\u4e9f\u9700\u5398\u6e05\u3002\u4e3a\u9a8c\u8bc1\u5176\u6709\u6548\u6027\uff0c\u9700\u56de\u7b54\u4e24\u4e2a\u7814\u7a76\u95ee\u9898\uff1a<\/span><\/span><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 8px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>[\u7814\u7a76\u95ee\u9898 1] \u63a8\u7406\u7684\u81ea\u6211\u8fdb\u5316\u662f\u5426\u9075\u5faa\u89c4\u6a21\u6cd5\u5219\uff1f<\/span><\/span><\/p>\n<\/li>\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>[\u7814\u7a76\u95ee\u9898 2] \u54ea\u4e9b\u5173\u952e\u56e0\u7d20\u4fc3\u4f7f\u81ea\u6211\u8fdb\u5316\u5b9e\u73b0\u6301\u7eed\u6027\u80fd\u63d0\u5347\uff1f<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>[\u7814\u7a76\u95ee\u9898 1] \u63a8\u7406\u7684\u81ea\u6211\u8fdb\u5316\u662f\u5426\u9075\u5faa\u89c4\u6a21\u6cd5\u5219\uff1f<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6211\u4eec\u9996\u5148\u8ba8\u8bba\u5728\u4efb\u52a1\u96c6\u56fa\u5b9a\u7684\u524d\u63d0\u4e0b\uff0c\u81ea\u6211\u8fdb\u5316\u662f\u5426\u80fd\u591f\u6536\u655b\u3002Singh \u7b49<\/span><span>&nbsp;[2023]&nbsp;<\/span><span>\u4ece\u671f\u671b\u6700\u5927\u5316<\/span><span>\uff08Expectation Maximization, 
EM\uff09[Moon, 1996]&nbsp;<\/span><span>\u89c6\u89d2\u5bf9\u6b64\u8fdb\u884c\u4e86\u5efa\u6a21\u3002\u5177\u4f53\u800c\u8a00\uff0c\u63a8\u7406\u4efb\u52a1\u5f62\u5f0f\u5316\u4e3a<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;56364946-b1b0-4dd8-a9d4-204b5efb31e8&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\hat{p}(z=\\hat{z}|x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.24778761061946902\" data-type=\"png\" data-w=\"452\" style=\"vertical-align: baseline;width: 78px;height: auto !important\" width=\"78\" data-width=\"78px\" data-imgfileid=\"100227665\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-910957ee283ea75d372e8edf9306153b.png\" \/><\/span><\/sub><span><span>\uff0c\u5176\u4e2dx<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;b8c90be6-105c-4877-92f4-8c52085f7a5c&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;x&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>\u4e3a\u8f93\u5165\u95ee\u9898\uff0c&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;68b33997-bf01-4323-a5f9-8c61605291e3&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\hat{z}&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"1.2\" data-type=\"png\" 
data-w=\"75\" style=\"vertical-align: baseline;width: 17px;height: auto !important\" data-width=\"60px\" data-croporisrc=\"https:\/\/mmbiz.qlogo.cn\/mmbiz_png\/wibWV1DB7tWIfpdmZsNWdnel3miccib5nmQHIWARBvcukZ0Op56P5Piaiaj0ZNtuqJyib7TJXIG8D6GnCNL8LboAvwkg\/0?wx_fmt=png&amp;from=appmsg\" data-cropx1=\"187.76666666666665\" data-cropx2=\"262\" data-cropy2=\"91.70000000000002\" data-imgfileid=\"100227667\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-59ac8f7f13a96ebd03a6965a7d7fb37d.jpg\" \/><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u4e3a\u6b63\u786e\u7b54\u6848\u3002\u5927\u8bed\u8a00\u6a21\u578b\u901a\u5e38\u751f\u6210\u4e00\u4e2a\u63a8\u7406\u94fe y<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;832b58b0-4768-47d1-8fbf-65be957391bb&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>&nbsp;\u6765\u8f85\u52a9\u63a8\u5bfc\u6700\u7ec8\u7b54\u6848\uff0c\u56e0\u6b64\u53ef\u5c06&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;c0d31d9d-cfab-47f4-960c-c9f6e3d05df9&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;y&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>y<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u89c6\u4e3a\u6f5c\u53d8\u91cf\u3002\u8bbe\u5b9a&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;9e23d39e-2414-46ed-865f-eafd716e01e8&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;O=1&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>O=1<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u8868\u793a\u8f93\u51fa\u6b63\u786e\uff08\u5373&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;e3e60588-9818-4557-8db0-33b37a8e94bb&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;z=\\hat{z}n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.3435114503816794\" data-type=\"png\" data-w=\"262\" style=\"vertical-align: baseline;width: 56px;height: auto !important\" width=\"60\" data-width=\"60px\" data-imgfileid=\"100227666\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-8eb31dd77e46be88ad6769bbca3e5c81.png\" \/><\/span><\/sub><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff09\uff0c\u6700\u7ec8\u4f18\u5316\u76ee\u6807\u4e3a\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.3490740740740741\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"height: auto !important\" data-imgfileid=\"100227669\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-6c049ce866aabcd9f9a8df2f9ffeec0a.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 
0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;7e8e1180-2c9f-4823-b356-ba98b50bf3f9&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\begin{aligned}\\max_\\phi\\log p_\\phi(O=1|x)&amp;=\\max_\\phi\\log\\mathbb{E}_{q(y|x)}\\Big[\\frac{p_\\phi(O=1,y|x)}{q(y|x)}\\Big]\\\\&amp;\\geq\\max_\\phi\\mathbb{E}_{q(y|x)}\\log\\Big[\\frac{p_\\phi(O=1,y|x)}{q(y|x)}\\Big]\\\\&amp;=\\max_\\phi\\{-\\mathbb{D}_{KL}[q(y|x)||p_\\phi(O=1,y|x)]\\}\\\\&amp;=\\max_\\phi\\mathbb{E}_{q(y|x)}[\\log p_\\phi(O=1|x,y)]-\\mathbb{D}_{KL}[q(y|x)||p_\\phi(y|x)].\\end{aligned} \\ \\ \\ \\ (23)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5bf9\u4e8e\u542b\u6709\u6f5c\u53d8\u91cf\u7684\u4f18\u5316\u95ee\u9898\uff0cEM\u7b97\u6cd5\u662f\u5e38\u7528\u65b9\u6cd5\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728 E \u6b65\u9aa4\u4e2d\uff0c\u56fa\u5b9a&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;e04f185b-f0c5-4cf6-a5b1-f074a1bede6b&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;p(O=1,y|x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>p(O=1,y|x)<\/span><\/span><\/span><\/span><span 
style=\"font-size: 15px\"><span>\uff0c\u901a\u8fc7\u6700\u5c0f\u5316&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;2534befb-2f15-4375-a71f-7575fd785df7&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\mathbb{D}_KL(q(y|x)||p(O=1,y|x))n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><sub data-pm-slice=\"0 0 []\"><span><img alt=\"image.png\" class=\"rich_pages wxw-img\" data-ratio=\"0.09814814814814815\" data-type=\"png\" data-w=\"1080\" style=\"vertical-align: baseline;width: 177px;height: auto !important\" width=\"177\" data-width=\"177px\" data-imgfileid=\"100227737\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-a6634be67b1c8af4610e16a0bbd93f5c.png\" \/><\/span><\/sub><span><span>&nbsp;\u6765\u6700\u5927\u5316\u76ee\u6807\uff0c\u6700\u7ec8\u5f97\u5230\uff1a<\/span><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.06851851851851852\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 326px;height: auto !important\" data-imgfileid=\"100227671\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-d8f58730b9368d14c6194b7a808180c3.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u8fd9\u4e00\u7ed3\u679c\u53ef\u89e3\u91ca\u4e3a\uff1a\u5148\u751f\u6210\u63a8\u7406\u8fc7\u7a0b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span 
data-meta-block-props=\"{&quot;blockId&quot;:&quot;50485bfc-c13c-4835-949e-bcff39651fe6&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;ynn&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>y&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\uff0c\u518d\u901a\u8fc7&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;97d72913-6871-414e-87f0-691ecaa99d60&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;p(O=1|x,y)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>p(O=1|x,y)&nbsp;<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>\u5224\u65ad\u8be5\u63a8\u7406\u8fc7\u7a0b\u662f\u5426\u80fd\u5bfc\u51fa\u6b63\u786e\u7b54\u6848\u3002\u56e0\u6b64\uff0cE \u6b65\u9aa4\u5bf9\u5e94\u4e8e\u5efa\u6a21\u201c\u6570\u636e\u751f\u6210\u4e0e\u8bc4\u4ef7\u201d\u7684\u8fc7\u7a0b\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728 M \u6b65\u9aa4\u4e2d\uff0c\u56fa\u5b9a &nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;3724c3af-29c6-4cde-9747-40c02f257f04&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;q( y| x)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>q(y|x)<\/span><\/span><\/span><\/span><span style=\"font-size: 
15px\"><span>\uff0c\u76ee\u6807\u53d8\u4e3a\u6700\u5c0f\u5316\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.07222222222222222\" data-s=\"300,640\" data-type=\"png\" data-w=\"1080\" style=\"width: 387px;height: auto !important\" data-imgfileid=\"100227670\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-156af9ba3b1654743b5e1a8bde480cfb.png\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;1f01b8f9-8ec0-4796-a38b-36988e7080b4&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;\\min_\\phi\\mathbb{D}_{KL}(q(y|x)||p_\\phi(y|x))=\\max_\\phi\\mathbb{E}_{q(y|x)}[\\log p_\\phi(y|x)]. 
\\ \\ \\ \\ (24)n&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><br \/><\/span><\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>M\u6b65\u9aa4\u65e8\u5728\u4f7f\u7528\u751f\u6210\u7684\u6570\u636e\u8bad\u7ec3\u63a8\u7406\u6a21\u578b&nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;d41d60ea-a613-4e0f-8c5f-87d4062a4b34&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;p_\\phi(y|x)&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span data-mpa-action-id=\"m9cqipkz8yd\" data-pm-slice=\"0 0 []\"><span><span>p<\/span><\/span><sub><span>\u03c6<\/span><\/sub><span><span>(y|x)<\/span><\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\uff0c\u5373\u5bf9\u5e94\u4e8e\u201c\u6a21\u578b\u8fdb\u5316\u201d\u7684\u8fc7\u7a0b\u3002\u7531\u4e8e EM \u5177\u6709\u7406\u8bba\u6536\u655b\u6027\uff0c\u6211\u4eec\u6709\u7406\u7531\u76f8\u4fe1\u8fd9\u79cd\u6570\u636e\u4e0e\u6a21\u578b\u4ea4\u66ff\u6539\u8fdb\u7684\u8fed\u4ee3\u8fc7\u7a0b\u662f\u53ef\u6536\u655b\u7684\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u4f46\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u4e0a\u8ff0\u5206\u6790\u4f9d\u8d56\u4e8e\u201c\u56fa\u5b9a\u4efb\u52a1\u96c6\u201d\u7684\u5047\u8bbe\u3002\u5f53\u4efb\u52a1\u672c\u8eab\u4e5f\u5728\u6f14\u5316\u65f6\uff0c\u662f\u5426\u4ecd\u5177\u6536\u655b\u6027\u4ecd\u662f\u4e00\u4e2a\u60ac\u800c\u672a\u51b3\u7684\u95ee\u9898\u3002\u4ece\u76f4\u89c9\u6765\u770b\uff0c\u4efb\u52a1\u96c6\u7684\u6269\u5c55\u6709\u52a9\u4e8e\u8986\u76d6\u66f4\u591a\u5206\u5e03\u533a\u57df\uff0c\u4ece\u800c\u63d0\u9ad8\u6a21\u578b\u6cdb\u5316\u80fd\u529b\u3002\u7136\u800c\uff0c\u81ea\u6211\u8fdb\u5316\u662f\u5426\u5b58\u5728\u89c4\u6a21\u6cd5\u5219\uff0c\u8fd8\u53d6\u51b3\u4e8e\u4efb\u52a1\u6f14\u5316\u672c\u8eab\u662f\u5426\u4e5f\u670d\u4ece\u67d0\u79cd\u89c4\u5f8b\u2014\u2014\u5373\u5b83\u662f\u5426\u5b58\u5728\u5148\u9a8c\u7684\u590d\u6742\u5ea6\u4e0a\u9650\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5f53\u7136\uff0c\u4ee5\u4e0a\u7ed3\u8bba\u5c1a\u5c5e\u7406\u8bba\u63a8\u5bfc\uff0c\u4ecd\u9700\u8fdb\u4e00\u6b65\u5b9e\u8bc1\u7814\u7a76\u52a0\u4ee5\u9a8c\u8bc1\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>[\u7814\u7a76\u95ee\u98982] \u81ea\u6211\u8fdb\u5316\u5b9e\u73b0\u6301\u7eed\u6539\u8fdb\u7684\u5173\u952e\u56e0\u7d20\u662f\u4ec0\u4e48\uff1f<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 
8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Zeng \u7b49<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u53d1\u73b0\uff0c\u5728\u73b0\u6709\u7684\u81ea\u6211\u8fdb\u5316\u65b9\u6cd5\u4e2d\uff0c\u7ecf\u8fc7 3 \u5230 4 \u4e2a\u8bad\u7ec3\u5468\u671f\u540e\uff0c\u6027\u80fd\u589e\u76ca\u660e\u663e\u51cf\u5c0f\uff0c\u751a\u81f3\u51fa\u73b0\u6027\u80fd\u4e0b\u964d\u7684\u73b0\u8c61\u3002\u4e3a\u7814\u7a76\u5f71\u54cd\u6a21\u578b\u6027\u80fd\u7684\u74f6\u9888\uff0cZeng \u7b49<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u5206\u6790\u4e86\u6a21\u578b\u6240\u641c\u7d22\u5230\u7684\u63a8\u7406\u8f68\u8ff9\u7684\u591a\u6837\u6027\u3002\u7ed3\u679c\u8868\u660e\uff0c\u968f\u7740\u81ea\u6211\u8fdb\u5316\u8bad\u7ec3\u7684\u8fdb\u884c\uff0c\u63a8\u7406\u8f68\u8ff9\u7684\u591a\u6837\u6027\u663e\u8457\u51cf\u5c11\u3002\u8fd9\u662f\u56e0\u4e3a\u88ab\u9ad8\u5ea6\u8bc4\u4f30\u7684\u8f68\u8ff9\u66f4\u6709\u53ef\u80fd\u88ab\u91cd\u65b0\u91c7\u6837\uff0c\u4ece\u800c\u5bfc\u81f4\u63a8\u7406\u6a21\u578b\u6536\u655b\u5230\u8f83\u4e3a\u72ed\u7a84\u7684\u63a8\u7406\u6a21\u5f0f\u3002\u867d\u7136\u8fd9\u79cd\u63a2\u7d22\u51cf\u5c11\u6709\u52a9\u4e8e\u6a21\u578b\u805a\u7126\u4e8e\u751f\u6210\u66f4\u9ad8\u8d28\u91cf\u7684\u63a8\u7406\u8fc7\u7a0b\uff0c\u4f46\u4e5f\u610f\u5473\u7740\u6a21\u578b\u672a\u80fd\u63a2\u7d22\u65b0\u7684\u77e5\u8bc6\uff0c\u8fdb\u800c\u963b\u788d\u4e86\u5176\u6cdb\u5316\u80fd\u529b\u7684\u63d0\u5347\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u57fa\u4e8e\u8fd9\u4e9b\u89c2\u5bdf\uff0cZeng 
\u7b49<\/span><span>&nbsp;[2024a]&nbsp;<\/span><span>\u63d0\u51fa\u4e86\u4e00\u4e2a\u65b0\u7684\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u2014\u2014B-STAR\u3002\u9996\u5148\uff0c\u4ed6\u4eec\u8bbe\u8ba1\u4e86\u201c\u5e73\u8861\u5206\u6570\u201d<\/span><span>\uff08Balance Score\uff09<\/span><span>\u6307\u6807\uff0c\u7528\u4e8e\u8861\u91cf\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u6a21\u578b\u7684\u63a2\u7d22\u80fd\u529b\u3002\u968f\u540e\uff0c\u4ed6\u4eec\u5f15\u5165\u4e86\u4e00\u79cd\u57fa\u4e8e\u91c7\u6837\u6e29\u5ea6\u548c\u5956\u52b1\u8fc7\u6ee4\u9608\u503c\u7684\u52a8\u6001\u8bad\u7ec3\u7b56\u7565\uff0c\u65e8\u5728\u7f13\u89e3\u81ea\u6211\u8fdb\u5316\u8fc7\u7a0b\u4e2d\u63a2\u7d22\u6027\u4e0b\u964d\u7684\u95ee\u9898\u3002B-STAR \u6846\u67b6\u7684\u7ed3\u8bba\u8868\u660e\uff0c\u5f71\u54cd\u81ea\u6211\u8fdb\u5316\u6027\u80fd\u7684\u5173\u952e\u56e0\u7d20\u662f\u5927\u8bed\u8a00\u6a21\u578b\u5728\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\u9047\u5230\u7684\u63a8\u7406\u8f68\u8ff9\u7684\u591a\u6837\u6027\u3002\u5f53\u8fd9\u79cd\u591a\u6837\u6027\u51cf\u5c11\u65f6\uff0c\u8fdb\u5316\u6548\u679c\u4e5f\u4f1a\u968f\u4e4b\u51cf\u5f31\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4ece\u66f4\u9ad8\u5c42\u6b21\u7684\u89d2\u5ea6\u6765\u770b\uff0cB-STAR \u901a\u8fc7\u52a0\u5f3a\u601d\u7ef4\u94fe<\/span><span>\uff08CoT\uff09<\/span><span>\u8fdb\u5316\u7684\u63a2\u7d22\u529b\u5ea6\uff0c\u63d0\u5347\u4e86\u63a8\u7406\u7cfb\u7edf\u7684\u6cdb\u5316\u80fd\u529b\u3002\u9075\u5faa\u8fd9\u4e00\u63a8\u7406\u6cdb\u5316\u7684\u6539\u8fdb\u539f\u5219\uff0c\u53ef\u4ee5\u91c7\u7528\u4ee5\u4e0b\u989d\u5916\u7b56\u7565\u8fdb\u4e00\u6b65\u589e\u5f3a\u7cfb\u7edf\u7684\u6cdb\u5316\u80fd\u529b\uff1a1\uff09 
\u589e\u52a0\u4efb\u52a1\u7684\u591a\u6837\u6027\u548c\u96be\u5ea6\u53ef\u4ee5\u76f4\u63a5\u589e\u5f3a\u63a8\u7406\u8f68\u8ff9\u7684\u591a\u6837\u6027\uff0c\u8fdb\u800c\u6539\u5584\u7cfb\u7edf\u7684\u6cdb\u5316\u80fd\u529b<\/span><span>&nbsp;[Li \u7b49\uff0c2024a]<\/span><span>\uff1b2\uff09 \u63d0\u5347\u7cfb\u7edf\u7684\u81ea\u6211\u8bc4\u4f30\u548c\u540e\u5904\u7406\u80fd\u529b\uff0c\u6709\u52a9\u4e8e\u5728\u9047\u5230\u9519\u8bef\u65f6\u663e\u8457\u589e\u5f3a\u7cfb\u7edf\u7684\u9c81\u68d2\u6027\uff1b3\uff09 \u63d0\u9ad8\u63a8\u7406\u5668\u903b\u8f91\u7684\u4e00\u81f4\u6027\u548c\u6cdb\u5316\u80fd\u529b\uff0c\u53ef\u4ee5\u6709\u6548\u964d\u4f4e\u9519\u8bef\u7684\u53d1\u751f\u9891\u7387\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">5.2 \u81ea\u6211\u8fdb\u5316\u7b56\u7565<\/span><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5728\u9884\u5907\u90e8\u5206\u4e2d\uff0c\u6211\u4eec\u5df2\u754c\u5b9a\u4e86\u63a8\u7406\u7cfb\u7edf\u4e2d\u7684\u56db\u4e2a\u5173\u952e\u6a21\u5757\u53ca\u5176\u529f\u80fd\u4e0e\u76f8\u4e92\u5173\u7cfb\u3002\u7531\u6b64\uff0c\u63a8\u7406\u7cfb\u7edf\u53ef\u89c6\u4e3a\u4e00\u4e2a\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\uff0c\u7406\u8bba\u4e0a\u4efb\u4f55\u4e00\u4e2a\u6a21\u5757\u7684\u8fdb\u5316\u90fd\u5e94\u63d0\u5347\u6574\u4f53\u6027\u80fd\uff0c\u8054\u5408\u4f18\u5316\u5219\u6709\u671b\u5e26\u6765\u66f4\u663e\u8457\u7684\u6027\u80fd\u589e\u76ca\u3002\u4e0b\u6587\u603b\u7ed3\u4e09\u7c7b\u9002\u7528\u4e8e\u63a8\u7406\u7cfb\u7edf\u7684\u591a\u667a\u80fd\u4f53\u8bad\u7ec3\u7b56\u7565\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.2.1 \u72ec\u7acb\u8fdb\u5316<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u65e9\u671f\u81ea\u6211\u8fdb\u5316\u7cfb\u7edf\u4e2d\uff0c\u6a21\u5757\u4f18\u5316\u901a\u5e38\u76f8\u5bf9\u72ec\u7acb\uff0c\u4ec5\u805a\u7126\u5355\u4e2a\u6a21\u5757\uff0c\u4e14\u6a21\u5757\u95f4\u8026\u5408\u6027\u8f83\u5f31\u3002\u4f8b\u5982\uff0cZelikman \u7b49<\/span><span>[2022]<\/span><span>\u3001Gulcehre \u7b49<\/span><span>[2023]<\/span><span>\u4f7f\u7528\u6807\u51c6\u7b54\u6848\u7b5b\u9009\u6b63\u786e\u89e3\uff0c\u4ee5\u63d0\u5347\u63a8\u7406\u5668\u6027\u80fd\uff1bHosseini \u7b49<\/span><span>[2024]<\/span><span>\u4ea6\u57fa\u4e8e\u6807\u51c6\u7b54\u6848\u6784\u9020\u504f\u597d\u5bf9\uff0c\u7528 DPO 
\u8bad\u7ec3\u9a8c\u8bc1\u5668\uff0c\u4f46\u9a8c\u8bc1\u5668\u672a\u80fd\u53cd\u54fa\u63a8\u7406\u5668\u8bad\u7ec3\u3002Madaan \u7b49<\/span><span>[2023b]<\/span><span>\u901a\u8fc7\u89e3\u7684\u8fed\u4ee3\u4fee\u6b63\u5b9e\u73b0\u903b\u8f91\u81ea\u6211\u8fdb\u5316\uff0c\u7136\u800c\u7ed3\u679c\u8868\u660e\u4ec5\u4f9d\u8d56\u4e0a\u4e0b\u6587\u5b66\u4e60\u96be\u4ee5\u5b9e\u73b0\u81ea\u6211\u7ea0\u9519\u3002Wang \u7b49<\/span><span>[2023d]<\/span><span>\u5728\u63a8\u7406\u9636\u6bb5\u4f18\u5316\u540e\u5904\u7406\u5668\uff0c\u63a8\u7406\u5668\u4ec5\u751f\u6210\u521d\u59cb\u89e3\uff0c\u5bf9\u540e\u7eed\u6f14\u5316\u65e0\u8d21\u732e\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u72ec\u7acb\u8fdb\u5316\u5b9e\u73b0\u7b80\u5355\uff0c\u6613\u4e8e\u90e8\u7f72\uff0c\u4f46\u63d0\u5347\u6709\u9650\u3002\u5bf9\u5404\u6a21\u5757\u6f14\u5316\u673a\u5236\u7684\u5355\u72ec\u7814\u7a76\u53ef\u4e3a\u672a\u6765\u591a\u6a21\u5757\u8054\u5408\u4f18\u5316\u5960\u5b9a\u57fa\u7840\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.2.2 \u534f\u4f5c\u8fdb\u5316<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u5728\u6d89\u53ca\u591a\u4e2a\u6a21\u5757\u7684\u8054\u5408\u8fdb\u5316\u65f6\uff0c\u4e00\u79cd\u5e38\u89c1\u65b9\u6cd5\u662f\u5229\u7528\u6a21\u5757\u4e4b\u95f4\u7684\u5408\u4f5c\u6765\u63d0\u9ad8\u6574\u4f53\u7cfb\u7edf\u6027\u80fd\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u534f\u4f5c\u8fdb\u5316\u901a\u8fc7\u6a21\u5757\u95f4\u4fe1\u606f\u5171\u4eab\u63d0\u5347\u6574\u4f53\u6027\u80fd\u3002Jiang \u7b49<\/span><span>[2024a]<\/span><span>\u7528\u63a8\u7406\u5668\u751f\u6210\u6b63\u786e\u4e0e\u9519\u8bef\u89e3\u6784\u5efa\u5956\u52b1\u6a21\u578b\u8bad\u7ec3\u6570\u636e\uff0c\u540c\u65f6\u7528\u5956\u52b1\u6a21\u578b\u7b5b\u9009\u4f18\u8d28\u89e3\u53cd\u54fa\u63a8\u7406\u5668\u8bad\u7ec3\u3002Wang \u7b49<\/span><span>[2024e]<\/span><span>\u91c7\u7528\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u6574\u4e2a\u63a8\u7406\u7cfb\u7edf\uff0c\u7b56\u7565\u8fed\u4ee3\u4e2d\u4f7f\u7528\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08\u8bc4\u4f30\u5668\uff09<\/span><span>\u63d0\u4f9b\u4f18\u5316\u4fe1\u53f7\uff0c\u4ef7\u503c\u8fed\u4ee3\u4e2d\u5229\u7528\u63a8\u7406\u5668\u751f\u6210\u7684\u6570\u636e\u8bad\u7ec3\u8bc4\u4f30\u5668\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4e0a\u8ff0\u5de5\u4f5c\u8868\u660e\uff0c\u672a\u6765\u5e94\u8fdb\u4e00\u6b65\u63a2\u7d22\u66f4\u590d\u6742\u7684\u534f\u540c\u7b56\u7565\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br 
\/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.2.3 \u5bf9\u6297\u8fdb\u5316<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u9664\u534f\u4f5c\u5916\uff0c\u5bf9\u6297\u673a\u5236\u4e5f\u662f\u4e00\u79cd\u6709\u6548\u7684\u8054\u5408\u5b66\u4e60\u7b56\u7565\u3002\u5982 GAN<\/span><span>&nbsp;[Goodfellow \u7b49, 2014]&nbsp;<\/span><span>\u4e2d\u751f\u6210\u5668\u4e0e\u5224\u522b\u5668\u7684\u535a\u5f08\u6781\u5927\u63a8\u52a8\u4e86\u751f\u6210\u6a21\u578b\u7684\u53d1\u5c55\u3002\u5728\u63a8\u7406\u7cfb\u7edf\u4e2d\uff0c\u4efb\u52a1\u521b\u5efa\u5668\u4e0e\u63a8\u7406\u5668\u5929\u7136\u6784\u6210\u5bf9\u6297\u5173\u7cfb\uff1a\u524d\u8005\u63d0\u51fa\u66f4\u5177\u6311\u6218\u6027\u7684\u4efb\u52a1\uff0c\u540e\u8005\u5c1d\u8bd5\u89e3\u51b3\u4e4b\u3002Ye \u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa\u63a8\u7406\u5668\u4e0e\u4efb\u52a1\u521b\u5efa\u5668\u7684\u5bf9\u6297\u8bad\u7ec3\u6846\u67b6\u3002\u63a8\u7406\u5668\u901a\u8fc7 ReST&nbsp;<\/span><span>[Gulcehre \u7b49, 2023]<\/span><span>\u8fed\u4ee3\u4f18\u5316\uff0c\u4efb\u52a1\u521b\u5efa\u5668\u57fa\u4e8e\u4e0d\u786e\u5b9a\u6027\u9009\u62e9\u79cd\u5b50\u4efb\u52a1\u5e76\u901a\u8fc7 Evol-Instruct \u751f\u6210\u591a\u6837\u4efb\u52a1\u3002\u6b64\u8fc7\u7a0b\u5b9e\u73b0\u4e86\u4efb\u52a1\u521b\u5efa\u5668\u4e0e\u63a8\u7406\u5668\u7684\u8054\u52a8\u8fdb\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span 
style=\"font-size: 15px\"><span>\u5bf9\u6297\u673a\u5236\u8f83\u534f\u4f5c\u66f4\u96be\u5b66\u4e60\uff0c\u4f46\u80fd\u7f13\u89e3\u6a21\u578b\u9677\u5165\u5c40\u90e8\u6700\u4f18\u7684\u95ee\u9898\uff0c\u800c\u534f\u4f5c\u673a\u5236\u5219\u53ef\u80fd\u52a0\u5267\u6b64\u98ce\u9669\u3002\u82e5\u5408\u7406\u878d\u5408\u534f\u4f5c\u4e0e\u5bf9\u6297\u7b56\u7565\uff0c\u7cfb\u7edf\u6709\u671b\u83b7\u5f97\u66f4\u5927\u6027\u80fd\u63d0\u5347\u3002<\/span><\/span><\/p>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h2>\n<h2 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">5.3 \u81ea\u6211\u8fdb\u5316\u6a21\u5f0f<\/span><\/span><\/h2>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u8ba8\u8bba\u6574\u4f53\u7b56\u7565\u4e4b\u540e\uff0c\u6211\u4eec\u4ece\u6a21\u5757\u89c6\u89d2\u51fa\u53d1\uff0c\u8fdb\u4e00\u6b65\u5206\u6790\u591a\u79cd\u81ea\u6211\u8fdb\u5316\u6a21\u5f0f\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.3.1 \u63a8\u7406\u5668<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u591a\u6570\u65b9\u6cd5\u76f4\u63a5\u4f18\u5316\u63a8\u7406\u5668\u4ee5\u63d0\u5347\u7cfb\u7edf\u6027\u80fd\uff0c\u8fd9\u7c7b\u65b9\u6cd5\u5b9e\u73b0\u7b80\u5355\uff0c\u5dee\u5f02\u4e3b\u8981\u4f53\u73b0\u5728\u8bad\u7ec3\u6570\u636e\u6784\u9020\u4e0e\u8bad\u7ec3\u65b9\u5f0f\u4e0a\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u63a8\u7406\u5668\u8bad\u7ec3\u65b9\u9762\uff0cGulcehre \u7b49<\/span><span>[2023]<\/span><span>\u3001Min \u7b49<\/span><span>[2024]<\/span><span>\u3001Zelikman \u7b49<\/span><span>[2022]<\/span><span>\u5728\u6b63\u786e\u63a8\u7406\u8f68\u8ff9\u4e0a\u8fdb\u884c\u76d1\u7763\u5fae\u8c03<\/span><span>\uff08SFT\uff09<\/span><span>\uff1bChen \u7b49<\/span><span>[2024b]<\/span><span>\u3001Xie \u7b49<\/span><span>[2024]<\/span><span>\u3001Wang \u7b49<\/span><span>[2024j,k]<\/span><span>\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u7b5b\u9009\u504f\u597d\u6570\u636e\u5e76\u636e\u6b64\u8bad\u7ec3\u63a8\u7406\u5668\uff1bGulcehre \u7b49<\/span><span>[2023]<\/span><span>\u5219\u901a\u8fc7\u5956\u52b1\u6a21\u578b\u751f\u6210\u7684\u5956\u52b1\uff0c\u5229\u7528\u5f3a\u5316\u5b66\u4e60\u4f18\u5316\u63a8\u7406\u5668\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u6570\u636e\u83b7\u53d6\u65b9\u9762\uff0cSingh \u7b49<\/span><span>[2023]<\/span><span>\u3001Min \u7b49<\/span><span>[2024]<\/span><span>\u3001Pang 
\u7b49<\/span><span>[2024]<\/span><span>\u76f4\u63a5\u4f7f\u7528\u6807\u51c6\u7b54\u6848\u7b5b\u9009\u6b63\u786e\u6837\u672c\u7528\u4e8e\u4e0b\u4e00\u8f6e\u8bad\u7ec3\uff1bZelikman \u7b49<\/span><span>[2022]<\/span><span>\u901a\u8fc7\u5c06\u6b63\u786e\u7b54\u6848\u4f5c\u4e3a\u63d0\u793a\uff0c\u91cd\u65b0\u751f\u6210\u539f\u672c\u9519\u8bef\u95ee\u9898\u7684\u89e3\u7b54\uff0c\u4ee5\u63d0\u5347\u6b63\u6837\u672c\u6bd4\u4f8b\uff1bPeng \u7b49[2024]\u6307\u51fa\u5728 STaR<\/span><span>&nbsp;[Zelikman \u7b49\uff0c2022]&nbsp;<\/span><span>\u4e2d\u76f4\u63a5\u63d0\u4f9b\u7b54\u6848\u53ef\u80fd\u5bfc\u81f4\u6a21\u578b\u5f62\u6210\u6377\u5f84\uff0c\u56e0\u6b64\u4ec5\u5728\u62bd\u8c61\u63a8\u7406\u9636\u6bb5\u63d0\u4f9b\u7b54\u6848\uff1bHuang \u7b49<\/span><span>[2022]<\/span><span>\u3001Li \u7b49<\/span><span>[2024c]<\/span><span>\u5219\u5728\u65e0\u7b54\u6848\u6807\u7b7e\u573a\u666f\u4e2d\u5229\u7528\u4e00\u81f4\u6027\u7b5b\u9009\u7b54\u6848\u4ee5\u6784\u5efa\u6570\u636e\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0cAksitov \u7b49<\/span><span>[2023]<\/span><span>\u3001Dong \u7b49<\/span><span>[2023]<\/span><span>\u4f7f\u7528\u5956\u52b1\u6a21\u578b\u5bf9\u63a8\u7406\u8f68\u8ff9\u8fdb\u884c\u8bc4\u5206\u548c\u6392\u5e8f\uff0c\u7b5b\u9009\u9ad8\u8d28\u91cf\u8f68\u8ff9\u8bad\u7ec3\u63a8\u7406\u5668\uff1bSong \u7b49<\/span><span>[2024]<\/span><span>\u4f9d\u636e\u73af\u5883\u5956\u52b1\u8fc7\u6ee4\u4f4e\u8d28\u91cf\u8f68\u8ff9\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.3.2 \u63a8\u7406\u5668+\u8bc4\u4f30\u5668<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u5728\u81ea\u6211\u8fdb\u5316\u4e2d\uff0c\u8bc4\u4f30\u5668\u9700\u8bc4\u4f30\u63a8\u7406\u8fc7\u7a0b\uff0c\u4f46\u5176\u6cdb\u5316\u80fd\u529b\u662f\u5173\u952e\u96be\u70b9\u3002\u968f\u7740\u8bad\u7ec3\u6df1\u5165\uff0c\u63a8\u7406\u5668\u751f\u6210\u7684\u95ee\u9898\u4e0e\u63a8\u7406\u8fc7\u7a0b\u53ef\u80fd\u504f\u79bb\u8bc4\u4f30\u5668\u7684\u8bad\u7ec3\u5206\u5e03\uff0c\u5f71\u54cd\u8bc4\u4f30\u6548\u679c\uff0c\u56e0\u6b64\u63d0\u5347\u8bc4\u4f30\u5668\u7684\u6cdb\u5316\u80fd\u529b\u81f3\u5173\u91cd\u8981\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Yuan \u7b49<\/span><span>[2024d]<\/span><span>\u3001Wang \u7b49<\/span><span>[2024c]&nbsp;<\/span><span>\u4f7f\u7528\u63a8\u7406\u5668\u751f\u6210\u7684\u6b63\u786e\u63a8\u7406\u8fc7\u7a0b\u8bad\u7ec3\u63a8\u7406\u5668\uff0c\u540c\u65f6\u5229\u7528\u6b63\u8d1f\u6837\u672c\u8bad\u7ec3\u5956\u52b1\u6a21\u578b\uff0c\u4ece\u800c\u63d0\u5347\u8bc4\u4f30\u5668\u6027\u80fd\u3002Jiang \u7b49<\/span><span>[2024a]<\/span><span>\u8fdb\u4e00\u6b65\u5f15\u5165\u5956\u52b1\u6a21\u578b\u53c2\u4e0e\u6837\u672c\u7b5b\u9009\uff0c\u5e76\u91c7\u7528\u4e3b\u52a8\u5b66\u4e60\u4f18\u5148\u5904\u7406\u96be\u4f8b\u3002Zhang \u7b49<\/span><span>[2024c]<\/span><span>\u3001Guan 
\u7b49<\/span><span>[2025]<\/span><span>\u63d0\u51fa\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u7684\u8fed\u4ee3\u8bad\u7ec3\u6846\u67b6\uff1a\u63a8\u7406\u5668\u901a\u8fc7MCTS\u83b7\u5f97\u6b65\u9aa4\u7ea7\u4ef7\u503c\u4f30\u8ba1\uff0c\u8bad\u7ec3\u51fa\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PPM\uff09<\/span><span>\uff0c\u540e\u8005\u518d\u7528\u4e8e\u63d0\u5347\u63a8\u7406\u8f68\u8ff9\u8d28\u91cf\uff0c\u53cd\u54fa\u63a8\u7406\u5668\u3002Zhang \u7b49<\/span><span>[2024j]<\/span><span>\u3001Wang \u7b49<\/span><span>[2024e]<\/span><span>\u91c7\u7528\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\u63a8\u7406\u5668\uff0c\u5e76\u7528\u8fc7\u7a0b\u5956\u52b1\u8bc4\u5206\u6307\u5bfc\u5176\u4f18\u5316\uff0c\u5b9e\u73b0\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u7684\u534f\u540c\u8fdb\u5316\u3002Chen \u7b49<\/span><span>[2024b]<\/span><span>\u3001Zhang \u7b49<\/span><span>[2024c]<\/span><span>\u4e0d\u76f4\u63a5\u4f7f\u7528\u8bc4\u4f30\u5668\uff0c\u800c\u662f\u901a\u8fc7\u5956\u52b1\u6a21\u578b\u5f15\u5bfcMCTS\u641c\u7d22\u4ee5\u63d0\u9ad8\u63a8\u7406\u5668\u751f\u6210\u89e3\u7684\u6b63\u786e\u7387\u3002Cheng \u7b49<\/span><span>[2024]<\/span><span>\u3001Chen \u7b49<\/span><span>[2024i]<\/span><span>\u63a2\u7d22\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u7684\u5bf9\u6297\u8bad\u7ec3\uff0c\u8bc4\u4f30\u5668\u5224\u65ad\u56de\u7b54\u662f\u5426\u4e0e\u6807\u51c6\u4e00\u81f4\uff0c\u63a8\u7406\u5668\u5219\u8bd5\u56fe\u751f\u6210\u6df7\u6dc6\u8bc4\u4f30\u5668\u7684\u56de\u7b54\u3002\u7136\u800c\u8be5\u65b9\u6cd5\u5728\u63a8\u7406\u4efb\u52a1\u4e2d\u4ecd\u5f85\u9a8c\u8bc1\uff0c\u56e0\u4e3a\u63a8\u7406\u4efb\u52a1\u91cd\u5728\u7b54\u6848\u6b63\u786e\u6027\u800c\u975e\u8fc7\u7a0b\u4e00\u81f4\u6027\uff0c\u63a8\u7406\u591a\u6837\u6027\u66f4\u80fd\u4f53\u73b0\u6a21\u578b\u80fd\u529b\uff0c\u4ec5\u5bf9\u9f50\u6807\u51c6\u8fc7\u7a0b\u53ef\u80fd\u9002\u5f97\u5176\u53cd\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 
1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.3.3 \u63a8\u7406\u5668+\u540e\u5904\u7406\u5668<\/span><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u63a8\u7406\u5668\u8d8a\u5f3a\u5927\uff0c\u540e\u5904\u7406\u5668<\/span><span>\uff08\u5c24\u5176\u662f\u7cbe\u70bc\u5668\uff09<\/span><span>\u6240\u9700\u7684\u4fee\u6b63\u538b\u529b\u8d8a\u5c0f\u3002\u4e00\u4e9b\u5de5\u4f5c\u81f4\u529b\u4e8e\u8fd9\u4e24\u4e2a\u6a21\u5757\u7684\u534f\u540c\u4f18\u5316\u3002Dou \u7b49<\/span><span>[2024]<\/span><span>\u9996\u5148\u7528\u63a8\u7406\u5668\u751f\u6210\u521d\u59cb\u89e3\uff0c\u518d\u8fdb\u884c\u7cbe\u70bc\uff1bWang \u7b49<\/span><span>[2023d]<\/span><span>\u91c7\u7528\u591a\u8f6e\u7cbe\u70bc\uff0c\u76f4\u81f3\u89e3\u6b63\u786e\u6216\u8fbe\u5230\u6700\u5927\u8f6e\u6570\u3002\u8bad\u7ec3\u8fc7\u7a0b\u4e2d\uff0cDou \u7b49<\/span><span>[2024]<\/span><span>\u91c7\u96c6\u7cbe\u70bc\u540e\u7684\u89e3\u5e76\u901a\u8fc7SFT\u8bad\u7ec3\u7cbe\u70bc\u5668\uff0c\u540c\u65f6\u7528\u6b63\u786e\u89e3\u5bf9\u63a8\u7406\u5668\u8fdb\u884cSFT\u4ee5\u589e\u5f3a\u5176\u63a8\u7406\u80fd\u529b\uff1bWang \u7b49<\/span><span>[2023d]<\/span><span>\u5219\u5c06\u7cbe\u70bc\u8fc7\u7a0b\u5efa\u6a21\u4e3a\u9a6c\u5c14\u53ef\u592b\u51b3\u7b56\u8fc7\u7a0b\uff0c\u4f7f\u7528\u5f3a\u5316\u5b66\u4e60\u4f18\u5316\u7cbe\u70bc\u5668\u5404\u8f6e\u4fee\u6539\u64cd\u4f5c\uff0c\u5e76\u4ee5\u6700\u7ec8\u89e3\u4e3a\u76ee\u6807\u8bad\u7ec3\u63a8\u7406\u5668\uff0c\u4e8c\u8005\u540c\u6b65\u4f18\u5316\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 
0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.3.4 \u63a8\u7406\u5668+\u4efb\u52a1\u521b\u5efa\u5668<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u4efb\u52a1\u7684\u591a\u6837\u6027\u4e0e\u590d\u6742\u6027\u5bf9\u81ea\u6211\u8fdb\u5316\u6548\u679c\u5f71\u54cd\u663e\u8457\u3002\u82e5\u4ec5\u5728\u9759\u6001\u4efb\u52a1\u96c6\u4e0a\u5b66\u4e60\uff0c\u63a8\u7406\u5668\u5bb9\u6613\u8fc7\u62df\u5408\uff0c\u964d\u4f4e\u5bf9\u5206\u5e03\u5916\u4efb\u52a1\u7684\u9002\u5e94\u80fd\u529b\uff0c\u56e0\u6b64\u4efb\u52a1\u521b\u5efa\u5668\u7684\u8fdb\u5316\u4ea6\u81f3\u5173\u91cd\u8981\u3002Ye \u7b49<\/span><span>[2024]<\/span><span>\u63d0\u51fa\u63a8\u7406\u5668\u4e0e\u4efb\u52a1\u521b\u5efa\u5668\u7684\u5bf9\u6297\u8bad\u7ec3\u6846\u67b6\uff1a\u4efb\u52a1\u521b\u5efa\u5668\u6301\u7eed\u751f\u6210\u66f4\u5177\u6311\u6218\u6027\u7684\u4efb\u52a1\uff0c\u63a8\u7406\u5668\u5219\u63d0\u5347\u80fd\u529b\u4ee5\u5e94\u5bf9\u6311\u6218\u3002\u8be5\u65b9\u6cd5\u5982\u7b2c5.2.3\u8282\u6240\u8ff0\uff0c\u901a\u8fc7\u5bf9\u6297\u5b66\u4e60\u5b9e\u73b0\u53cc\u5411\u8fdb\u5316\uff0c\u4fdd\u969c\u6301\u7eed\u6539\u8fdb\u3002<\/span><\/span><\/p>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h3>\n<h3 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold;text-decoration: underline\">5.3.5 
\u63a8\u7406\u5668+\u8bc4\u4f30\u5668+\u540e\u5904\u7406\u5668<\/span><\/span><\/h3>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u76f8\u8f83\u4e8e\u53ea\u8fdb\u5316\u5355\u4e00\u6216\u53cc\u6a21\u5757\u7684\u5de5\u4f5c\uff0c\u63a8\u7406\u5668\u3001\u8bc4\u4f30\u5668\u4e0e\u540e\u5904\u7406\u5668\u7684\u8054\u5408\u8fdb\u5316\u7406\u8bba\u4e0a\u53ef\u5b9e\u73b0\u66f4\u5927\u6027\u80fd\u63d0\u5347\u3002\u8fd1\u671f\u5de5\u4f5c\u5982 R1<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]<\/span><span>\u3001T1&nbsp;<\/span><span>[Hou \u7b49\uff0c2025]<\/span><span>\u3001Kimi-k1.5<\/span><span>&nbsp;[Team \u7b49\uff0c2025]<\/span><span>\u4e13\u6ce8\u4e8e\u5b66\u4e60\u957f\u601d\u7ef4\u94fe<\/span><span>\uff08Long CoT\uff09[Xu \u7b49\uff0c2025]<\/span><span>\uff0c\u5373\u662f\u4e00\u4f8b\u3002\u8fd9\u4e9b\u65b9\u6cd5\u751f\u6210\u5305\u542b\u81ea\u8bc4\u3001\u81ea\u53cd\u601d\u4e0e\u81ea\u7ea0\u9519\u64cd\u4f5c\u7684\u957f\u601d\u7ef4\u94fe\uff0c\u5e76\u901a\u8fc7\u57fa\u4e8e\u7ed3\u679c\u5956\u52b1\u7684\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u5bf9LLM\u8fdb\u884c\u4f18\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u9996\u5148\uff0c\u57fa\u4e8e\u5728\u7ebfRL\u7684\u63a8\u7406\u5b66\u4e60\u5929\u7136\u5951\u5408\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u3002\u4e0e\u4f9d\u8d56\u9759\u6001\u6570\u636e\u96c6\u7684\u65b9\u6cd5\u4e0d\u540c\uff0c\u5728\u7ebfRL\u9a71\u52a8\u6a21\u578b\u4e0e\u73af\u5883\u4ea4\u4e92\u3001\u751f\u6210\u8f68\u8ff9\u4e0e\u5956\u52b1\u8fdb\u884c\u4f18\u5316\uff0c\u968f\u7740\u8bad\u7ec3\u63a8\u8fdb\uff0c\u6a21\u578b\u63a2\u7d22\u51fa\u66f4\u591a\u6837\u5316\u4e14\u66f4\u9ad8\u8d28\u91cf\u7684\u89e3\uff0c\u5e76\u53ef\u901a\u8fc7\u63a2\u7d22-\u5229\u7528\u673a\u5236\u7f13\u89e3\u6027\u80fd\u505c\u6ede\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0cR1\u7b49\u65b9\u6cd5\u672a\u663e\u5f0f\u5212\u5206\u8bc4\u4f30\u5668\u4e0e\u540e\u5904\u7406\u5668\uff0c\u4e5f\u672a\u5206\u522b\u4f18\u5316\u5176\u8bc4\u4f30\u4e0e\u7ea0\u9519\u80fd\u529b<\/span><span>\uff08\u5982rStar-Math [Guan \u7b49\uff0c2025]\uff09<\/span><span>\uff0c\u800c\u662f\u5728\u7edf\u4e00\u7684\u884c\u52a8\u7a7a\u95f4\u4e0e\u7ed3\u679c\u5956\u52b1\u5f15\u5bfc\u4e0b\uff0c\u540c\u65f6\u4f18\u5316\u63a8\u7406\u3001\u9a8c\u8bc1\u3001\u8bc4\u4f30\u3001\u53cd\u601d\u4e0e\u7ea0\u9519\u80fd\u529b\u3002\u56e0\u6b64\u6211\u4eec\u8ba4\u4e3a\uff0cR1\u7c7b\u5de5\u4f5c\u5728\u5b9e\u8d28\u4e0a\u5df2\u5b9e\u73b0\u63a8\u7406\u3001\u8bc4\u4f30\u4e0e\u540e\u5904\u7406\u80fd\u529b\u7684\u534f\u540c\u8fdb\u5316\u3002\u57fa\u4e8e\u6b64\u7ed3\u8bba\uff0c\u4e5f\u53ef\u89e3\u91ca\u4e3a\u4f55\u5176\u6027\u80fd\u8d85\u8d8a\u4ee5\u5f80\u4ec5\u805a\u7126\u4e8e\u5355\u4e00\u6216\u53cc\u6a21\u5757\u4f18\u5316\u7684\u5de5\u4f5c\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span 
style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h3 style=\"letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>6.&nbsp;<\/span><span>\u57fa\u4e8e\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u89e3\u8bfb O1 \u7c7b\u7814\u7a76<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u57fa\u4e8e\u5bf9\u81ea\u6211\u8fdb\u5316\u6280\u672f\u6846\u67b6\u7684\u8ba8\u8bba\uff0c\u672c\u8282\u4ece\u81ea\u6211\u8fdb\u5316\u7684\u89c6\u89d2\u91cd\u65b0\u89e3\u8bfb O1\u7c7b\u5de5\u4f5c\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>Marco-O1&nbsp;<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>Marco-O1<\/span><span>&nbsp;[Zhao \u7b49\uff0c2024a]&nbsp;<\/span><span>\u4f7f\u7528\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22<\/span><span>\uff08MCTS\uff09<\/span><span>\u751f\u6210\u6570\u636e\u96c6\uff0c\u5e76\u5728\u8be5\u6570\u636e\u96c6\u4e0a\u8fdb\u884c\u76d1\u7763\u5fae\u8c03<\/span><span>\uff08SFT\uff09<\/span><span>\u3002\u5c3d\u7ba1\u672a\u5f15\u5165\u8fed\u4ee3\u8bad\u7ec3\uff0cMCTS \u91c7\u6837\u4f53\u73b0\u4e86\u6570\u636e\u8fdb\u5316\uff0cSFT \u5219\u4ee3\u8868\u6a21\u578b\u8fdb\u5316\u3002\u7136\u800c\uff0c\u7f3a\u4e4f\u8fed\u4ee3\u8fc7\u7a0b\u9650\u5236\u4e86\u63a8\u7406\u6027\u80fd\u7684\u6301\u7eed\u63d0\u5347\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>O1 Journey<\/span><span>&nbsp;[Qin \u7b49\uff0c2024; Huang \u7b49\uff0c2024]&nbsp;<\/span><span>\u5f15\u5165\u201c\u65c5\u7a0b\u5b66\u4e60\u201d<\/span><span>\uff08Journey 
Learning\uff09<\/span><span>\u6982\u5ff5\uff0c\u63a2\u7d22\u878d\u5408\u81ea\u6211\u53cd\u601d\u3001\u81ea\u6211\u7ea0\u6b63\u4e0e\u56de\u6eaf\u7684\u63a8\u7406\u8fc7\u7a0b\uff0c\u5951\u5408\u7b2c 3.2.3 \u8282\u6240\u8ff0\u7684\u957f\u601d\u7ef4\u94fe<\/span><span>\uff08Long CoT\uff09<\/span><span>\u3002\u751f\u6210\u7684\u601d\u7ef4\u94fe\u4f9d\u636e\u7b54\u6848\u6b63\u786e\u6027\u5212\u5206\u4e3a\u6b63\u8d1f\u6837\u672c\uff0c\u5e76\u901a\u8fc7 DPO \u4f18\u5316\uff0c\u4f53\u73b0\u6a21\u578b\u8fdb\u5316\u3002\u5c3d\u7ba1\u672a\u663e\u5f0f\u91c7\u7528\u81ea\u6211\u8fdb\u5316\u673a\u5236\uff0c\u5176\u5f3a\u5927\u6027\u80fd\u6e90\u4e8e\u5bf9\u9690\u5f0f\u8bd5\u9519\u80fd\u529b\u7684\u6df1\u5c42\u5efa\u6a21\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>Slow Thinking<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><br \/><\/span><\/strong><\/em><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><span>\u7b2c1\u90e8\u5206<\/span><\/span><\/strong><\/em><em style=\"font-size: 15px\"><span><span>\uff1a<\/span><\/span><\/em><span style=\"font-size: 15px\"><span>Slow Thinking<\/span><span>&nbsp;[Jiang \u7b49\uff0c2024a]<\/span><span>&nbsp;\u7684\u7b2c\u4e00\u9636\u6bb5\u91c7\u7528\u4e24\u9636\u6bb5\u8fed\u4ee3\u8bad\u7ec3\uff1a\u5148\u7531\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u57fa\u4e8e MCTS \u641c\u7d22\u751f\u6210\u89e3\u7b54\u53ca\u8bc4\u5206<\/span><span>\uff08\u5bf9\u5e94\u6570\u636e\u8fdb\u5316\uff09<\/span><span>\uff0c\u518d\u4ee5 DPO 
\u8054\u5408\u4f18\u5316\u4e24\u4e2a\u6a21\u5757<\/span><span>\uff08\u5bf9\u5e94\u6a21\u578b\u8fdb\u5316\uff09<\/span><span>\u3002\u7531\u4e8e\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u8054\u5408\u4f18\u5316\uff0c\u8be5\u65b9\u6cd5\u53ef\u5f52\u7c7b\u4e3a\u201c\u63a8\u7406\u5668 + \u8bc4\u4f30\u5668\u201d\u81ea\u6211\u8fdb\u5316\u6a21\u5f0f\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><em style=\"font-size: 15px\"><strong><span><span>\u7b2c2\u90e8\u5206<\/span><\/span><\/strong><\/em><span style=\"font-size: 15px\"><span>\uff1aSlow Thinking<\/span><span>&nbsp;[Min \u7b49\uff0c2024]&nbsp;<\/span><span>\u7b2c\u4e8c\u9636\u6bb5\u57fa\u4e8e QwQ<\/span><span>&nbsp;[Team\uff0c2024b]<\/span><span>&nbsp;\u548c DeepSeek<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]&nbsp;<\/span><span>\u63d0\u70bc\u7684\u957f\u5f62\u5f0f\u601d\u7ef4\u80fd\u529b\uff0c\u901a\u8fc7\u63a2\u7d22\u2014\u5b66\u4e60\u5faa\u73af\u5b8c\u6210\u81ea\u6211\u8fdb\u5316\u3002\u957f\u601d\u7ef4\u94fe\u7684\u751f\u6210\u53cd\u6620\u6570\u636e\u8fdb\u5316\u4e2d\u7684\u9690\u5f0f\u8bd5\u9519\uff0c\u968f\u540e\u4ee5 SFT \u6216 DPO \u4f18\u5316\u63a8\u7406\u5668\uff0c\u6784\u6210\u7cfb\u7edf\u81ea\u6211\u8fdb\u5316\u7684\u5b8c\u6574\u95ed\u73af\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>rStar-Math&nbsp;<\/span><span>&nbsp;<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 
8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>rStar-Math<\/span><span>&nbsp;[Guan \u7b49\uff0c2025]&nbsp;<\/span><span>\u662f\u5178\u578b\u7684\u63a8\u7406\u81ea\u6211\u8fdb\u5316\u7cfb\u7edf\uff0c\u5305\u542b\u4e09\u8f6e\u8bad\u7ec3\uff1a1\uff09\u7ec8\u7aef\u5f15\u5bfc\u7684 MCTS \u6536\u96c6\u9ad8\u8d28\u91cf\u6570\u636e\u7528\u4e8e\u63a8\u7406\u5668\u7684 SFT\uff1b2\uff09\u4f7f\u7528\u8be5\u6570\u636e\u8bad\u7ec3\u8bc4\u4f30\u5668<\/span><span>\uff08PRM\uff09<\/span><span>\uff1b3\uff09\u5229\u7528 PRM \u5f15\u5bfc\u7684 MCTS \u91c7\u96c6\u65b0\u6570\u636e\uff0c\u91cd\u8bad\u63a8\u7406\u5668\u4e0e\u8bc4\u4f30\u5668\u3002\u6bcf\u8f6e\u5747\u6db5\u76d6\u6570\u636e\u4e0e\u6a21\u578b\u53cc\u91cd\u8fdb\u5316\uff0c\u4e14\u5404\u8f6e\u8bad\u7ec3\u805a\u7126\u4e8e\u4e0d\u540c\u80fd\u529b\uff0c\u6700\u7ec8\u5b9e\u73b0\u6574\u4f53\u6027\u80fd\u8de8\u8f6e\u8dc3\u5347\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>OpenR\/O1-Coder &nbsp;<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>OpenR<\/span><span>&nbsp;[Wang \u7b49\uff0c2024e]&nbsp;<\/span><span>\u4e0e O1-Coder<\/span><span>&nbsp;[Zhang 
\u7b49\uff0c2024j]&nbsp;<\/span><span>\u4f7f\u7528\u5f3a\u5316\u5b66\u4e60\u8054\u5408\u8bad\u7ec3\u7b56\u7565\u6a21\u578b<\/span><span>\uff08\u63a8\u7406\u5668\uff09<\/span><span>\u4e0e\u8bc4\u4f30\u5668<\/span><span>\uff08PRM\uff09<\/span><span>\u3002\u7b56\u7565\u6a21\u578b\u901a\u8fc7\u6811\u641c\u7d22<\/span><span>\uff08\u5982\u675f\u641c\u7d22\u3001MCTS\uff09<\/span><span>\u63a2\u7d22\u89e3\u7b54\uff0cPRM \u5219\u63d0\u4f9b\u5956\u52b1\u6307\u5bfc\u8bad\u7ec3\uff0c\u5206\u522b\u4f53\u73b0\u6570\u636e\u4e0e\u6a21\u578b\u8fdb\u5316\u3002\u4e24\u6a21\u5757\u5728\u5f3a\u5316\u5b66\u4e60\u6846\u67b6\u4e0b\u5b9e\u73b0\u6301\u7eed\u8054\u52a8\u5f0f\u81ea\u6211\u8fdb\u5316\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>DeepSeek R1\/Kimi k1.5&nbsp;<\/span><span>&nbsp;<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>R1<\/span><span>&nbsp;[DeepSeek-AI \u7b49\uff0c2025]&nbsp;<\/span><span>\u4e0e Kimi k1.5<\/span><span>&nbsp;[Team \u7b49\uff0c2025]&nbsp;<\/span><span>\u662f\u5f53\u524d\u9886\u5148\u7684\u5f00\u6e90\u63a8\u7406\u6a21\u578b\uff0c\u6027\u80fd\u5df2\u5ab2\u7f8e\u751a\u81f3\u8d85\u8d8a 
O1&nbsp;<\/span><span>[OpenAI\uff0c2024b]<\/span><span>\u3002\u5176\u6838\u5fc3\u7b97\u6cd5\u4e00\u81f4\uff0c\u91c7\u7528\u5728\u7ebf\u5f3a\u5316\u5b66\u4e60\u8bad\u7ec3\uff0c\u4ec5\u4f9d\u8d56\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08ORM\uff09<\/span><span>\u8fdb\u884c\u4f18\u5316\uff0c\u9f13\u52b1\u7b56\u7565\u63a2\u7d22\u5e76\u6fc0\u53d1\u957f\u601d\u7ef4\u94fe\u80fd\u529b\u7684\u6d8c\u73b0\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>\u6b64\u5916\uff0c\u8be5\u7c7b\u5de5\u4f5c\u91c7\u7528\u7684 RL \u8bad\u7ec3\u8303\u5f0f\u4e0e\u81ea\u6211\u8fdb\u5316\u7406\u5ff5\u9ad8\u5ea6\u5951\u5408\uff1a\u7b56\u7565\u63a2\u7d22\u5bf9\u5e94\u6570\u636e\u8fdb\u5316\uff0c\u5956\u52b1\u9a71\u52a8\u5bf9\u5e94\u6a21\u578b\u8fdb\u5316\u3002\u7cfb\u7edf\u901a\u8fc7\u63a2\u7d22\u2014\u5b66\u4e60\u7684\u5faa\u73af\u5b9e\u73b0\u6301\u7eed\u8fdb\u5316\u3002\u66f4\u91cd\u8981\u7684\u662f\uff0cR1 \u7b49\u5de5\u4f5c\u4e0d\u4ec5\u4f18\u5316\u9010\u6b65\u63a8\u7406\uff0c\u8fd8\u540c\u65f6\u63d0\u5347\u8bc4\u4f30\u3001\u53cd\u601d\u4e0e\u81ea\u6211\u7ea0\u9519\u80fd\u529b\uff0c\u7b26\u5408\u7b2c 5.3.5 \u8282\u6240\u8ff0\u7684\u201c\u63a8\u7406\u5668 + \u8bc4\u4f30\u5668 + \u540e\u5904\u7406\u5668\u201d\u5171\u8fdb\u6a21\u5f0f\u3002\u6b63\u662f\u8fd9\u79cd\u591a\u6a21\u5757\u534f\u540c\u8fdb\u5316\uff0c\u4f7f\u5176\u5728\u6027\u80fd\u4e0a\u663e\u8457\u4f18\u4e8e\u4ec5\u8fdb\u5316\u5355\u6a21\u5757\u7684\u65e9\u671f\u7cfb\u7edf\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br 
\/><\/span><\/h1>\n<h3 style=\"letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>7.&nbsp;<\/span><span>\u672a\u6765\u6311\u6218\u548c\u65b9\u5411<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5982\u4f55\u66f4\u6709\u6548\u5730\u81ea\u6211\u8fdb\u5316\u63a8\u7406\u80fd\u529b\uff1f<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p 
style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u66f4\u5177\u524d\u666f\u7684\u81ea\u6211\u8fdb\u5316\u6a21\u5f0f\uff1a<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u524d\u6587\u63d0\u53ca\u4e94\u79cd\u5e38\u89c1\u7684\u81ea\u6211\u8fdb\u5316\u6a21\u5f0f\uff0c\u4f46\u4ece\u7406\u8bba\u4e0a\u8bb2\uff0c\u8fd9\u56db\u4e2a\u6a21\u5757\u5b58\u5728 &nbsp;<\/span><\/span><span style=\"font-size: 15px\"><span data-meta-block-props=\"{&quot;blockId&quot;:&quot;f0351821-9ef4-483f-9066-f4ebe93391b4&quot;,&quot;blockType&quot;:&quot;EQUATION_BLOCK&quot;,&quot;initData&quot;:{},&quot;props&quot;:{&quot;data&quot;:{&quot;equation&quot;:&quot;2^4-1=15&quot;},&quot;displayMode&quot;:&quot;inline&quot;,&quot;viewType&quot;:&quot;inline&quot;}}\"><span><span>$$2^4-1=15$$<\/span><\/span><\/span><\/span><span style=\"font-size: 15px\"><span>&nbsp;\u79cd\u53ef\u80fd\u7684\u4f18\u5316\u7ec4\u5408\u3002\u901a\u8fc7\u63a2\u7d22\u4e0d\u540c\u7684\u6a21\u5757\u7ec4\u5408\u53ca\u5408\u4f5c\u4e0e\u5bf9\u6297\u7b49\u8bad\u7ec3\u7b56\u7565\uff0c\u6709\u671b\u6784\u5efa\u66f4\u9ad8\u6548\u7684\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u3002\u7406\u60f3\u60c5\u51b5\u4e0b\uff0c\u56db\u4e2a\u6a21\u5757\u7684\u540c\u6b65\u589e\u5f3a\u5c06\u5e26\u6765\u6301\u7eed\u4e14\u663e\u8457\u7684\u6027\u80fd\u63d0\u5347\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u7cfb\u7edf\u6cdb\u5316\uff1a<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u81ea\u6211\u8fdb\u5316\u901a\u8fc7\u8fed\u4ee3\u8bad\u7ec3\u63d0\u5347\u7cfb\u7edf\u6027\u80fd\u3002\u6301\u7eed\u8fdb\u5316\u7684\u5173\u952e\u5728\u4e8e\u9632\u6b62\u8fc7\u62df\u5408\u5e76\u786e\u4fdd\u6cdb\u5316\u80fd\u529b\u3002\u9996\u5148\uff0c\u4efb\u52a1\u6cdb\u5316\u81f3\u5173\u91cd\u8981\uff1b\u5408\u6210\u66f4\u591a\u6837\u5316\u548c\u590d\u6742\u7684\u4efb\u52a1\u53ef\u4ee5\u786e\u4fdd\u66f4\u5e7f\u6cdb\u7684\u8986\u76d6\u8303\u56f4\uff0c\u8fd9\u662f\u89e3\u51b3\u6cdb\u5316\u95ee\u9898\u7684\u57fa\u7840<\/span><span>&nbsp;[Yu \u7b49\uff0c2024a]<\/span><span>\u3002\u5176\u6b21\uff0c\u63a8\u7406\u5668\u3001\u8bc4\u4f30\u5668\u548c\u540e\u5904\u7406\u5668\u7684\u6cdb\u5316\u80fd\u529b\u540c\u6837\u91cd\u8981\u3002B-StAR<\/span><span>&nbsp;[Zeng \u7b49\uff0c2024a]<\/span><span>&nbsp;\u8868\u660e\uff0c\u589e\u5f3a\u63a8\u7406\u5668\u7684\u63a2\u7d22\u80fd\u529b\u53ef\u4ee5\u51cf\u5c11\u8fc7\u62df\u5408\u3002\u540e\u5904\u7406\u5668\u5728\u591a\u6837\u5316\u89e3\u51b3\u65b9\u6848\u65b9\u9762\u4e5f\u53d1\u6325\u7740\u5173\u952e\u4f5c\u7528\u3002\u6b64\u5916\uff0c\u5956\u52b1\u6b3a\u9a97\u95ee\u9898\u8868\u660e\uff0c\u5f53\u524d\u8bc4\u4f30\u5668\u53ef\u80fd\u5bf9\u63a8\u7406\u5668\u8fc7\u62df\u5408\u5e76\u5229\u7528\u5956\u52b1\u6377\u5f84\u3002\u603b\u4e4b\uff0c\u63a8\u7406\u7cfb\u7edf\u7684\u6cdb\u5316\u80fd\u529b\u5bf9\u4e8e\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u4e2d\u7684\u6301\u7eed\u63d0\u5347\u81f3\u5173\u91cd\u8981\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u6211\u4eec\u5982\u4f55\u5728\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u5185\u8fdb\u4e00\u6b65\u589e\u5f3a\u5927\u8bed\u8a00\u6a21\u578b\u7684\u590d\u6742\u63a8\u7406\u80fd\u529b\uff1f<\/span><\/span><\/strong><\/p>\n<p 
style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span style=\"font-weight: normal\">\u5c3d\u7ba1 O1 \u548c R1 \u7b49\u6a21\u578b\u5c55\u793a\u4e86\u4ee4\u4eba\u5370\u8c61\u6df1\u523b\u7684\u63a8\u7406\u80fd\u529b\uff0c\u4f46\u4ecd\u6709\u663e\u8457\u7684\u6539\u8fdb\u7a7a\u95f4\uff0c\u5305\u62ec\u589e\u5f3a\u63a8\u7406\u80fd\u529b\u548c\u63d0\u9ad8\u8bcd\u5143\u6548\u7387\u3002\u6301\u7eed\u8bad\u7ec3\u81f3\u5173\u91cd\u8981\uff0c\u4f46\u5e94\u4e13\u6ce8\u4e8e\u89e3\u51b3\u5173\u952e\u6311\u6218\u3002\u5728\u672a\u6765\u7814\u7a76\u4e2d\uff0c\u4ee5\u4e0b\u51e0\u4e2a\u5173\u952e\u95ee\u9898\u4ecd\u9700\u89e3\u51b3\uff1a<\/span><\/span><\/strong><\/p>\n<ul style=\"margin-left: 8px;margin-right: 8px\" class=\"list-paddingleft-1\">\n<li style=\"text-align: left\">\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5982\u4f55\u8fdb\u4e00\u6b65\u589e\u5f3a\u4efb\u52a1\u591a\u6837\u6027\uff1f<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>\u8270\u96be\u4efb\u52a1\u662f\u63d0\u5347\u7cfb\u7edf\u6cdb\u5316\u80fd\u529b\u7684\u6709\u6548\u9014\u5f84\u4e4b\u4e00\u3002\u4f8b\u5982\uff0cMin \u7b49<\/span><span>&nbsp;[2024]&nbsp;<\/span><span>\u6307\u51fa\uff0c\u7531\u4e8e\u4efb\u52a1\u6c60\u7a00\u758f\uff0c\u6a21\u578b\u5728\u4ec5\u7ecf\u5386\u5c11\u91cf\u8fed\u4ee3\u8bad\u7ec3\u540e\u4fbf\u8d8b\u4e8e\u6536\u655b\u3002\u4e3a\u7ef4\u6301\u6301\u7eed\u7684\u81ea\u6211\u8fdb\u5316\uff0c\u4e9f\u9700\u63d0\u5347\u4efb\u52a1\u7684\u591a\u6837\u6027\u4e0e\u590d\u6742\u6027\u3002\u5c3d\u7ba1 R1 
\u7b49\u65b9\u6cd5\u6709\u6548\u589e\u5f3a\u4e86\u9010\u6b65\u63a8\u7406\u3001\u81ea\u6211\u8bc4\u4f30\u4e0e\u81ea\u6211\u7ea0\u6b63\u7b49\u80fd\u529b\uff0c\u4f46\u5c1a\u672a\u7eb3\u5165\u4efb\u52a1\u8fdb\u5316\u673a\u5236\u3002\u82e5\u80fd\u5f15\u5165\u6709\u6548\u7684\u4efb\u52a1\u8fdb\u5316\uff0c\u6709\u671b\u5b9e\u73b0\u66f4\u663e\u8457\u4e14\u6301\u4e45\u7684\u6027\u80fd\u63d0\u5347\u3002\u76ee\u524d\uff0c\u76f8\u5173\u65b9\u6cd5\u4ecd\u8f83\u4e3a\u521d\u7ea7\uff0c\u4e9f\u5f85\u8fdb\u4e00\u6b65\u7814\u7a76\u4ee5\u6784\u5efa\u66f4\u52a0\u591a\u6837\u5316\u3001\u590d\u6742\u4e14\u5177\u6311\u6218\u6027\u7684\u4efb\u52a1\u96c6\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 16px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5982\u4f55\u5f00\u53d1\u66f4\u7ec6\u81f4\u7684\u5956\u52b1\u5efa\u6a21\uff1f<\/span><\/span><\/strong><span style=\"font-size: 15px\"><span>R1 \u7b49\u5de5\u4f5c\u8868\u660e\uff0c\u4ec5\u4f7f\u7528\u7ed3\u679c\u5956\u52b1\u6a21\u578b<\/span><span>\uff08ORM\uff09<\/span><span>\u5373\u53ef\u5b9e\u73b0\u4ee4\u4eba\u6ee1\u610f\u7684\u63a8\u7406\u80fd\u529b\uff0c\u800c\u5176\u5728\u8499\u7279\u5361\u6d1b\u6811\u641c\u7d22+\u8fc7\u7a0b\u5956\u52b1\u6a21\u578b<\/span><span>\uff08PRM\uff09<\/span><span>\u65b9\u9762\u7684\u5931\u8d25\u5c1d\u8bd5\u52a0\u5267\u4e86\u5bf9 PRM \u5b9e\u7528\u6027\u7684\u8d28\u7591\u3002\u4e0e\u53ef\u5b66\u4e60\u7684 PRM \u76f8\u6bd4\uff0cR1 \u91c7\u7528\u7684\u57fa\u4e8e\u89c4\u5219\u7684 ORM \u5728\u6cdb\u5316\u548c\u7f13\u89e3\u5956\u52b1\u6b3a\u9a97\u65b9\u9762\u5177\u5907\u4f18\u52bf\u3002\u7136\u800c\uff0c\u8fd9\u79cd ORM \u5728\u4f18\u5316\u8fc7\u7a0b\u4e2d\u65e0\u6cd5\u63d0\u4f9b\u7ec6\u7c92\u5ea6\u7684\u5956\u52b1\u3002\u5206\u6790\u8868\u660e\uff0cR1 \u7b49\u6a21\u578b\u503e\u5411\u4e8e\u5728\u7b80\u5355\u95ee\u9898\u4e0a\u8fc7\u5ea6\u601d\u8003\uff0c\u800c\u5728\u590d\u6742\u95ee\u9898\u4e0a\u601d\u8003\u4e0d\u8db3<\/span><span>&nbsp;[Chen \u7b49\uff0c2024f; Wang 
\u7b49\uff0c2025a]<\/span><span>\uff0c\u8fd9\u53ef\u80fd\u6fc0\u52b1 PRM \u7684\u7814\u7a76\uff0c\u56e0\u4e3a PRM \u53ef\u4ee5\u63d0\u4f9b\u8fc7\u7a0b\u4fe1\u53f7\u4ee5\u6307\u5bfc\u9ad8\u6548\u7684\u9010\u6b65\u63a8\u7406\u3002\u7136\u800c\uff0cPRM \u7684\u5f31\u6cdb\u5316\u80fd\u529b\u3001\u6301\u7eed\u66f4\u65b0\u548c\u5956\u52b1\u6b3a\u9a97\u7b49\u6311\u6218\u4ecd\u662f\u5176\u8fdb\u4e00\u6b65\u53d1\u5c55\u7684\u91cd\u5927\u969c\u788d\u3002R1 \u901a\u8fc7\u81ea\u6211\u8bc4\u4f30\u5b9e\u73b0 PRM\uff0c\u5e76\u4f7f\u7528 ORM \u540c\u65f6\u4f18\u5316\u9010\u6b65\u63a8\u7406\u3001\u81ea\u6211\u8bc4\u4f30\u548c\u81ea\u6211\u7ea0\u6b63\u80fd\u529b\uff0c\u4f46\u5e76\u672a\u7279\u522b\u4f18\u5316\u81ea\u6211\u8bc4\u4f30\u3002\u81ea\u6211\u8bc4\u4f30\u7684\u6301\u7eed\u6709\u6548\u589e\u5f3a\u4ecd\u9700\u8fdb\u4e00\u6b65\u7814\u7a76\u3002<\/span><\/span><\/p>\n<\/li>\n<li>\n<p style=\"margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u5982\u4f55\u5e73\u8861\u6548\u7387\u4e0e\u6709\u6548\u6027\u4ee5\u786e\u5b9a\u6700\u4f73\u601d\u7ef4\u94fe\u8fdb\u5316\uff1f<\/span><\/span><\/strong><span style=\"font-size: 
15px\"><span>\u77ed\u601d\u7ef4\u94fe\u7684\u663e\u5f0f\u6811\u641c\u7d22\u5177\u5907\u8f83\u9ad8\u6548\u7387\uff0c\u4f46\u7f3a\u4e4f\u957f\u601d\u7ef4\u94fe\u7684\u6cdb\u5316\u80fd\u529b\u3002\u5c3d\u7ba1\u8bd5\u9519\u641c\u7d22\u6a21\u4eff\u4e86\u4eba\u7c7b\u63a8\u7406\uff0c\u4f46\u5176\u5b58\u5728\u8fc7\u5ea6\u601d\u8003\u548c\u601d\u8003\u4e0d\u8db3\u7b49\u7f3a\u9677\u3002\u56e0\u6b64\u9700\u8981\u601d\u8003\u5982\u4f55\u5728\u63a8\u7406\u65f6\u8ba1\u7b97\u9636\u6bb5\u7ed3\u5408\u4e24\u79cd\u641c\u7d22\u7c7b\u578b\u7684\u4f18\u52bf\u3002\u4e00\u79cd\u53ef\u80fd\u7684\u89e3\u51b3\u65b9\u6848\u662f\u589e\u5f3a\u5927\u8bed\u8a00\u6a21\u578b\u7684\u81ea\u6211\u8bc4\u4f30\u548c\u81ea\u6211\u7ea0\u6b63\u80fd\u529b\uff0c\u4ee5\u7f13\u89e3\u8fc7\u5ea6\u601d\u8003\u548c\u601d\u8003\u4e0d\u8db3\u3002\u53e6\u4e00\u4e2a\u6f5c\u5728\u65b9\u5411\u662f\u5728\u63a8\u7406\u8fc7\u7a0b\u4e2d\u5c06\u663e\u5f0f\u6811\u641c\u7d22\u539f\u5219\u4e0e\u8bd5\u9519\u76f8\u7ed3\u5408\uff0c\u4ece\u800c\u63d0\u5347 R1 \u4e2d\u5e8f\u5217\u63a8\u7406\u7684\u6027\u80fd\u3002<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><br \/><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><strong style=\"font-size: 15px\"><span><span>\u81ea\u6211\u8fdb\u5316\u63a8\u7406\u5982\u4f55\u5e94\u7528\u4e8e\u5177\u8eab\u667a\u80fd\u573a\u666f\uff1f<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u672c\u7efc\u8ff0\u805a\u7126\u4e8e\u6587\u672c\u6a21\u6001\u4e2d\u590d\u6742\u63a8\u7406\u4efb\u52a1\u7684\u81ea\u6211\u8fdb\u5316\u3002\u7136\u800c\uff0c\u672a\u6765\u7684\u4eba\u5de5\u667a\u80fd\u7cfb\u7edf\u9700\u8981\u4e0e\u73b0\u5b9e\u4e16\u754c\u4ea4\u4e92<\/span><span>&nbsp;[Wang \u7b49\uff0c2024d]<\/span><span>\uff0c\u5176\u4e2d\u8bb8\u591a\u573a\u666f\u9700\u8981\u8de8\u591a\u6a21\u6001\u6570\u636e\u8fdb\u884c\u63a8\u7406<\/span><span>&nbsp;[Xiang \u7b49\uff0c2024; Yao \u7b49\uff0c2024a; Wu \u7b49\uff0c2025a]<\/span><span>\u3002\u4e3a\u5b9e\u73b0\u8fd9\u4e00\u76ee\u6807\uff0c\u5fc5\u987b\u89e3\u51b3\u4ee5\u4e0b\u6311\u6218\uff1a\u9996\u5148\uff0c\u5168\u9762\u7406\u89e3\u591a\u6a21\u6001\u6570\u636e\u662f\u591a\u6a21\u6001\u63a8\u7406\u7684\u57fa\u7840\u3002\u5176\u6b21\uff0c\u5fc5\u987b\u91cd\u65b0\u5b9a\u4e49\u601d\u7ef4\u94fe\u7684\u683c\u5f0f\uff0c\u4f8b\u5982\u8003\u8651\u662f\u5426\u5e94\u5c06\u591a\u6a21\u6001\u6570\u636e\u7ec4\u6210\u7684\u8bcd\u5143\u7eb3\u5165\u601d\u7ef4\u94fe<\/span><span>&nbsp;[Li \u7b49\uff0c2025]<\/span><span>\u3002\u6b64\u5916\uff0c\u591a\u6a21\u6001\u573a\u666f<\/span><span>\uff08\u5982\u5177\u8eab\u667a\u80fd\uff09<\/span><span>\u4e2d\u7684\u8bb8\u591a\u63a8\u7406\u4efb\u52a1\u8fd8\u9762\u4e34\u73af\u5883\u4ea4\u4e92\u6210\u672c\u9ad8\u548c\u8bad\u7ec3\u6570\u636e\u8d44\u6e90\u6709\u9650\u7b49\u6311\u6218<\/span><span>&nbsp;[He \u7b49\uff0c2024a]<\/span><span>\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<h1 style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/h1>\n<h3 style=\", Arial, sans-serif;letter-spacing: 0.544px\">\n<section style=\"letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin-top: 10px;margin-bottom: 10px;letter-spacing: 0.544px;text-align: 
center\">\n<section style=\"vertical-align: middle\">\n<section style=\"margin-bottom: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"clear: both;line-height: 0\">\n<section style=\"line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;font-size: 16px;line-height: 1.4\">\n<p><strong><strong style=\"text-align: left;letter-spacing: 0.544px\"><span><strong style=\"letter-spacing: 0.578px\"><span>8. \u7ed3\u8bba<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px\">\n<section style=\"float: left;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<section style=\"float: right;width: 8px;height: 3px;line-height: 0\"><span><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;letter-spacing: 0.578px;line-height: 1.75em\"><span><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 
15px\"><span>\u672c\u7efc\u8ff0\u4ece\u81ea\u6211\u8fdb\u5316\u7684\u89d2\u5ea6\uff0c\u7cfb\u7edf\u56de\u987e\u4e86\u5927\u8bed\u8a00\u6a21\u578b\u5728\u590d\u6742\u63a8\u7406\u65b9\u9762\u7684\u73b0\u6709\u7814\u7a76\u3002\u6211\u4eec\u9996\u5148\u4ece\u6570\u636e\u8fdb\u5316\u548c\u6a21\u578b\u8fdb\u5316\u4e24\u4e2a\u89c6\u89d2\u5ba1\u89c6\u4e86\u76f8\u5173\u6280\u672f\uff0c\u4e3a\u81ea\u6211\u8fdb\u5316\u5960\u5b9a\u4e86\u57fa\u7840\u3002\u968f\u540e\uff0c\u6211\u4eec\u5c06\u7126\u70b9\u8f6c\u5411\u81ea\u6211\u8fdb\u5316\u672c\u8eab\uff0c\u901a\u8fc7\u63a2\u7d22\u7cfb\u7edf\u6a21\u5757\u4e4b\u95f4\u7684\u8fdb\u5316\u5173\u7cfb\uff0c\u5206\u6790\u4e86\u73b0\u6709\u7684\u81ea\u6211\u8fdb\u5316\u7814\u7a76\u3002\u6b64\u5916\uff0c\u6211\u4eec\u8fdb\u4e00\u6b65\u5206\u6790\u548c\u603b\u7ed3\u4e86\u73b0\u6709\u7684\u7c7bO1\u5f00\u6e90\u7814\u7a76\uff0c\u53d1\u73b0\u8fd9\u4e9b\u7814\u7a76\u5747\u53ef\u7528\u6211\u4eec\u7684\u81ea\u6211\u8fdb\u5316\u6846\u67b6\u8fdb\u884c\u89e3\u91ca\u3002\u6700\u540e\uff0c\u6211\u4eec\u5e0c\u671b\u672c\u7efc\u8ff0\u80fd\u591f\u6fc0\u53d1\u66f4\u591a\u7814\u7a76\uff0c\u63a8\u52a8\u57fa\u4e8e\u5927\u8bed\u8a00\u6a21\u578b\u7684\u590d\u6742\u63a8\u7406\u8fdb\u4e00\u6b65\u53d1\u5c55\u3002<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span style=\"font-weight: bold\">\u8bd1\u8005\u6ce8\u91ca\uff1a<\/span><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em\"><span style=\"font-size: 15px\"><span>1. 
[Polu\u7b49\uff0c2022] \u5e94\u8be5\u662f &nbsp;<\/span><\/span><strong style=\"font-size: 15px\"><span><span>Expert iteration \u800c\u975e Expect interation<\/span><\/span><\/strong><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 15px\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.5825958702064897\" data-type=\"png\" data-w=\"678\" style=\"height: auto !important\" data-width=\"678\" data-height=\"395\" data-imgfileid=\"100227511\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-42f60ac4b335b276cd40270a4fb686ea.png\" \/><img class=\"rich_pages wxw-img\" data-ratio=\"0.32298136645962733\" data-type=\"png\" data-w=\"966\" style=\"height: auto !important\" data-width=\"966\" data-height=\"312\" data-imgfileid=\"100227512\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-2c5b2614dc6f8092d750ecb4b1ce84c3.png\" \/><\/span><\/p>\n<section style=\"margin-left: 8px;margin-right: 8px\"><span style=\"font-size: 15px\" data-mpa-action-id=\"m9h0tpxn1p1\" data-pm-slice=\"0 0 []\"><br \/><\/span><\/section>\n<section style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><span style=\"font-size: 15px\" data-mpa-action-id=\"m9h0tpxn1p1\" data-pm-slice=\"0 0 []\"><span style=\"font-weight: bold\">\u8bd1\u8005\u7b80\u4ecb<\/span><\/span><\/section>\n<section style=\"text-align: center;margin-left: 8px;margin-right: 8px;margin-bottom: 0px\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.2955010224948875\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"978\" style=\"height: auto !important\" data-imgfileid=\"100227809\" src=\"\/wp-content\/uploads\/2025\/04\/wxsync-2025-04-4e8a89aeba38e265d49d709936536b26.jpeg\" \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 
0px;line-height: 1.75em;text-align: center\"><span style=\"font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;font-family: PingFangSC-light;font-size: 15px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\" data-pm-slice=\"0 0 []\">\n<section style=\"margin: 0px;padding: 0px 6px;max-width: 100%;width: 661px;vertical-align: top\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;line-height: 1.4\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span>\u90e8\u5206\u53c2\u8003\u6587\u732e\uff0c<\/span><span>\u5168\u90e8\u53c2\u8003\u6587\u732e\u8bf7\u67e5\u770b\u539f\u6587<\/span><\/strong><\/p>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;font-family: PingFangSC-light;font-size: 15px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-indent: 0px;text-transform: none;text-align: center;width: 661px\">\n<section style=\"margin: 0px;padding: 0px 0px 0px 4px;max-width: 100%;width: 661px;height: 360px;vertical-align: top\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;text-align: justify;font-size: 13px\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><span><br \/><\/span><\/p>\n<ul class=\"list-paddingleft-1\">\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Arash Ahmadian, Chris Cremer, Matthias Gall\u00e9, Marzieh Fadaee, Julia Kreutzer, Olivier Pietquin, Ahmet \u00dcst\u00fcn, and Sara Hooker. Back to basics: Revisiting REINFORCE-style optimization for learning from human feedback in LLMs. 
In Lun-Wei Ku, Andre Martins, and Vivek Srikumar, editors, Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics \uff08Volume 1: Long Papers\uff09, pages 12248\u201312267, Bangkok, Thailand, August 2024. Association for Computational Linguistics. doi: 10.18653\/v1\/2024.acl-long.662. URL https:\/\/aclanthology.org\/2024.acl-long.662\/.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Renat Aksitov, Sobhan Miryoosefi, Zong xiao Li, Daliang Li, Sheila Babayan, Kavya Kopparapu, Zachary Fisher, Ruiqi Guo, Sushant Prakash, Pranesh Srinivasan, Manzil Zaheer, Felix X. Yu, and Sanjiv Kumar. Rest meets react: Self-improvement for multi-step reasoning llm agent. ArXiv, abs\/2312.10003, 2023. URL https:\/\/arxiv.org\/pdf\/2312.10003.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Afra Feyza Aky\u00fcrek, Ekin Aky\u00fcrek, Aman Madaan, A. Kalyan, Peter Clark, Derry Tanti Wijaya, and Niket Tandon. Rl4f: Generating natural language feedback with reinforcement learning for repairing model outputs. In Annual Meeting of the Association for Computational Linguistics, 2023. URL https:\/\/aclanthology.org\/2023.acl-long.427.pdf.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Shengnan An, Zexiong Ma, Zeqi Lin, Nanning Zheng, Jian-Guang Lou, and Weizhu Chen. Learning from mistakes makes LLM better reasoner. CoRR, abs\/2310.20689, 2023. doi: 10.48550\/ARXIV.2310.20689. 
URL https:\/\/doi.org\/10.48550\/arXiv.2310.20689.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Marcin Andrychowicz, Dwight Crow, Alex Ray, Jonas Schneider, Rachel Fong, Peter Welinder, Bob McGrew, Joshua Tobin, P. Abbeel, and Wojciech Zaremba. Hindsight experience replay. In Neural Information Processing Systems, 2017. URL https:\/\/arxiv.org\/pdf\/1707.01495.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Zachary Ankner, Cody Blakeney, Kartik K. Sreenivasan, Max Marion, Matthew L. Leavitt, and Mansheej Paul. Perplexed by perplexity: Perplexity-based data pruning with small reference models. ArXiv, abs\/2405.20541, 2024a. URL https:\/\/arxiv.org\/pdf\/2405.20541.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Zachary Ankner, Mansheej Paul, Brandon Cui, Jonathan D. Chang, and Prithviraj Ammanabrolu. Critique-out-loud reward models. CoRR, abs\/2408.11791, 2024b. doi: 10.48550\/ARXIV.2408.11791. URL https:\/\/doi.org\/10.48550\/arXiv.2408.11791.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Mohammad Gheshlaghi Azar, Mark Rowland, Bilal Piot, Daniel Guo, Daniele Calandriello, Michal Valko, and R\u00e9mi Munos. A general theoretical paradigm to understand learning from human preferences. ArXiv, abs\/2310.12036, 2023. URL https:\/\/arxiv.org\/pdf\/2310.12036.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Ralph Allan Bradley and Milton E. Terry. Rank analysis of incomplete block designs: I. 
the method of paired comparisons. Biometrika, 39:324, 1952. URL https:\/\/api.semanticscholar.org\/CorpusID:125209808.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Cameron Browne, Edward Jack Powley, Daniel Whitehouse, Simon M. M. Lucas, Peter I. Cowling, Philipp Rohlfshagen, Stephen Tavener, Diego Perez Liebana, Spyridon Samothrakis, and Simon Colton. A survey of monte carlo tree search methods. IEEE Transactions on Computational Intelligence and AI in Games, 4:1\u201343, 2012. URL https:\/\/ieeexplore.ieee.org\/document\/6145622.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Changyu Chen, Zi-Yan Liu, Chao Du, Tianyu Pang, Qian Liu, Arunesh Sinha, Pradeep Varakantham, and Min Lin. Bootstrapping language models with dpo implicit rewards. ArXiv, abs\/2406.09760, 2024a. URL https:\/\/arxiv.org\/pdf\/2406.09760.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Guoxin Chen, Minpeng Liao, Chengxi Li, and Kai Fan. Alphamath almost zero: process supervision without process. ArXiv, abs\/2405.03553, 2024b. URL https:\/\/arxiv.org\/pdf\/2405.03553.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Guoxin Chen, Minpeng Liao, Chengxi Li, and Kai Fan. Step-level value preference optimization for mathematical reasoning. In Conference on Empirical Methods in Natural Language Processing, 2024c. URL https:\/\/arxiv.org\/pdf\/2406.10858.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Huayu Chen, Guande He, Lifan Yuan, Hang Su, and Jun Zhu. Noise contrastive alignment of language models with explicit rewards. ArXiv, abs\/2402.05369, 2024d. 
URL https:\/\/arxiv.org\/pdf\/2402.05369.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Justin Chih-Yao Chen, Zifeng Wang, Hamid Palangi, Rujun Han, Sayna Ebrahimi, Long T. Le, Vincent Perot, Swaroop Mishra, Mohit Bansal, Chen-Yu Lee, and Tomas Pfister. Reverse thinking makes llms stronger reasoners. ArXiv, abs\/2411.19865, 2024e. URL https:\/\/arxiv.org\/pdf\/2411.19865.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Wenhu Chen, Xueguang Ma, Xinyi Wang, and William W. Cohen. Program of thoughts prompting: Disentangling computation from reasoning for numerical reasoning tasks. Trans. Mach. Learn. Res., 2023, 2022. URL https:\/\/arxiv.org\/pdf\/2211.12588.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Xingyu Chen, Jiahao Xu, Tian Liang, Zhiwei He, Jianhui Pang, Dian Yu, Linfeng Song, Qiuzhi Liu, Mengfei Zhou, Zhuosheng Zhang, Rui Wang, Zhaopeng Tu, Haitao Mi, and Dong Yu. Do not think that much for 2+3=? on the overthinking of o1-like llms. ArXiv, abs\/2412.21187, 2024f. URL https:\/\/arxiv.org\/pdf\/2412.21187.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Xinyun Chen, Renat Aksitov, Uri Alon, Jie Ren, Kefan Xiao, Pengcheng Yin, Sushant Prakash, Charles Sutton, Xuezhi Wang, and Denny Zhou. Universal self-consistency for large language model generation. CoRR, abs\/2311.17311, 2023a. doi: 10.48550\/ARXIV.2311.17311. 
URL https:\/\/doi.org\/10.48550\/arXiv.2311.17311.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Xinyun Chen, Maxwell Lin, Nathanael Sch\u00e4rli, and Denny Zhou. Teaching large language models to self-debug. ArXiv, abs\/2304.05128, 2023b. URL https:\/\/doi.org\/10.48550\/arXiv.2304.05128.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Zhaorun Chen, Zhuokai Zhao, Zhihong Zhu, Ruiqi Zhang, Xiang Li, Bhiksha Raj, and Huaxiu Yao. Autoprm: Automating procedural supervision for multi-step reasoning via controllable question decomposition. ArXiv, abs\/2402.11452, 2024g. URL https:\/\/aclanthology.org\/2024.naacl-long.73\/.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Zhipeng Chen, Kun Zhou, Wayne Xin Zhao, Junchen Wan, Fuzheng Zhang, Di Zhang, and Ji-Rong Wen. Improving large language models via fine-grained reinforcement learning with minimum editing constraint. In Annual Meeting of the Association for Computational Linguistics, 2024h. URL https:\/\/arxiv.org\/pdf\/2401.06081.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Zixiang Chen, Yihe Deng, Huizhuo Yuan, Kaixuan Ji, and Quanquan Gu. Self-play fine-tuning converts weak language models to strong language models. ArXiv, abs\/2401.01335, 2024i. URL https:\/\/arxiv.org\/pdf\/2401.01335.<\/span><\/span><\/p>\n<\/li>\n<li style=\"color:#888888\">\n<p style=\"text-align: left;line-height: 1em;margin-bottom: 8px\"><span style=\"font-size: 13px\"><span>Pengyu Cheng, Tianhao Hu, Han Xu, Zhisong Zhang, Yong Dai, Lei Han, and Nan Du. 
Self-playing adversarial language game enhances llm reasoning. ArXiv, abs\/2404.10642, 2024. URL https:\/\/arxiv.org\/pdf\/2404.10642.<\/span><\/span><\/p>\n<\/li>\n<\/ul>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 3px 0px 0px;padding: 0px;max-width: 100%;font-family: PingFangSC-light;font-size: 15px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 14px;letter-spacing: 1px\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;text-align: center\"><span>\uff08\u53ef<\/span><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span>\u4e0a\u4e0b\u6ed1\u52a8<\/span><\/strong><span>\u67e5\u770b\uff09<\/span><\/p>\n<\/section>\n<\/section>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;text-align: justify;text-indent: 0px;text-transform: none;letter-spacing: 0.578px;line-height: 1.75em\" data-pm-slice=\"0 0 []\"><span><br \/><\/span><\/section>\n<h2 style=\"margin: 0px 8px;padding: 0px;font-weight: 400;font-size: 16px;max-width: 100%;, Arial, sans-serif;font-style: normal;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\" data-pm-slice=\"0 0 []\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><br \/><\/span><\/h2>\n<h3 style=\"margin: 0px;padding: 0px;font-weight: 400;font-size: 16px;max-width: 100%;font-style: normal;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;, Arial, sans-serif\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px;text-align: right;font-size: 13px\">\n<section style=\"margin: 10px 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px;text-align: center\">\n<section style=\"margin: 0px;padding: 0px;max-width: 
100%;vertical-align: middle\">\n<section style=\"margin: 0px 0px -2px;padding: 0px;max-width: 100%;, Arial, sans-serif\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;float: left;width: 8px;height: 3px;line-height: 0\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;float: right;width: 8px;height: 3px;line-height: 0\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;line-height: 0\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;line-height: 0;width: 0px\"><\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 0px;padding: 0px 10px;max-width: 100%;font-size: 16px;line-height: 1.4\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;text-align: left;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u53d1\u8d77\u4eba\u62db\u52df\uff1a<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;text-align: left;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.578px\"><span style=\"margin: 0px;padding: 0px;max-width: 
100%\">\u300c\u4eba\u673a\u534f\u540c\u7684\u667a\u80fd\u65f6\u4ee3\u300d\u4e3b\u9898\u8bfb\u4e66\u4f1a<\/span><\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin: -2px 0px 0px;padding: 0px;max-width: 100%;, Arial, sans-serif\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;float: left;width: 8px;height: 3px;line-height: 0\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;float: right;width: 8px;height: 3px;line-height: 0\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<h2 style=\"margin: 0px 8px;padding: 0px;font-weight: 400;font-size: 16px;max-width: 100%;, Arial, sans-serif;font-style: normal;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><br \/><\/span><\/h2>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><span>\u968f\u7740\u4eba\u5de5\u667a\u80fd\u6280\u672f\u7684\u4e0d\u65ad\u53d1\u5c55\u548c\u666e\u53ca, 
\u4eba\u7c7b\u793e\u4f1a\u6b63\u5728\u7ecf\u5386\u524d\u6240\u672a\u6709\u7684\u53d8\u9769\u3002\u7279\u522b\u662f\u4ee5\u5927\u578b\u8bed\u8a00\u6a21\u578b\u4e3a\u4ee3\u8868\u7684\u65b0\u4e00\u4ee3\u4eba\u5de5\u667a\u80fd\u6280\u672f\u8ba9\u6211\u4eec\u9762\u4e34\u53cc\u91cd\u5fc3\u5883\uff1a\u4e00\u65b9\u9762\uff0c\u5927\u6a21\u578b\u91cd\u5851\u5404\u4e2a\u884c\u4e1a\u7684\u901f\u5ea6\u8ba9\u4eba\u7c7b\u9762\u4e34\u88ab\u53d6\u4ee3\u7684\u5a01\u80c1\uff1b\u53e6\u4e00\u65b9\u9762\uff0c\u4eba\u4eec\u53c8\u4e0d\u65ad\u7684\u8bd5\u63a2\u7740\u5927\u6a21\u578b\u7684\u80fd\u529b\u4e0a\u9650\uff1a\u4eba\u7c7b\u7684\u76f4\u89c9\u548c\u60c5\u611f\u7406\u89e3\u80fd\u529b\uff0c\u89e3\u91ca\u6027\u548c\u9002\u5e94\u6027\uff0c\u5305\u62ec\u4f26\u7406\u9053\u5fb7\u95ee\u9898\u90fd\u662f\u5927\u6a21\u578b\u8fc8\u5411\u667a\u80fd\u9700\u8981\u7ffb\u8d8a\u7684\u5927\u5c71\u3002<\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><span>\u4eba\u673a\u534f\u540c\u80fd\u591f\u901a\u8fc7\u4eba\u4e0e\u673a\u5668\u8fdb\u884c\u5408\u4f5c\u548c\u534f\u540c\u7684\u5de5\u4f5c\u65b9\u5f0f, \u5728\u9ad8\u7ef4\u52a8\u6001\u73af\u5883\u4e0b\u5b8c\u6210\u590d\u6742\u7684\u51b3\u7b56\u4efb\u52a1, \u9700\u8981\u5c06\u4eba\u7c7b\u7684\u5f52\u7eb3\u3001 \u6f14\u7ece\u3001 
\u6307\u6325\u3001\u51b3\u7b56\u7b49\u4f18\u52bf\u4e0e\u673a\u5668\u64c5\u957f\u7684\u8ba1\u7b97\u3001 \u5b58\u50a8\u3001 \u641c\u7d22\u3001 \u4f18\u5316\u7b49\u6280\u672f\u76f8\u7ed3\u5408, \u4ee5\u5b9e\u73b0\u6700\u4f73\u4e92\u8865\u6548\u679c\u3002\u8fd9\u4e0d\u4ec5\u4f1a\u5e26\u6765\u4fe1\u606f\u8bba\u3001 \u63a7\u5236\u8bba\u3001 \u7cfb\u7edf\u8bba\u3001 \u534f\u540c\u8bba\u7b49\u9886\u57df\u53d8\u9769\uff0c\u8fd8\u6709\u53ef\u80fd\u5f15\u53d1\u4e00\u573a\u4eba\u7c7b\u7684\u8ba4\u77e5\u9769\u547d\uff01<\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><br \/><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><span>\u56e0\u6b64\uff0c\u6211\u4eec\u60f3\u8981\u62db\u52df\u5728\u4eba\u673a\u534f\u540c\u9886\u57df\u7684\u5b66\u8005\uff0c\u4e00\u8d77\u6765\u53d1\u8d77\u4ee5\u300c\u4eba\u673a\u534f\u540c\u7684\u667a\u80fd\u65f6\u4ee3\u300d\u4e3a\u4e3b\u9898\u7684\u8bfb\u4e66\u4f1a\uff0c\u4e00\u8d77\u6784\u5efa\u4e3b\u9898\u793e\u533a\uff0c\u68b3\u7406\u9886\u57df\u53d1\u5c55\u4e0e\u524d\u6cbf\uff0c\u5982\u679c\u4f60\u6709\u610f\u5411\uff0c\u8bf7\u4e0e\u6211\u4eec\u8054\u7cfb\uff1a<\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span style=\"margin: 
0px;padding: 0px;max-width: 100%;font-size: 15px\"><br \/><\/span><\/p>\n<section style=\"margin: 0px 8px 24px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-indent: 0px;text-transform: none;text-align: center\"><img alt=\"\u56fe\u7247\" class=\"rich_pages wxw-img js_insertlocalimg\" data-ratio=\"0.5601851851851852\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"margin: 0px;padding: 0px;max-width: 100%;vertical-align: bottom;height: auto !important;width: 661px !important\" data-imgfileid=\"100227986\" src=\"\" \/><\/section>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 17px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.6em\"><span><br \/><\/span><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 16px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 16px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/span><\/strong><\/span><\/p>\n<section style=\"margin: 10px 0px 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-indent: 0px;text-transform: none;text-align: center;justify-content: center\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: auto;vertical-align: top;min-width: 10%;height: auto\">\n<section style=\"margin: 0px 0px -15px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px 10px;max-width: 100%;font-size: 16px;line-height: 
2;letter-spacing: 3px\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u5927\u8bed\u8a00\u6a21\u578b\u4e0e\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u8bfb\u4e66\u4f1a<\/span><\/strong><\/p>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 1.75em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/p>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u96c6\u667a\u4ff1\u4e50\u90e8\u8054\u5408\u897f\u6e56\u5927\u5b66\u5de5\u5b66\u9662\u7279\u8058\u7814\u7a76\u5458\u8d75\u4e16\u94b0\u3001\u6d59\u6c5f\u5927\u5b66\u6559\u6388\u4efb\u6c81\u6e90\u3001\u9e4f\u57ce\u5b9e\u9a8c\u5ba4\u9ad8\u7ea7\u5de5\u7a0b\u5e08\u5d14\u91d1\u5f3a\uff0c\u5171\u540c\u53d1\u8d77<a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247681407&amp;idx=1&amp;sn=54aed4abdccf3e26b1add4bb8eb17d3c&amp;chksm=e8995ff2dfeed6e4fc9f8d4145253b0fd6819e5461bfbb7b47127d38520480b394cb40fd01db&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener 
noreferrer\">\u300c\u5927\u8bed\u8a00\u6a21\u578b\u4e0e\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u300d\u8bfb\u4e66\u4f1a<\/a>\uff0c\u63a2\u7a76\u5927\u8bed\u8a00\u6a21\u578b\u7ed9\u673a\u5668\u4eba\u9886\u57df\u5e26\u6765\u7684\u65b0\u601d\u60f3\u65b0\u4ef7\u503c\u3002<\/span><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px;letter-spacing: 0.544px;, Arial, sans-serif\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u8bfb\u4e66\u4f1a\u5df2\u5b8c\u7ed3\uff0c\u73b0\u5728\u62a5\u540d\u53ef\u52a0\u5165\u793e\u7fa4\u5e76\u89e3\u9501\u56de\u653e\u89c6\u9891\u6743\u9650\u3002<\/span><\/span><\/span><\/section>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/span><\/section>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 16px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 16px\"><a href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247681407&amp;idx=1&amp;sn=54aed4abdccf3e26b1add4bb8eb17d3c&amp;chksm=e8995ff2dfeed6e4fc9f8d4145253b0fd6819e5461bfbb7b47127d38520480b394cb40fd01db&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"1\" rel=\"noopener noreferrer\"><span style=\"margin:0px;padding:0px;max-width:100%;vertical-align:bottom;overflow:hidden;width:100%\" class=\"js_jump_icon h5_image_link\"><img alt=\"\u56fe\u7247\" class=\"rich_pages wxw-img\" 
data-ratio=\"0.5712962962962963\" data-w=\"1080\" style=\"margin: 0px;padding: 0px;border: 0px;max-width: 100%;vertical-align: bottom;width: 661px !important;height: auto !important\" data-backw=\"562\" data-backh=\"321\" data-imgfileid=\"100203796\" src=\"\" \/><\/span><\/a><\/span><\/strong><\/span><\/section>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/p>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u8be6\u60c5\u8bf7\u89c1\uff1a<\/span><\/span><\/section>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px;text-decoration: underline\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247681407&amp;idx=1&amp;sn=54aed4abdccf3e26b1add4bb8eb17d3c&amp;chksm=e8995ff2dfeed6e4fc9f8d4145253b0fd6819e5461bfbb7b47127d38520480b394cb40fd01db&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener 
noreferrer\">\u5927\u8bed\u8a00\u6a21\u578b\u4e0e\u591a\u667a\u80fd\u4f53\u7cfb\u7edf\u8bfb\u4e66\u4f1a\uff1a\u5927\u6a21\u578b\u8d4b\u80fd\u673a\u5668\u4eba\u6d8c\u73b0\u7fa4\u4f53\u667a\u80fd<\/a><\/span><\/span><\/section>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/p>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;text-align: justify;text-indent: 0px;text-transform: none;letter-spacing: 0.578px;line-height: 1.75em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<section style=\"margin: 10px 0px 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-indent: 0px;text-transform: none;text-align: center;justify-content: center\">\n<section style=\"margin: 0px;padding: 0px;max-width: 100%;width: auto;vertical-align: top;min-width: 10%;height: auto\">\n<section style=\"margin: 0px 0px -15px;padding: 0px;max-width: 100%\">\n<section style=\"margin: 0px;padding: 0px 10px;max-width: 100%;font-size: 16px;line-height: 2;letter-spacing: 3px\">\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u5177\u8eab\u667a\u80fd\u8bfb\u4e66\u4f1a\u542f\u52a8<\/span><\/strong><\/p>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;clear: both;min-height: 
1em;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/section>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u96c6\u667a\u4ff1\u4e50\u90e8\u8054\u5408\u4e0a\u6d77\u4ea4\u901a\u5927\u5b66\u52a9\u7406\u6559\u6388\u674e\u6c38\u9732\u3001\u94f6\u6cb3\u901a\u7528\u673a\u5668\u4eba\u5408\u4f19\u4eba\u53f2\u96ea\u677e\u3001\u5357\u4eac\u5927\u5b66LAMDA\u7ec4\u535a\u58eb\u751f\u9648\u96c4\u8f89\u3001\u9999\u6e2f\u5927\u5b66\u5728\u8bfb\u535a\u58eb\u751f\u7a46\u5c27\uff0c\u5171\u540c\u53d1\u8d77\u9996\u5b63<a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247705641&amp;idx=1&amp;sn=596c47f8e1db38a8a56c4abebffd0e10&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener 
noreferrer\">\u300c\u5177\u8eab\u667a\u80fd\u300d\u8bfb\u4e66\u4f1a<\/a>\u3002\u8bfb\u4e66\u4f1a\u8ba1\u5212\u91c7\u7528\u201c\u81ea\u4e0b\u800c\u4e0a\u201d\u7684\u5c42\u7ea7\u7ed3\u6784\uff0c\u63a2\u8ba8\u56db\u4e2a\u6838\u5fc3\u6a21\u5757\uff1a\u786c\u4ef6\u7cfb\u7edf\uff08\u673a\u5668\u4eba\u672c\u4f53\u8bbe\u8ba1\uff09\uff0c\u6570\u636e\u3001\u4eff\u771f\u73af\u5883\u4e0eBenchmark\uff0c\u673a\u5668\u4eba\u5b66\u4e60\uff0c\u5177\u4f53\u573a\u666f\u4efb\u52a1\u3002\u5e0c\u671b\u901a\u8fc7\u91cd\u70b9\u8ba8\u8bba\u7ecf\u5178\u3001\u524d\u6cbf\u7684\u91cd\u8981\u6587\u732e\uff0c\u5e2e\u52a9\u5927\u5bb6\u66f4\u597d\u5730\u5b66\u4e60\u673a\u5668\u4eba\u4e0e\u5177\u8eab\u667a\u80fd\u6280\u672f\u524d\u6cbf\u6280\u672f\uff0c\u4e3a\u76f8\u5173\u9886\u57df\u7684\u7814\u7a76\u548c\u5e94\u7528\u63d0\u4f9b\u6d1e\u89c1\u3002<\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">\u8bfb\u4e66\u4f1a\u4ece2025\u5e741\u670819\u65e5\u5f00\u59cb\uff0c\u6bcf\u5468\u65e514:00-16:00\uff0c\u6301\u7eed\u65f6\u95f4\u9884\u8ba1 6-8 
\u5468\u5de6\u53f3\u3002\u6bcf\u5468\u8fdb\u884c\u7ebf\u4e0a\u4f1a\u8bae\uff0c\u4e0e\u4e3b\u8bb2\u4eba\u7b49\u793e\u533a\u6210\u5458\u5f53\u9762\u4ea4\u6d41\uff0c\u4e4b\u540e\u53ef\u4ee5\u83b7\u5f97\u89c6\u9891\u56de\u653e\u6301\u7eed\u5b66\u4e60\u3002<\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/span><\/p>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;line-height: 2em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%;font-size: 15px;letter-spacing: 0.544px;width: 100%\"><span class=\"js_jump_icon h5_image_link\" style=\"margin: 0px;padding: 0px;max-width: 100%;vertical-align: bottom;overflow: hidden\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><img alt=\"\u56fe\u7247\" class=\"rich_pages wxw-img\" data-ratio=\"0.5712962962962963\" data-w=\"1080\" style=\"margin: 0px;padding: 0px;border: 0px;max-width: 100%;vertical-align: bottom;height: auto !important;width: 661px !important\" data-backw=\"562\" data-backh=\"321\" data-imgfileid=\"100222126\" src=\"\" \/><\/span><\/span><\/span><\/p>\n<section style=\"margin: 0px 8px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;text-align: justify;text-indent: 0px;text-transform: none;letter-spacing: 0.578px;line-height: 1.75em\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 
0px;padding: 0px;max-width: 100%;font-size: 15px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/span><\/strong><\/span><\/section>\n<p style=\"margin: 0px 0px 5px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;, Arial, sans-serif\"><span style=\"margin-top: 0px;margin-bottom: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px\"><span style=\"margin-top: 0px;margin-bottom: 0px;padding: 0px;max-width: 100%\">\u8be6\u60c5\u8bf7\u89c1\uff1a<a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247705641&amp;idx=1&amp;sn=596c47f8e1db38a8a56c4abebffd0e10&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\">\u5177\u8eab\u667a\u80fd\u8bfb\u4e66\u4f1a\u542f\u52a8\uff1a\u8d70\u5411\u73b0\u5b9e\u4e16\u754c\u7684\u4e0b\u4e00\u4ee3AI\u7cfb\u7edf<\/a><\/span><\/span><\/p>\n<p style=\"margin: 0px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;, Arial, sans-serif;font-size: 14px;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><br \/><\/span><\/p>\n<p style=\"margin: 0px 8px 5px;padding: 0px;max-width: 100%;clear: both;min-height: 1em\"><span><br \/><\/span><\/p>\n<p><span><br \/><\/span><\/p>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong><strong style=\"letter-spacing: 0.544px;font-size: 16px;text-align: center;, Arial, sans-serif\"><strong style=\"text-align: left;font-family: PingFangSC-light;letter-spacing: 0.544px\"><span style=\"font-size: 15px;letter-spacing: 
0.544px\"><span>\u63a8\u8350\u9605\u8bfb<\/span><\/span><\/strong><\/strong><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong><span>1.&nbsp;<a href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247707721&amp;idx=1&amp;sn=52c0370762a13598d3c76cf090ecfac3&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\"><span style=\"text-decoration: underline\">DeepSeek-R1\uff5c\u96c6\u667a\u767e\u79d1<\/span><\/a><\/span><\/strong><span style=\"text-decoration: underline\"><strong><span><br \/><\/span><\/strong><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong style=\"letter-spacing: 0.544px\"><span>2.&nbsp;<a href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247661929&amp;idx=1&amp;sn=de9f5d1f70ca63fbad3e8ce68ee14a87&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\"><span style=\"text-decoration: underline\">Science\u524d\u6cbf\uff1a\u5927\u8bed\u8a00\u6a21\u578b\u6d8c\u73b0\u6f14\u5316\u4fe1\u606f\uff0c\u52a0\u901f\u86cb\u767d\u8d28\u7ed3\u6784\u9884\u6d4b<\/span><\/a><\/span><\/strong><span style=\"text-decoration: underline\"><strong style=\"letter-spacing: 0.544px\"><span><br \/><\/span><\/strong><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong style=\"letter-spacing: 0.544px\"><span>3.&nbsp;<a 
href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247660144&amp;idx=3&amp;sn=416328b1fae18e63e1d236291ed11246&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\"><span style=\"text-decoration: underline\">Science \u901f\u9012\uff1a\u5927\u8bed\u8a00\u6a21\u578b\u5bf9\u86cb\u767d\u8d28\u7ed3\u6784\u8fdb\u884c\u6f14\u5316\u5c3a\u5ea6\u9884\u6d4b<\/span><\/a><\/span><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;letter-spacing: 0.544px;, Arial, sans-serif;font-size: 14px\"><strong style=\"letter-spacing: 0.544px\"><\/strong><\/section>\n<section style=\"margin: 0px 8px 8px;padding: 0px;max-width: 100%;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;, Arial, sans-serif;font-size: 14px\" data-pm-slice=\"0 0 []\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">4.&nbsp;<\/span><\/strong><\/span><span style=\"margin: 0px;padding: 0px;max-width: 100%;text-decoration: underline\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247710871&amp;idx=1&amp;sn=f572fc0ff9abbc6b32185b835ed39122&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\">\u6d8c\u73b0\u52a8\u529b\u5b66\u5982\u4f55\u7528\u6765\u5206\u6790\u590d\u6742\u7cfb\u7edf\uff1f | \u65b0\u8bfe\u4e0a\u7ebf<\/a><\/span><\/strong><\/strong><\/span><\/section>\n<section 
style=\"margin: 0px 8px 8px;padding: 0px;max-width: 100%;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;, Arial, sans-serif;font-size: 14px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%;letter-spacing: 0.544px\"><\/strong><strong style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">5.&nbsp;<\/span><\/strong><strong style=\"margin: 0px;padding: 0px;max-width: 100%;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247710722&amp;idx=1&amp;sn=1525450b1cd21fb737953390f8efe8ec&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\"><span style=\"text-decoration: underline\">AI\u65f6\u4ee3\u7684\u5b66\u4e60\uff1a\u5171\u63a2\u4eba\u7c7b\u5b66\u4e60\u7684\u590d\u6742\u6027<\/span><\/a><\/span><\/strong><\/section>\n<p style=\"margin: 0px 8px;padding: 0px;max-width: 100%;clear: both;min-height: 1em;font-style: normal;font-weight: 400;letter-spacing: 0.544px;text-align: justify;text-indent: 0px;text-transform: none;, Arial, sans-serif;font-size: 14px\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\">6.&nbsp;<\/span><\/strong><span style=\"margin: 0px;padding: 0px;max-width: 100%;text-decoration: underline\"><strong style=\"margin: 0px;padding: 0px;max-width: 100%\"><span style=\"margin: 0px;padding: 0px;max-width: 100%\"><a style=\"margin: 0px;padding: 0px;text-decoration: none;cursor: default;max-width: 100%\" 
href=\"https:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247710237&amp;idx=3&amp;sn=58ef1bd72e608f0778d03e8f271520c6&amp;scene=21#wechat_redirect\" data-itemshowtype=\"0\" target=\"_blank\" data-linktype=\"2\" rel=\"noopener noreferrer\">\u63a2\u7d22\u8005\u8ba1\u5212 | \u96c6\u667a\u4ff1\u4e50\u90e82025\u5185\u5bb9\u56e2\u961f\u62db\u52df\uff08\u5168\u804c&amp;\u517c\u804c\uff09<\/a><\/span><\/strong><\/span><\/p>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.578px;line-height: 2em\"><span><br \/><\/span><\/p>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.578px;line-height: 2em\"><span style=\", Arial, sans-serif;font-size: 15px;letter-spacing: 0.578px;text-indent: 0em\"><span><br \/><\/span><\/span><\/p>\n<section style=\"margin-bottom: 0px;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.578px;line-height: 2em;margin-left: 0px;margin-right: 0px\"><span style=\"text-indent: 0em;font-family: PingFangSC-light;font-size: 15px;font-weight: 700;letter-spacing: 0.544px;text-align: left\"><span>\u70b9\u51fb\u201c\u9605\u8bfb\u539f\u6587\u201d\uff0c\u62a5\u540d\u8bfb\u4e66\u4f1a<\/span><\/span><\/section>\n<\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u6458\u8981 OpenAI\u7684O1\u53ca\u5176\u540e\u7eed\u7ade\u4e89\u8005\uff08\u5982DeepSeek R1\uff09\u7684\u53d1\u5e03\u663e\u8457\u63a8\u52a8\u4e86\u5927\u8bed\u8a00\u6a21\u578b\uff08Large Language 
Models\uff0cLLMs\uff09\u5728\u590d\u6742\u63a8\u7406\u65b9\u9762\u7684\u7814\u7a76\uff0c\u5f15\u53d1\u5b66\u672f\u754c\u4e0e\u5de5\u4e1a\u754c\u7684\u53cc\u91cd\u5173\u6ce8\u3002\u6b64\u9879\u8fdb\u5c55\u6fc0\u53d1\u4e86\u76f8\u5173\u6280\u672f\u6210\u679c\u7684\u590d\u73b0\u548c\u5728\u6b64\u57fa\u7840\u4e0a\u7684\u521b\u65b0\u3002\u4e3a\u7cfb\u7edf\u6784\u5efa\u8be5\u9886\u57df\u7684\u7814\u7a76\u6846\u67b6\uff0c\u672c\u6587\u4ece\u81ea\u6211\u8fdb\u5316\uff08se&#8230;<\/p>\n","protected":false},"author":0,"featured_media":58590,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[],"special":[],"_links":{"self":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/58651"}],"collection":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=58651"}],"version-history":[{"count":0,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/58651\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/media\/58590"}],"wp:attachment":[{"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=58651"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=58651"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=58651"},{"taxonomy":"special","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fspecial&post=58651"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}