{"id":48159,"date":"2024-02-03T19:16:25","date_gmt":"2024-02-03T11:16:25","guid":{"rendered":"https:\/\/swarma.org\/?p=48159"},"modified":"2024-02-03T19:16:25","modified_gmt":"2024-02-03T11:16:25","slug":"ai%e5%af%b9%e9%bd%90%e6%98%af%e6%8e%a7%e5%88%b6%e8%ae%ba%e8%bf%98%e6%98%af%e5%8d%9a%e5%bc%88%e8%ae%ba%ef%bc%9f","status":"publish","type":"post","link":"https:\/\/swarma.org\/?p=48159","title":{"rendered":"AI\u5bf9\u9f50\u662f\u63a7\u5236\u8bba\u8fd8\u662f\u535a\u5f08\u8bba\uff1f"},"content":{"rendered":"<div class='wxsyncmain'>\n<section data-mpa-powered-by=\"yiban.io\">\n<section powered-by=\"xiumi.us\">\n<section powered-by=\"xiumi.us\" style=\"margin-bottom: 0px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(63, 63, 63);font-family: PingFangSC-light;font-size: 15px;background-color: rgb(255, 255, 255);visibility: visible;\">\n<section style=\"outline: 0px;display: inline-block;width: 661px;vertical-align: top;background-color: rgb(246, 246, 246);visibility: visible;\">\n<section style=\"outline: 0px;visibility: visible;\">\n<p style=\"text-align: center;\"><img class=\"rich_pages wxw-img\" data-backh=\"325\" data-backw=\"578\" data-galleryid=\"\" data-imgfileid=\"100197803\" data-ratio=\"0.562962962962963\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"width: 100%;height: auto !important;\"  src=\"\/wp-content\/uploads\/2024\/02\/wxsync-2024-02-78c08c0899944b34f47510546aa22b26.jpeg\"  \/><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;outline: 0px;letter-spacing: 0.544px;visibility: visible;\">\n<section style=\"outline: 0px;width: 661px;visibility: visible;\">\n<section style=\"padding-right: 3px;outline: 0px;float: left;line-height: 1;visibility: visible;\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;text-align: left;visibility: visible;\">\n<section style=\"padding-left: 10px;outline: 0px;display: inline-block;width: auto;vertical-align: top;min-width: 10%;height: 
auto;border-left: 3px solid rgb(33, 166, 210);border-bottom-left-radius: 0px;line-height: 0;visibility: visible;\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;transform: translate3d(-13px, 0px, 0px);visibility: visible;\">\n<section style=\"outline: 0px;display: inline-block;width: 25px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px;border-color: rgb(33, 166, 210) rgb(33, 166, 210) rgb(15, 76, 129);visibility: visible;\"><br style=\"outline: 0px;visibility: visible;\"  \/><\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;transform: translate3d(-1px, 0px, 0px);visibility: visible;\">\n<section style=\"outline: 0px;text-align: justify;color: rgb(33, 166, 210);font-size: 16px;line-height: 1.5;visibility: visible;\">\n<p style=\"outline: 0px;visibility: visible;\"><strong style=\"outline: 0px;visibility: visible;\">\u5bfc\u8bed<\/strong><\/p>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;transform: translate3d(-13px, 0px, 0px) rotateX(180deg);visibility: visible;\">\n<section style=\"outline: 0px;display: inline-block;width: 24px;height: 10px;vertical-align: top;overflow: hidden;line-height: 0;border-style: solid solid none;border-width: 3px 3px 2px;border-radius: 0px;border-color: rgb(33, 166, 210) rgb(33, 166, 210) rgb(15, 76, 129);visibility: visible;\"><br style=\"outline: 0px;visibility: visible;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 4px;padding-left: 4px;outline: 0px;clear: right;box-shadow: rgb(0, 0, 0) 0px 0px 0px;visibility: visible;min-height: 4.5em !important;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 5px;margin-bottom: 5px;outline: 0px;visibility: visible;\">\n<section style=\"padding-right: 8px;padding-left: 8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;visibility: visible;\">\n<p 
style=\"outline: 0px;clear: none;line-height: 2em;visibility: visible;\"><strong style=\"outline: 0px;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px;visibility: visible;\"><span style=\"outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(63, 63, 63);visibility: visible;\">\u672c\u6587\u662f\u5317\u4eac\u5927\u5b66\u8ba1\u7b97\u673a\u56fe\u7075\u73ed\u672c\u79d1\u751f\u3001\u5317\u5927\u5bf9\u9f50\u5c0f\u7ec4\uff08PKU-Alignment\uff09\u6210\u5458<strong style=\"color: rgb(63, 63, 63);font-family: PingFangSC-light;text-indent: 0em;text-wrap: wrap;background-color: rgb(246, 246, 246);outline: 0px;letter-spacing: 0.578px;font-size: 15px;visibility: visible;\"><span style=\"outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(63, 63, 63);visibility: visible;\">\u90b1\u5929\u5f02\u64b0\u5199\u7684\u6587\u7ae0\uff0c\u4ecb\u7ecd\u4e86AI\u5bf9\u9f50\u95ee\u9898\u7684\u4e24\u79cd\u4e3b\u8981\u601d\u8def\uff1a\u57fa\u4e8e\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60\uff08\u63a7\u5236\u8bba\u8fdb\u8def\uff09\uff0c\u5408\u4f5c\u9006\u5f3a\u5316\u5b66\u4e60\uff08\u535a\u5f08\u8bba\u8fdb\u8def\uff09\u3002<\/span><\/strong><strong style=\"color: rgb(63, 63, 63);font-family: PingFangSC-light;text-indent: 0em;text-wrap: wrap;background-color: rgb(246, 246, 246);outline: 0px;letter-spacing: 0.578px;font-size: 15px;visibility: visible;\"><span style=\"outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(63, 63, 63);visibility: visible;\">\u90b1\u5929\u5f02<\/span><\/strong>\u5728\u5317\u5927PAIR-Lab\u8fdb\u884cAI\u5bf9\u9f50\u7814\u7a76\uff0c\u5173\u6ce8\u5bf9\u9f50\u4e0e\u9053\u5fb7\u4ef7\u503c\u7684\u4ea4\u53c9\uff0c\u5e76\u5408\u4f5c\u64b0\u5199\u7efc\u8ff0\u6587\u7ae0 AI Alignment: A Comprehensive Survey\u3002<\/span><\/strong><\/p>\n<\/section>\n<section style=\"padding-right: 8px;padding-left: 
8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;\">\n<p style=\"outline: 0px;clear: none;line-height: 2em;\"><strong style=\"outline: 0px;letter-spacing: 0.578px;text-indent: 0em;font-size: 15px;\"><span style=\"outline: 0px;font-size: 13px;letter-spacing: 0.544px;text-decoration-style: solid;text-decoration-color: rgb(63, 63, 63);\"><br style=\"outline: 0px;\"  \/><\/span><\/strong><\/p>\n<\/section>\n<section style=\"padding-right: 8px;padding-left: 8px;outline: 0px;font-size: 13px;line-height: 2;letter-spacing: 0.544px;\">\n<p style=\"outline: 0px;clear: none;\"><strong style=\"outline: 0px;\"><span style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><span style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><span style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><strong style=\"outline: 0px;color: rgb(0, 0, 0);text-align: start;visibility: visible;\">\u7814\u7a76\u9886\u57df\uff1aAI\u5bf9\u9f50\uff0c\u57fa\u4e8e\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60\uff0c\u5408\u4f5c\u9006\u5f3a\u5316\u5b66\u4e60<\/strong><\/strong><\/span><\/strong><\/span><\/strong><\/span><\/strong><\/strong><\/span><\/strong><\/span><\/strong><\/span><\/strong><\/p>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-top: 
10px;outline: 0px;\">\n<section style=\"outline: 0px;width: 661px;\">\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(63, 63, 63);font-family: PingFangSC-light;font-size: 15px;background-color: rgb(255, 255, 255);text-align: center;\">\n<section style=\"outline: 0px;vertical-align: middle;display: inline-block;line-height: 0;\"><img class=\"rich_pages wxw-img\" data-fileid=\"100098753\" data-imgfileid=\"100197997\" data-ratio=\"0.07314814814814814\" data-type=\"png\" data-w=\"1080\" style=\"outline: 0px;vertical-align: middle;visibility: visible !important;width: 677px !important;height: auto !important;\"  src=\"\/wp-content\/uploads\/2024\/02\/wxsync-2024-02-6f9f672988af7db277f019eec3e6e952.png\"  \/><\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-top: 0pt;margin-bottom: 0pt;outline: 0px;letter-spacing: 0.544px;white-space: normal;line-height: 1.7;font-family: PingFangSC-light;background-color: rgb(255, 255, 255);text-align: right;\">\n<p style=\"outline: 0px;color: rgb(73, 73, 73);font-size: 14.6667px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;font-size: 13px;text-decoration-style: solid;text-decoration-color: rgb(73, 73, 73);\">\u5317\u4eac\u5927\u5b66\u524d\u6cbf\u8ba1\u7b97\u7814\u7a76\u4e2d\u5fc3<\/span><span style=\"outline: 0px;letter-spacing: 0.544px;font-size: 13px;visibility: visible;\"><strong style=\"outline: 0px;color: rgb(0, 0, 0);visibility: visible;\">&nbsp;| \u6765\u6e90<\/strong><\/span><strong style=\"outline: 0px;font-size: 13px;letter-spacing: 0.544px;color: rgb(0, 0, 0);visibility: 
visible;\"><\/strong><\/p>\n<p style=\"outline: 0px;font-size: 14.6667px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;font-size: 13px;visibility: visible;\"><span style=\"outline: 0px;visibility: visible;\"><span style=\"outline: 0px;letter-spacing: 0.544px;visibility: visible;\"><span style=\"outline: 0px;visibility: visible;\"><span style=\"outline: 0px;color: rgb(73, 73, 73);\"><span style=\"color: rgb(73, 73, 73);font-size: 13px;letter-spacing: 0.544px;text-decoration: none solid rgb(73, 73, 73);\">\u90b1\u5929\u5f02<\/span><\/span><\/span><strong style=\"outline: 0px;color: rgb(0, 0, 0);visibility: visible;\">&nbsp;| \u4f5c\u8005<\/strong><\/span><\/span><\/span><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/p>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, 
&quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">AI\u5bf9\u9f50\uff1a\u6cd5\u5178\u7684\u7c7b\u6bd4<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 
3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5982\u679c\u8bf7\u4f60\u7f16\u5199\u4e00\u90e8\u65e0\u6240\u4e0d\u5305\u7684\u6cd5\u5178\uff0c\u5e76\u4e14\u8fd9\u90e8\u6cd5\u5178\u5c06\u96f7\u6253\u4e0d\u52a8\u3001\u4e0d\u7ecf\u4fee\u6539\u5730\u751f\u6548\u4e00\u767e\u5e74\uff0c\u4f60\u6709\u591a\u5927\u4fe1\u5fc3\u80fd\u591f\u7ed9\u51fa\u4e00\u4e2a\u5b8c\u5584\u7684\u65b9\u6848\uff1f<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u60f3\u5fc5\u4f1a\u9047\u5230\u8bf8\u591a\u56f0\u96be\uff0c\u5305\u62ec\u4f46\u4e0d\u9650\u4e8e\uff1a<\/span><\/p>\n<\/section>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9519\u8bef\u89c4\u8303\uff08Misspecification\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\"><\/span><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4f60\u6240\u8bbe\u60f3\u51fa\u7684\u6cd5\u6761\u867d\u7136\u770b\u4f3c\u5b8c\u5584\uff0c\u4f46\u53ef\u80fd\u5b58\u5728\u5f88\u591a\u9690\u542b\u7684\u6f0f\u6d1e\u3002\u5f53\u6709\u4eba\u8bd5\u56fe\u5229\u7528\u94bb\u6cd5\u6761\u7684\u7a7a\u5b50\u65f6\uff0c\u4f1a\u53d1\u73b0\u5b58\u5728\u8bb8\u591a\u53ef\u4e58\u4e4b\u673a\u3002\u8fd9\u53ef\u4ee5\u770b\u4f5c\u662f\u7ecf\u6d4e\u5b66\u4e2d\u53e4\u5fb7\u54c8\u7279\u5b9a\u5f8b<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08Goodhart&#8217;s Law\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7684\u4f53\u73b0\u2014\u2014\u4efb\u4f55\u8bc4\u4ef7\u6307\u6807\u4e00\u65e6\u88ab\u4f5c\u4e3a\u4f18\u5316\u76ee\u6807\uff0c\u5c31\u4e0d\u518d\u662f\u597d\u7684\u8bc4\u4ef7\u6307\u6807<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08\u8fd9\u91cc\u6211\u4eec\u628a \u201c\u9075\u4ece\u6cd5\u5178\u201d \u89c6\u4e3a\u4e00\u4e2a\u8bc4\u4ef7\u6307\u6807\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\uff0c\u8fd9\u662f\u56e0\u4e3a\u76f8\u5173\u6027\u4e0d\u4ee3\u8868\u56e0\u679c\u6027\uff0c\u6545\u5bf9\u4e00\u4e2a\u4e0e\u771f\u5b9e\u76ee\u6807\u5177\u6709\u76f8\u5173\u6027\u7684 \u201c\u4ee3\u7406\u76ee\u6807\u201d<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08proxy goal\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u65bd\u4ee5\u4f18\u5316\uff0c\u5f88\u53ef\u80fd\u56e0\u56e0\u679c\u5173\u8054\u7684\u7f3a\u5931\u800c\u5bfc\u81f4\u76f8\u5173\u6027\u7684\u6d88\u5931\u3002&nbsp;<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span 
style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9519\u8bef\u6cdb\u5316\uff08Misgeneralization\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4f60\u5bf9\u8fd9\u90e8\u6cd5\u5178\u7684\u6784\u601d\uff0c\u53d1\u751f\u5728\u4f60\u5bf9\u5f53\u4e0b\u4e16\u754c\u7684\u7406\u89e3\u6846\u67b6\u4e0b\uff0c\u56e0\u6b64\u5b83\u4e0d\u53ef\u907f\u514d\u5730\u4ee5\u5f53\u4e0b\u4e16\u754c\u4e2d\u7684\u6982\u5ff5\u4e3a\u57fa\u7840\u3001\u5e76\u4f9d\u8d56\u4e8e\u8bf8\u591a\u5728\u5f53\u4e0b\u4e16\u754c\u4e2d\u6210\u7acb\u7684\u5047\u8bbe\u3002\u56e0\u6b64\uff0c\u82e5\u628a\u6cd5\u5178\u89c6\u4e3a\u4ece\u884c\u4e3a\u5230\u5408\u6cd5\u6027\u7684\u9884\u6d4b\u6a21\u578b\uff0c\u90a3\u4e48\u4e00\u767e\u5e74\u540e\u5b83\u5c06\u4f1a\u5728\u5206\u5e03\u5916\u8fd0\u4f5c\u3002\u5982\u679c\u767e\u5e74\u540e\u4e16\u754c\u5728\u4e00\u4e9b\u91cd\u8981\u65b9\u9762\u4e0e\u5f53\u4eca\u4e16\u754c\u6709\u672c\u8d28\u4e0d\u540c\uff0c\u5219\u6cd5\u5178\u4e2d\u7684\u4e00\u4e9b\u5185\u5bb9\u53ef\u80fd\u53d8\u5f97\u5b8c\u5168\u4e0d\u5408\u7406\uff0c\u4ece\u800c\u7ed9\u51fa\u5b8c\u5168\u4e0d\u5408\u7406\u7684\u5408\u6cd5\u6027\u5224\u65ad\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u81ea\u81f4\u7684\u5206\u5e03\u504f\u79fb\uff08Auto-Induced Distributional Shifts\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\">\u6cd5\u5178\u672c\u8eab\u7684\u5185\u5bb9\u4f1a\u5bf9\u4e16\u754c\u7684\u53d1\u5c55\u8f68\u8ff9\u9020\u6210\u5f71\u54cd\uff0c\u56e0\u6b64\u524d\u8ff0\u7684\u5206\u5e03\u5916\u60c5\u5f62\u7684\u51fa\u73b0\uff0c\u751a\u81f3\u6709\u53ef\u80fd\u6b63\u662f\u5176\u81ea\u8eab\u9020\u6210\u7684\u3002\u8fd9\u610f\u5473\u7740\uff0c\u5728\u7f16\u5199\u8fd9\u90e8\u6cd5\u5178\u65f6\uff0c\u6211\u4eec\u6216\u8bb8\u4e0d\u5e94\u628a\u4e16\u754c\u8f68\u8ff9\u770b\u4f5c\u4e00\u4e2a\u7ed9\u5b9a\u7684\u91cf\uff0c\u800c\u5e94\u4ece\u4e92\u52a8\u7684\u89d2\u5ea6\u6765\u523b\u753b\u8fd9\u4e00\u95ee\u9898\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9053\u5fb7\u4e0d\u786e\u5b9a\u6027\uff08Moral Uncertainty\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u6cd5\u5178\u7684\u7f16\u5199\u5fc5\u5b9a\u6d89\u53ca\u9053\u5fb7\u5224\u65ad\uff0c\u6545\u4e00\u5b9a\u662f\u7279\u5b9a\u4e8e\u4e00\u7ec4\u5177\u4f53\u7684\u4ef7\u503c\u89c2\u7684\u3002\u7136\u800c\u56f4\u7ed5\u9053\u5fb7\u8bae\u9898\u7684\u8fa9\u8bba\u4ece\u53e4\u81f3\u4eca\u4ece\u672a\u505c\u606f\u8fc7\uff0c\u5c31\u8fde\u4e13\u7cbe\u4e8e\u6b64\u7684\u4f26\u7406\u5b66\u5bb6\u4e5f\u51e0\u4e4e\u4e0d\u80fd\u5c31\u4efb\u4f55\u95ee\u9898\u8fbe\u6210\u4e00\u81f4\u3002\u8fd9\u79cd\u60c5\u51b5\u4e0b\uff0c\u600e\u6837\u7684\u9053\u5fb7\u5224\u65ad\u662f\u5408\u7406\u7684\uff0c\u662f\u4e00\u4e2a\u5f88\u96be\u56de\u7b54\u7684\u95ee\u9898\u3002<\/span><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 
8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<section>\n<section powered-by=\"xiumi.us\">\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 
0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">AI\u5bf9\u9f50\uff1a\u5b9e\u9645\u4e2d\u8bd5\u56fe\u89e3\u51b3\u7684\u95ee\u9898<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<\/section>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5728\u5f53\u4e0b\u548c\u672a\u6765\uff0c \u201cAI \u5bf9\u9f50\u201d \u6307\u7684\u662f\u8ba9 AI \u7cfb\u7edf<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 
136);\">\uff08\u5305\u62ec\u80fd\u529b\u8d85\u8d8a\u4eba\u7c7b\u7684 AI \u7cfb\u7edf\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7684\u4f18\u5316\u76ee\u6807\u4e0e\u4eba\u7c7b\u610f\u56fe\u3001\u4eba\u7c7b\u4ef7\u503c\u89c2\u76f8\u5339\u914d\uff0c\u800c\u5b83\u6240\u8981\u89e3\u51b3\u7684\u6b63\u662f\u4e0a\u8ff0\u8fd9\u4e9b\u95ee\u9898[1]\u3002\u6240\u6709\u8fd9\u56db\u79cd\u56f0\u96be\uff0c\u5728\u7ed9\u5f53\u4e0b\u548c\u672a\u6765\u7684 AI \u7cfb\u7edf\u8bbe\u5b9a\u76ee\u6807\u65f6\uff0c\u90fd\u5df2\u7ecf\u51fa\u73b0\u6216\u53ef\u80fd\u4f1a\u51fa\u73b0\u3002\u4e00\u4e9b\u4f8b\u5b50\u5982\uff1a<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u76ee\u6807\u9519\u8bef\u89c4\u8303\uff08Goal Misspecification\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u4e00\u7c7b\u7684\u4f8b\u5b50\u5728\u5404\u7c7b\u6e38\u620f\u548c\u4eff\u771f\u73af\u5883\u4e2d\u8f83\u4e3a\u5e38\u89c1\uff0c\u751a\u81f3\u6709\u4e00\u4e2a\u7f51\u9875\u4e13\u95e8\u7ef4\u62a4\u4e86\u4e00\u7cfb\u5217\u6b64\u7c7b\u6848\u4f8b[2]\u3002\u4e00\u4e2a\u8f83\u65b0\u7684\u4f8b\u5b50\u662f\u5927\u8bed\u8a00\u6a21\u578b\u7684\u6076\u8a00\u6076\u8bed\u884c\u4e3a\u2014\u2014\u8fd9\u4e9b\u884c\u4e3a\u5f80\u5f80\u4ece\u8bad\u7ec3\u96c6\u4e2d\u7684\u6076\u8bed\u4e60\u5f97\uff0c\u4f53\u73b0\u4e86\u8bad\u7ec3\u96c6\u8fd9\u4e00\u76ee\u6807\u89c4\u7ea6<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08goal specification\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 
63);\">\u7684\u4e0d\u7406\u60f3\u4e4b\u5904\u3002\u4e0d\u8fc7\u8fd9\u7c7b\u6076\u8a00\u6076\u8bed\u53ea\u662f\u4ece\u6570\u636e\u4e2d\u88ab\u52a8\u4e60\u5f97\u574f\u884c\u4e3a\uff0c\u5982\u679c\u5927\u8bed\u8a00\u6a21\u578b\u7684\u8bad\u7ec3\u6d41\u7a0b\u88ab\u4e0e\u5f3a\u5316\u5b66\u4e60\u4e00\u7c7b\u7684\u4e3b\u52a8\u89c4\u5212\u548c\u6267\u884c\u80fd\u529b\u7ed3\u5408\uff0c\u5219\u53ef\u80fd\u5e26\u6765\u66f4\u4e3a\u4e3b\u52a8\u7684 \u201c\u94bb\u6f0f\u6d1e\u201d \u884c\u4e3a\u3002\u4e0e\u4e4b\u76f8\u5173\u7684\u53e6\u4e00\u4f8b\u662f\u5927\u8bed\u8a00\u6a21\u578b\u4e2d\u7684\u5956\u52b1\u8fc7\u5ea6\u4f18\u5316<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08Reward Overoptimization\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u73b0\u8c61\uff1a\u82e5\u5927\u8bed\u8a00\u6a21\u578b\u7684\u5bf9\u9f50\u8bad\u7ec3\u91c7\u7528\u4e60\u5f97\u7684\u4eba\u7c7b\u504f\u597d\u6a21\u578b\u4e3a\u4f18\u5316\u76ee\u6807\uff0c\u5219\u4f18\u5316\u5230\u4e00\u5b9a\u7a0b\u5ea6\u540e\uff0c\u4e0e\u771f\u5b9e\u4eba\u7c7b\u504f\u597d\u7684\u543b\u5408\u7a0b\u5ea6\u4f1a\u968f\u8bad\u7ec3\u800c\u4e0b\u964d[3]\u2014\u2014\u8fd8\u8bb0\u5f97\u53e4\u5fb7\u54c8\u7279\u5b9a\u5f8b\u5417\uff1f<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;text-align: center;\"><img class=\"rich_pages wxw-img\" data-ratio=\"0.6555555555555556\" data-s=\"300,640\" data-w=\"1080\" style=\"vertical-align: middle;max-width: 100%;width: 100%;box-sizing: border-box;height: auto !important;\" width=\"100%\"  src=\"\/wp-content\/uploads\/2024\/02\/wxsync-2024-02-3864b3a31ba202a2cb4a6a9d1a1aa9c9.png\"  \/><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 
1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u76ee\u6807\u9519\u8bef\u6cdb\u5316\uff08Goal Misgeneralization\uff09\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4f18\u5316\u76ee\u6807\u7684\u9519\u8bef\u6cdb\u5316\u5728\u6e38\u620f\u548c\u4eff\u771f\u73af\u5883\u4e2d\u4e5f\u65f6\u6709\u51fa\u73b0\uff0c\u4e5f\u540c\u6837\u51fa\u73b0\u4e86\u7ef4\u62a4\u6b64\u7c7b\u6848\u4f8b\u7684\u6570\u636e\u5e93[4]\u3002\u800c\u5728\u5927\u8bed\u8a00\u6a21\u578b\u7684 Scaling Laws \u76db\u6781\u4e00\u65f6\u7684\u73b0\u5728\uff0c\u4e00\u4e2a\u53d7\u5230\u8f83\u591a\u62c5\u5fe7\u7684\u53ef\u80fd\u6027\u662f\u4ece\u201c\u4eba\u7c7b\u53ef\u6709\u6548\u76d1\u7763\u201d\u60c5\u666f\u5230\u201c\u4eba\u7c7b\u96be\u4ee5\u6709\u6548\u76d1\u7763\u201d\u60c5\u666f\u7684\u6cdb\u5316\u8fc7\u7a0b\u4e2d\uff0c\u662f\u5426\u4f1a\u51fa\u73b0\u4f18\u5316\u76ee\u6807\u7684\u8868\u5f81\u7684\u9519\u8bef\u6cdb\u5316\u3002\u6613\u89c1\u76d1\u7763\u8bad\u7ec3\u53ea\u80fd\u53d1\u751f\u5728\u524d\u8005\u60c5\u5883\u4e0b\uff0c\u6545\u540e\u8005\u5c06\u4f1a\u4f9d\u8d56\u4e8e\u5206\u5e03\u5916\u6cdb\u5316\u3002\u5df2\u7ecf\u51fa\u73b0\u7684\u9519\u8bef\u6cdb\u5316\u65e9\u671f\u5f81\u5146\u5305\u62ec\u5927\u8bed\u8a00\u6a21\u578b\u7684\u201c\u8c04\u5a9a\u201d\u503e\u5411\uff0c\u5373\u8fce\u5408\u7528\u6237\u89c2\u70b9\u7684\u503e\u5411\u2014\u2014\u8bc1\u636e\u8868\u660e\u8fd9\u5f88\u53ef\u80fd\u662f\u5728\u5bf9\u9f50\u8bad\u7ec3\u4e2d\uff0c\u4eba\u7c7b\u76d1\u7763\u8005\u6613\u53d7\u8c04\u5a9a\u5f71\u54cd\u7684\u503e\u5411\u6240\u81f4[5]\u3002&nbsp;<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  
\/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u81ea\u81f4\u7684\u5206\u5e03\u504f\u79fb\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u65b9\u9762\u7684\u7814\u7a76\u5c1a\u5904\u4e8e\u65e9\u671f\u9636\u6bb5[6]\uff0c\u4f46\u4e00\u4e9b\u53d7\u5230\u5173\u6ce8\u7684\u95ee\u9898\u5305\u62ec\u63a8\u8350\u7cfb\u7edf\u4e0e\u7528\u6237\u4e4b\u95f4\u5f62\u6210\u7684\u53cd\u9988\u56de\u8def\uff0c\u548c\u5927\u8bed\u8a00\u6a21\u578b\u4e0e\u4e92\u8054\u7f51\u6587\u672c\u4e4b\u95f4\u5f62\u6210\u7684\u53cd\u9988\u56de\u8def[7]\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9053\u5fb7\u4e0d\u786e\u5b9a\u6027\uff1a<\/span><\/strong><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u6709\u5173\u81ea\u52a8\u9a7e\u9a76\u573a\u666f\u4e0b\u7684\u9053\u5fb7\u95ee\u9898\u8ba8\u8bba\u3001AI \u7cfb\u7edf\u4e2d\u504f\u89c1\u4e0e\u6b67\u89c6\u7684\u8ba8\u8bba\uff0c\u90fd\u662f\u8be5\u95ee\u9898\u7684\u5df2\u6709\u6848\u4f8b\u3002\u968f\u7740 AI \u7cfb\u7edf\u5728\u793e\u4f1a\u4e2d\u65e5\u6e10\u5e7f\u6cdb\u7684\u5e94\u7528\uff0cAI 
\u7684\u9053\u5fb7\u5224\u65ad\u7684\u5f71\u54cd\u8303\u56f4\u548c\u5f71\u54cd\u7a0b\u5ea6\u53ef\u80fd\u8fd8\u5c06\u5927\u5e45\u6269\u5927\u3002\u4ece\u66f4\u9ad8\u7684\u89d2\u5ea6\u770b\uff0c\u4ef7\u503c\u5206\u6b67\u7684\u535a\u5f08\u523b\u753b\u7b49\u95ee\u9898\uff0c\u90fd\u662f\u8fd9\u4e00\u95ee\u9898\u4e2d\u503c\u5f97\u8003\u8651\u7684\u5bf9\u8c61\u3002<br style=\"box-sizing: border-box;\"  \/><\/span><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<\/section>\n<section>\n<section powered-by=\"xiumi.us\">\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 
0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">AI\u5bf9\u9f50\u7684\u201c\u63a7\u5236\u8bba\u8fdb\u8def\u201d<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, 
&quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u57fa\u4e8e\u4eba\u7c7b\u53cd\u9988\u7684\u5f3a\u5316\u5b66\u4e60<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08Reinforcement Learning from Human Feedback, RLHF\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u662f\u81f3\u4eca\u4e3a\u6b62\uff0cAI \u5bf9\u9f50\u4e2d\u6beb\u65e0\u4e89\u8bae\u7684\u6700\u4e3b\u6d41\u3001\u6700\u6210\u719f\u7684\u7b97\u6cd5\u4e4b\u4e00\u3002\u5b83\u7684\u601d\u8def\u662f\u5148\u4ece\u4eba\u7c7b\u6570\u636e\u4e60\u5f97\u4e00\u4e2a\u4eba\u7c7b\u504f\u597d\u6a21\u578b\uff0c\u518d\u4ee5\u8be5\u504f\u597d\u6a21\u578b\u4e3a\u4f18\u5316\u76ee\u6807\uff0c\u5bf9\u5927\u8bed\u8a00\u6a21\u578b\u7528\u5f3a\u5316\u5b66\u4e60\u4f5c\u5fae\u8c03\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u4e00\u7b97\u6cd5\u5176\u5b9e\u4ee3\u8868\u4e86 AI \u5bf9\u9f50\u4e2d\u4e24\u79cd\u4e3b\u8981\u7684\u601d\u8def\u4e4b\u4e00\uff0c\u4e0d\u59a8\u79f0\u4e4b\u4e3a\u201c\u63a7\u5236\u8bba\u8fdb\u8def\u201d\u3002\u8fd9\u79cd\u601d\u8def\u5047\u5b9a\uff0cAI \u7cfb\u7edf\u6240\u771f\u6b63\u5e94\u5bf9\u9f50\u7684\u76ee\u6807\uff0c\u5176\u5bf9\u4eba\u7c7b\u800c\u8a00\u662f\u6e05\u6670\u660e\u4e86\u7684\uff0c\u800c\u95ee\u9898\u4ec5\u5728\u4e8e\u6709\u6548\u5730\u786e\u4fdd\u8fd9\u4e00\u76ee\u6807\u88ab AI 
\u6240\u6267\u884c\uff0c\u786e\u4fdd\u9519\u8bef\u89c4\u8303\u548c\u9519\u8bef\u6cdb\u5316\u90fd\u4e0d\u4f1a\u53d1\u751f\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u4e00\u8fdb\u8def\u7684\u4f18\u52bf\u5728\u4e8e\u5b83\u7684\u7b80\u6d01\u6027\uff0c\u901a\u8fc7\u628a\u95ee\u9898\u7684\u8303\u56f4\u7f29\u5c0f\u800c\u83b7\u5f97\u4e86\u66f4\u9ad8\u7684\u5b9e\u9645\u53ef\u884c\u6027\u2014\u2014RLHF \u8fd9\u4e00\u6700\u6210\u719f\u65b9\u6cd5\u5f52\u5c5e\u4e8e\u8fd9\u4e00\u7c7b\u8fdb\u8def\uff0c\u8fd9\u7edd\u4e0d\u662f\u5de7\u5408\u3002\u4f46\u540c\u65f6\uff0c\u5b83\u4e5f\u5ffd\u7565\u4e86\u4eba\u7c7b\u81ea\u5df1\u5bf9\u4e8e\u76ee\u6807\u548c\u4ef7\u503c\u89c2\u7684\u5206\u6b67\u3001\u4e0d\u786e\u5b9a\u6027\u3001\u968f\u65f6\u95f4\u6f14\u5316\u7b49\u7279\u6027\uff0c\u5e76\u4e14\u628a\u88ab\u63a7\u5236\u8005<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08AI \u7cfb\u7edf\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u4e0e\u63a7\u5236\u8005<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08\u4eba\u7c7b\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u7f6e\u4e8e\u5bf9\u6297\u7684\u5173\u7cfb\u4e0b\uff0c\u8fd9\u5bf9\u4e8e\u63a7\u5236\u90a3\u4e9b\u80fd\u529b\u5f3a\u4e8e\u4eba\u7c7b\u7684 AI \u7cfb\u7edf\u662f\u4e0d\u5229\u7684\u3002<\/span><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<section powered-by=\"xiumi.us\">\n<section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/p>\n<h3 style=\"outline: 0px;font-family: system-ui, -apple-system, 
&quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;white-space: normal;letter-spacing: 0.544px;background-color: rgb(255, 255, 255);\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;text-align: right;font-size: 13px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 10px;outline: 0px;letter-spacing: 0.544px;text-align: center;\">\n<section style=\"outline: 0px;display: inline-block;vertical-align: middle;\">\n<section style=\"margin-bottom: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;clear: both;line-height: 0;\">\n<section style=\"outline: 0px;line-height: 0;width: 0px;\"><svg viewbox=\"0 0 1 1\" style=\"vertical-align:top;\"><\/svg><\/section>\n<\/section>\n<\/section>\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;border-left: 3px solid rgb(33, 166, 210);border-right: 3px solid rgb(33, 166, 210);border-top-color: rgb(33, 166, 210);border-bottom-color: rgb(33, 166, 210);font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino 
Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;caret-color: rgb(89, 89, 89);\"><strong style=\"outline: 0px;color: rgb(61, 170, 214);letter-spacing: 0.578px;\">AI\u5bf9\u9f50\u7684\u201c\u535a\u5f08\u8bba\u8fdb\u8def\u201d<\/strong><\/span><\/strong><\/strong><\/p>\n<\/section>\n<section style=\"margin-top: -2px;outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section style=\"outline: 0px;float: left;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"outline: 0px;float: right;width: 8px;height: 3px;background-color: rgb(33, 166, 210);line-height: 0;\"><br style=\"outline: 0px;\"  \/><\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<\/h3>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><br  \/><\/p>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u5408\u4f5c\u9006\u5f3a\u5316\u5b66\u4e60<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08Cooperative Inverse Reinforcement Learning, CIRL\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u662f\u53e6\u4e00\u7c7b\u65b9\u6cd5\u4e2d\u7684\u4ee3\u8868[1][8]\u3002\u5b83\u7684\u6838\u5fc3\u601d\u60f3\u662f\uff0c\u628a\u4eba\u7c7b\u4e0e AI 
\u7cfb\u7edf\u89c6\u4e3a\u540c\u4e00\u73af\u5883\u4e2d\u7684\u4e24\u4e2a\u5e73\u7b49\u884c\u52a8\u8005\uff0c\u4e8c\u8005\u5171\u4eab\u4e00\u4e2a\u76ee\u6807<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08\u5373\u5956\u52b1\u51fd\u6570\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\uff0c\u4f46\u53ea\u6709\u4eba\u7c7b\u80fd\u83b7\u5f97\u5956\u52b1\u4fe1\u53f7\uff0c\u800c AI \u7cfb\u7edf\u5219\u53ea\u80fd\u4ece\u4eba\u7c7b\u884c\u4e3a\u4e2d\u63a8\u65ad\u5956\u52b1\u51fd\u6570\u7684\u5185\u5bb9\u2014\u2014\u5373\u201c\u4eba\u7c7b\u5230\u5e95\u60f3\u8981\u4ec0\u4e48\u201d\u3002\u5e76\u4e14\uff0c\u56e0\u4e3a AI \u59cb\u7ec8\u6301\u6709\u5bf9\u5956\u52b1\u51fd\u6570\u7684\u4e0d\u786e\u5b9a\u6027\uff0c\u4eba\u7c7b\u4f5c\u4e3a\u4fe1\u606f\u6765\u6e90\u7684\u91cd\u8981\u6027\u610f\u5473\u7740 AI \u8bef\u5bfc\u548c\u64cd\u7eb5\u4eba\u7c7b\u7684\u52a8\u673a\u5c06\u4f1a\u964d\u4f4e<\/span><span style=\"font-size: 15px;color: rgb(136, 136, 136);\">\uff08\u4f46\u4e0d\u4e00\u5b9a\u6d88\u5931\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u8fd9\u4e00\u65b9\u6cd5\uff0c\u672c\u8d28\u4e0a\u662f\u901a\u8fc7\u5c06\u4eba\u7c7b\u4e0e AI \u7cfb\u7edf\u7f6e\u4e8e\u5408\u4f5c\u7684\u5173\u7cfb\u4e2d\uff0c\u4ee5\u51cf\u5c11\u4e8c\u8005\u5bf9\u6297\u7684\u52a8\u673a\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u9664\u4e86\u8be5\u65b9\u6cd5\u5916\uff0c\u4e0e\u793e\u4f1a\u9009\u62e9\u7406\u8bba<\/span><span style=\"font-size: 15px;color: 
rgb(136, 136, 136);\">\uff08Social Choice Theory\uff09<\/span><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u3001\u535a\u5f08\u8bba\u7b49\u7ed3\u5408\u7684\u4e00\u4e9b\u5176\u4ed6 AI \u5bf9\u9f50\u65b9\u6cd5\uff0c\u5219\u6709\u7740\u4e0d\u540c\u7684\u4f18\u70b9[9]\u3002\u5b83\u4eec\u901a\u8fc7\u663e\u5f0f\u5730\u523b\u753b\u4e0d\u540c\u884c\u52a8\u8005\u4e4b\u95f4\u76ee\u6807\u548c\u4ef7\u503c\u89c2\u7684\u51b2\u7a81\uff0c\u4f7f\u5f97\u6211\u4eec\u53ef\u4ee5\u76f4\u9762\u9053\u5fb7\u4e0d\u786e\u5b9a\u6027\u3001\u590d\u6742\u793e\u4f1a\u4e92\u52a8\u7b49\u56f0\u96be\u95ee\u9898\u3002<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\">\u53e6\u4e00\u65b9\u9762\uff0c\u8fd9\u7c7b\u65b9\u6cd5\u8f83\u9ad8\u7684\u590d\u6742\u7a0b\u5ea6\uff0c\u4e5f\u610f\u5473\u7740\u5b83\u4eec\u7684\u5de5\u7a0b\u53ef\u5b9e\u73b0\u6027\u5f80\u5f80\u8f83\u4f4e\u3002\u5982\u4f55\u80fd\u5c06\u8fd9\u4e9b\u65b9\u6cd5\u4f7f\u7528\u5728\u5b9e\u9645\u89c4\u6a21\u7684 AI \u5e94\u7528\u4e0a\uff0c\u662f\u4e00\u4e2a\u4e9f\u5f85\u89e3\u51b3\u7684\u95ee\u9898\u3002<\/span><\/p>\n<\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><br  \/><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"color: rgb(33, 166, 210);\"><strong><span style=\"color: rgb(33, 166, 210);font-size: 15px;\">\u53c2\u8003\u6587\u732e<\/span><\/strong><\/span><\/p>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[1] Ji, J.&nbsp;et al.&nbsp;(2023). AI Alignment: A Comprehensive Survey. arXiv. 
https:\/\/doi.org\/10.48550\/arxiv.2310.19852<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[2] https:\/\/docs.google.com\/spreadsheets\/d\/e\/2PACX-1vRPiprOaC3HsCf5Tuum8bRfzYUiKLRqJmbOoC-32JorNdfyTiRRsR7Ea5eWtvsWzuxo8bjOxCG84dAg\/pubhtml<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[3] Gao, L., John, S., &amp; Hilton, J. Scaling laws for reward model overoptimization. ICML 2023. https:\/\/dl.acm.org\/doi\/10.5555\/3618408.3618845<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[4] https:\/\/docs.google.com\/spreadsheets\/d\/e\/2PACX-1vTo3RkXUAigb25nP7gjpcHriR6XdzA_L5loOcVFj_u7cRAZghWrYKH2L2nU4TA_Vr9KzBX5Bjpz9G_l\/pubhtml<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[5] Perez, E. et al. Discovering Language Model Behaviors with Model-Written Evaluations. Findings of the Association for Computational Linguistics: ACL 2023. https:\/\/doi.org\/10.18653\/v1\/2023.findings-acl.847<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[6] Krueger, D. A., Maharaj, T., &amp; Leike, J. (2020). Hidden Incentives for Auto-Induced Distributional Shift. arXiv. https:\/\/doi.org\/10.48550\/arxiv.2009.09153<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[7] Taori, R., &amp; Hashimoto, T. (2022). 
Data feedback loops: model-driven amplification of dataset biases. arXiv. https:\/\/doi.org\/10.48550\/arxiv.2209.03942<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[8] Hadfield-Menell, D., Russell, S., Abbeel, P., &amp; Dragan, A. D. Cooperative inverse reinforcement learning. NeurIPS 2016. https:\/\/dl.acm.org\/doi\/10.5555\/3157382.3157535<\/span><\/p>\n<p style=\"margin-left: 8px;margin-right: 8px;text-align: left;line-height: normal;margin-bottom: 8px;\"><span style=\"color: rgb(136, 136, 136);font-size: 13px;\">[9] Critch, A., &amp; Krueger, D. W. (2020). AI Research Considerations for Human Existential Safety (ARCHES). arXiv. http:\/\/export.arxiv.org\/pdf\/2006.04948<\/span><\/p>\n<\/section>\n<section powered-by=\"xiumi.us\">\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 15px;color: rgb(63, 63, 63);\"><\/span><\/p>\n<section style=\"margin-bottom: 5px;outline: 0px;color: rgb(136, 136, 136);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);\"><span style=\"outline: 0px;font-size: 16px;\"><strong style=\"outline: 0px;\"><span style=\"outline: 0px;color: rgb(171, 25, 66);\"><br  \/><\/span><\/strong><\/span><\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-bottom: 0px;outline: 0px;\">\n<section style=\"padding-right: 6px;padding-left: 6px;outline: 0px;display: inline-block;width: 661px;vertical-align: top;border-left: 5px solid rgb(33, 166, 210);border-bottom-left-radius: 0px;\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;font-size: 16px;color: rgb(0, 0, 0);line-height: 1.4;\">\n<p 
style=\"outline: 0px;white-space: normal;\"><strong style=\"outline: 0px;\">\u4f5c\u8005\u7b80\u4ecb<\/strong><\/p>\n<\/section>\n<\/section>\n<\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-bottom: 0px;outline: 0px;\">\n<p style=\"outline: 0px;white-space: normal;\"><br  \/><\/p>\n<\/section>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);text-align: center;\"><img class=\"rich_pages wxw-img js_insertlocalimg\" data-imgfileid=\"100198001\" data-ratio=\"1.1481481481481481\" data-s=\"300,640\" data-type=\"jpeg\" data-w=\"1080\" style=\"outline: 0px;width: 179px !important;visibility: visible !important;height: auto !important;\"  src=\"\/wp-content\/uploads\/2024\/02\/wxsync-2024-02-6098442bbac2e8d4b1c9280d148361ad.jpeg\"  \/><\/p>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;font-family: system-ui, -apple-system, &quot;system-ui&quot;, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);line-height: 1.75em;\"><strong style=\"outline: 0px;\"><span style=\"outline: 0px;font-size: 15px;color: rgb(63, 63, 63);\">\u90b1\u5929\u5f02<\/span><\/strong><span style=\"outline: 0px;font-size: 15px;color: rgb(63, 63, 63);\">\u662f\u5317\u4eac\u5927\u5b66\u8ba1\u7b97\u673a\u56fe\u7075\u73ed\u7684\u4e09\u5e74\u7ea7\u672c\u79d1\u751f\uff0c\u5317\u5927\u5bf9\u9f50\u5c0f\u7ec4 (PKU-Alignment) 
\u6210\u5458\uff0c\u5728\u5317\u5927PAIR-Lab\u8fdb\u884cAI\u5bf9\u9f50\u7814\u7a76\uff0c\u5173\u6ce8\u5bf9\u9f50\u4e0e\u9053\u5fb7\u4ef7\u503c\u7684\u4ea4\u53c9\uff0c\u5e76\u64b0\u5199AI Alignment: A Comprehensive Survey\uff0c\u7efc\u8ff0\u4e3b\u9875\uff1awww.alignmentsurvey.com\u3002\u4ed6\u66fe\u83b7John Hopcroft\u5956\u5b66\u91d1\u3001NOI 2020\u91d1\u724c\u3002<\/span><\/section>\n<section style=\"margin-bottom: 5px;outline: 0px;color: rgb(136, 136, 136);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);\"><span style=\"outline: 0px;font-size: 16px;\"><strong style=\"outline: 0px;\"><span style=\"outline: 0px;color: rgb(171, 25, 66);\"><br  \/><\/span><\/strong><\/span><\/section>\n<section style=\"margin-bottom: 5px;outline: 0px;color: rgb(136, 136, 136);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px;white-space: normal;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section powered-by=\"xiumi.us\" style=\"margin-top: 10px;margin-bottom: 0px;outline: 0px;color: rgb(136, 136, 136);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);text-size-adjust: inherit;text-align: center;justify-content: center;\">\n<section style=\"outline: 0px;display: inline-block;width: auto;vertical-align: top;min-width: 10%;height: auto;box-shadow: rgb(0, 0, 0) 0px 0px 0px;border-bottom: 9px 
solid rgb(169, 215, 227);border-bottom-right-radius: 0px;\">\n<section powered-by=\"xiumi.us\" style=\"margin-bottom: -15px;outline: 0px;\">\n<section style=\"padding-right: 10px;padding-left: 10px;outline: 0px;color: rgb(12, 130, 169);font-size: 16px;line-height: 2;letter-spacing: 3px;\">\n<p style=\"outline: 0px;\"><strong style=\"outline: 0px;\">\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\u8bfb\u4e66\u4f1a<\/strong><br style=\"outline: 0px;\"  \/><\/p>\n<\/section>\n<\/section>\n<\/section>\n<\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;text-size-adjust: inherit;line-height: 1.75em;text-align: center;\"><br style=\"outline: 0px;\"  \/><\/section>\n<p style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;text-size-adjust: inherit;line-height: 2em;\"><span style=\"outline: 0px;font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;letter-spacing: 0.578px;background-color: rgb(255, 255, 255);font-size: 
15px;\">\u5927\u6a21\u578b\u7684\u72c2\u98d9\u7a81\u8fdb\u5524\u9192\u4e86\u4eba\u4eec\u5bf9AI\u6280\u672f\u7684\u70ed\u60c5\u548c\u61a7\u61ac\uff0c\u4e5f\u5f15\u53d1\u4e86\u5bf9AI\u6280\u672f\u672c\u8eab\u5b58\u5728\u7684\u793e\u4f1a\u4f26\u7406\u98ce\u9669\u53ca\u5176\u5bf9\u4eba\u7c7b\u751f\u5b58\u6784\u6210\u7684\u6f5c\u5728\u5a01\u80c1\u7684\u666e\u904d\u62c5\u5fe7\u3002\u5728\u6b64\u80cc\u666f\u4e0b\uff0cAI\u5b89\u5168\u4e0e\u5bf9\u9f50\u5f97\u5230\u5e7f\u6cdb\u5173\u6ce8\uff0c\u8fd9\u662f\u4e00\u4e2a\u81f4\u529b\u4e8e\u8ba9AI\u9020\u798f\u4eba\u7c7b\uff0c\u907f\u514dAI\u6a21\u578b\u5931\u63a7\u6216\u88ab\u6ee5\u7528\u800c\u5bfc\u81f4\u707e\u96be\u6027\u540e\u679c\u7684\u7814\u7a76\u65b9\u5411\u3002\u96c6\u667a\u4ff1\u4e50\u90e8\u548c\u5b89\u8fdcAI\u8054\u5408\u4e3e\u529e<a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247679496&amp;idx=1&amp;sn=2f889356c9fcf6fb460dcd87f05036af&amp;chksm=e8996485dfeeed937ca8c59e909b6c991d2ff1cbd132117a91bf54acf9e6205bd0ab2a9e4fb2&amp;scene=21#wechat_redirect\" textvalue=\"\u300c\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\u300d\u8bfb\u4e66\u4f1a\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\" style=\"outline: 0px;color: var(--weui-LINK);-webkit-user-drag: none;cursor: pointer;\" localeditorid=\"pjn2a1b4n3w44878qo\" rel=\"noopener noreferrer\">\u300c\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\u300d\u8bfb\u4e66\u4f1a<\/a>\uff0c\u7531\u591a\u4f4d\u6d77\u5185\u5916\u4e00\u7ebf\u7814\u7a76\u8005\u8054\u5408\u53d1\u8d77\uff0c\u65e8\u5728\u6df1\u5165\u63a2\u8ba8AI\u5b89\u5168\u4e0e\u5bf9\u9f50\u6240\u6d89\u53ca\u7684\u6838\u5fc3\u6280\u672f\u3001\u7406\u8bba\u67b6\u6784\u3001\u89e3\u51b3\u8def\u5f84\u4ee5\u53ca\u5b89\u5168\u6cbb\u7406\u7b49\u4ea4\u53c9\u8bfe\u9898\u3002<\/span><\/p>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 
0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;\"><span style=\"outline: 0px;font-size: 15px;color: rgb(63, 63, 63);\"><strong style=\"outline: 0px;\"><br style=\"outline: 0px;\"  \/><\/strong><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;text-align: center;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247679496&amp;idx=1&amp;sn=2f889356c9fcf6fb460dcd87f05036af&amp;chksm=e8996485dfeeed937ca8c59e909b6c991d2ff1cbd132117a91bf54acf9e6205bd0ab2a9e4fb2&amp;scene=21#wechat_redirect\" textvalue=\"\u4f60\u5df2\u9009\u4e2d\u4e86\u6dfb\u52a0\u94fe\u63a5\u7684\u5185\u5bb9\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"1\" hasload=\"1\" rel=\"noopener noreferrer\"><span class=\"js_jump_icon h5_image_link\" style=\"outline: 0px;vertical-align: bottom;user-select: none;width: 661px;\"><img class=\"rich_pages wxw-img\" data-backh=\"321\" data-backw=\"562\" data-cropselx1=\"0\" data-cropselx2=\"562\" data-cropsely1=\"0\" data-cropsely2=\"282\" data-imgfileid=\"100198000\" data-ratio=\"0.5714285714285714\" data-type=\"jpeg\" data-w=\"1050\" style=\"outline: 0px;border-width: 0px;border-style: initial;border-color: initial;width: 661px !important;visibility: visible !important;height: auto !important;\"  
src=\"\/wp-content\/uploads\/2024\/02\/wxsync-2024-02-f427e272d561a1d6b8a967d2d485d8f8.png\"  \/><\/span><\/a><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;\"><br style=\"outline: 0px;\"  \/><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;\"><span style=\"color: rgb(63, 63, 63);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px;\">\u8be6\u60c5\u8bf7\u89c1\uff1a<\/span><br  \/><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;\"><span style=\"outline: 0px;font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, 
&quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;color: rgb(63, 63, 63);font-size: 15px;letter-spacing: 0.578px;text-decoration: underline;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247679496&amp;idx=1&amp;sn=2f889356c9fcf6fb460dcd87f05036af&amp;chksm=e8996485dfeeed937ca8c59e909b6c991d2ff1cbd132117a91bf54acf9e6205bd0ab2a9e4fb2&amp;scene=21#wechat_redirect\" textvalue=\"2024\u5f00\u5e74\u8bfb\u4e66\u4f1a\uff1aAI\u5b89\u5168\u4e0e\u5bf9\u9f50\u2014\u2014\u5e94\u5bf9\u524d\u6cbfAI\u5931\u63a7\u4e0e\u6ee5\u7528\u7684\u6280\u672f\u8def\u7ebf\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\" style=\"outline: 0px;color: var(--weui-LINK);-webkit-user-drag: none;cursor: pointer;\" rel=\"noopener noreferrer\">2024\u5f00\u5e74\u8bfb\u4e66\u4f1a\uff1aAI\u5b89\u5168\u4e0e\u5bf9\u9f50\u2014\u2014\u5e94\u5bf9\u524d\u6cbfAI\u5931\u63a7\u4e0e\u6ee5\u7528\u7684\u6280\u672f\u8def\u7ebf<\/a><\/span><span style=\"outline: 0px;color: rgb(63, 63, 63);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;font-size: 15px;letter-spacing: 0.544px;\"><br style=\"outline: 0px;\"  \/><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 0px;margin-left: 8px;outline: 0px;color: rgb(136, 136, 136);font-size: 14px;letter-spacing: 0.544px;white-space: normal;background-color: rgb(255, 255, 255);font-family: system-ui, -apple-system, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;line-height: 1.75em;\"><br  \/><\/section>\n<p style=\"margin-left: 8px;margin-right: 8px;margin-bottom: 0px;line-height: 1.75em;\"><span style=\"font-size: 
15px;color: rgb(63, 63, 63);\"><br  \/><\/span><\/p>\n<section powered-by=\"xiumi.us\" style=\"margin-bottom: 0px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(136, 136, 136);font-size: 14px;background-color: rgb(255, 255, 255);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\">\n<section powered-by=\"xiumi.us\" style=\"outline: 0px;letter-spacing: 0.544px;\">\n<p style=\"margin-right: 8px;margin-bottom: 5px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;color: rgb(0, 0, 0);font-size: 16px;text-align: center;font-family: mp-quote, -apple-system-font, BlinkMacSystemFont, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\"><strong style=\"outline: 0px;text-align: left;color: rgb(33, 166, 210);font-family: PingFangSC-light;letter-spacing: 0.544px;\"><span style=\"outline: 0px;font-size: 15px;letter-spacing: 0.544px;\">\u63a8\u8350\u9605\u8bfb<\/span><\/strong><\/strong><\/p>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;\">1.&nbsp;<\/strong><span style=\"text-decoration: underline;\"><strong style=\"outline: 0px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247679853&amp;idx=1&amp;sn=1ea872fcd9ba2bf1bd5ac217484d0a3b&amp;chksm=e89965e0dfeeecf60e4b530bc2ad6b1a68a83bfd2ed1f6d715792811adf2b2ec0e5fee7e09c4&amp;scene=21#wechat_redirect\" textvalue=\"\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\uff1a\u590d\u6742\u7cfb\u7edf\u89c6\u89d2\u4e0b\u7684AI\u5b89\u5168\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\" 
rel=\"noopener noreferrer\">\u5927\u6a21\u578b\u5b89\u5168\u4e0e\u5bf9\u9f50\uff1a\u590d\u6742\u7cfb\u7edf\u89c6\u89d2\u4e0b\u7684AI\u5b89\u5168<\/a><\/strong><\/span><strong style=\"outline: 0px;\"><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;\">2.&nbsp;<\/strong><span style=\"text-decoration: underline;\"><strong style=\"outline: 0px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247677327&amp;idx=2&amp;sn=a7bc9861fce673708e659807bf8ff325&amp;chksm=e8996f02dfeee6141faedeccef5d48efb424fe076be79d292d0f8b9091bb21eec9bb33199b2e&amp;scene=21#wechat_redirect\" textvalue=\"\u4e07\u5b57\u957f\u6587\u8be6\u89e3\uff1a\u5927\u6a21\u578b\u65f6\u4ee3AI\u4ef7\u503c\u5bf9\u9f50\u7684\u95ee\u9898\u3001\u5bf9\u7b56\u548c\u5c55\u671b\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"11\" tab=\"innerlink\" data-linktype=\"2\" rel=\"noopener noreferrer\">\u4e07\u5b57\u957f\u6587\u8be6\u89e3\uff1a\u5927\u6a21\u578b\u65f6\u4ee3AI\u4ef7\u503c\u5bf9\u9f50\u7684\u95ee\u9898\u3001\u5bf9\u7b56\u548c\u5c55\u671b<\/a><\/strong><\/span><strong style=\"outline: 0px;\"><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\">3.&nbsp;<\/strong><span style=\"text-decoration: underline;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247679853&amp;idx=2&amp;sn=4c61ab50748d4710424514fd7f2755f1&amp;chksm=e89965e0dfeeecf6d29fc68720e5ca6dd85e411fb27016566684dcbfb4753bfa352429474794&amp;scene=21#wechat_redirect\" textvalue=\"AI\u5b89\u5168\u524d\u6cbf | 
\u5927\u6a21\u578b\u8c04\u5a9a\u73b0\u8c61\u3001RLHF\u540e\u95e8\u653b\u51fb\u3001AI4Science\u6a21\u578b\u7684\u6ee5\u7528\u98ce\u9669\u3001\u6001\u52bf\u611f\u77e5\u80fd\u529b\u3001\u8868\u5f81\u5de5\u7a0b\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"11\" tab=\"innerlink\" data-linktype=\"2\" rel=\"noopener noreferrer\">AI\u5b89\u5168\u524d\u6cbf | \u5927\u6a21\u578b\u8c04\u5a9a\u73b0\u8c61\u3001RLHF\u540e\u95e8\u653b\u51fb\u3001AI4Science\u6a21\u578b\u7684\u6ee5\u7528\u98ce\u9669\u3001\u6001\u52bf\u611f\u77e5\u80fd\u529b\u3001\u8868\u5f81\u5de5\u7a0b<\/a><\/strong><\/span><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><\/strong><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><span style=\"outline: 0px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\">4.&nbsp;<\/strong><\/span><span style=\"outline: 0px;text-decoration: underline;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247667315&amp;idx=1&amp;sn=fca3a09806e753fa83d3fd6eb8ccf9d8&amp;chksm=e89914fedfee9de82ae7107592b173d2a1c35141fab0006d3eac2dcb2dd1e74aa78a86e09a93&amp;scene=21#wechat_redirect\" textvalue=\"\u5f20\u6c5f\uff1a\u7b2c\u4e09\u4ee3\u4eba\u5de5\u667a\u80fd\u6280\u672f\u57fa\u7840\u2014\u2014\u4ece\u53ef\u5fae\u5206\u7f16\u7a0b\u5230\u56e0\u679c\u63a8\u7406 | \u96c6\u667a\u5b66\u56ed\u5168\u65b0\u8bfe\u7a0b\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" tab=\"innerlink\" data-linktype=\"2\" hasload=\"1\" style=\"outline: 0px;color: var(--weui-LINK);-webkit-user-drag: none;cursor: pointer;\" rel=\"noopener noreferrer\">\u5f20\u6c5f\uff1a\u7b2c\u4e09\u4ee3\u4eba\u5de5\u667a\u80fd\u6280\u672f\u57fa\u7840\u2014\u2014\u4ece\u53ef\u5fae\u5206\u7f16\u7a0b\u5230\u56e0\u679c\u63a8\u7406 | 
\u96c6\u667a\u5b66\u56ed\u5168\u65b0\u8bfe\u7a0b<\/a><\/strong><\/strong><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 8px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\">5.&nbsp;<\/strong><span style=\"outline: 0px;text-decoration: underline;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;\"><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzI0MjY5NTM2MQ==&amp;mid=2247508039&amp;idx=1&amp;sn=b9e492efd41fab0c2fa3ff4eb092c1f4&amp;chksm=e97a8de9de0d04ffabc439f22170122154bb9c4655cbf957bb5325258661059ef436dd8ce14f&amp;scene=21#wechat_redirect\" textvalue=\"\u52a0\u5165\u96c6\u667a\u5b66\u56edVIP\uff0c\u4e00\u6b21\u6027\u83b7\u53d6\u96c6\u667a\u5e73\u53f0\u6240\u6709\u5185\u5bb9\u8d44\u6e90\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"11\" tab=\"innerlink\" data-linktype=\"2\" hasload=\"1\" style=\"outline: 0px;color: var(--weui-LINK);-webkit-user-drag: none;cursor: pointer;\" rel=\"noopener noreferrer\">\u52a0\u5165\u96c6\u667a\u5b66\u56edVIP\uff0c\u4e00\u6b21\u6027\u83b7\u53d6\u96c6\u667a\u5e73\u53f0\u6240\u6709\u5185\u5bb9\u8d44\u6e90<\/a><\/strong><\/strong><\/strong><\/strong><\/span><\/section>\n<section style=\"margin-right: 8px;margin-bottom: 10px;margin-left: 8px;outline: 0px;letter-spacing: 0.544px;\"><strong style=\"outline: 0px;\">6.&nbsp;<\/strong><a target=\"_blank\" href=\"http:\/\/mp.weixin.qq.com\/s?__biz=MzIzMjQyNzQ5MA==&amp;mid=2247667297&amp;idx=2&amp;sn=988b7314df45d949e69e81257801fff2&amp;chksm=e89914ecdfee9dfac76f9245fb1fd0e5b25d567e20790fbdab671234588ad0e88e1acf205711&amp;scene=21#wechat_redirect\" textvalue=\"\u52a0\u5165\u96c6\u667a\uff0c\u4e00\u8d77\u590d\u6742\uff01\" linktype=\"text\" imgurl=\"\" imgdata=\"null\" data-itemshowtype=\"0\" 
tab=\"innerlink\" data-linktype=\"2\" hasload=\"1\" style=\"outline: 0px;color: var(--weui-LINK);-webkit-user-drag: none;cursor: pointer;\" rel=\"noopener noreferrer\"><span style=\"outline: 0px;text-decoration: underline;\"><strong style=\"outline: 0px;\">\u52a0\u5165\u96c6\u667a\uff0c\u4e00\u8d77\u590d\u6742\uff01<\/strong><\/span><\/a><\/section>\n<\/section>\n<p style=\"margin-right: 8px;margin-bottom: 5px;margin-left: 8px;outline: 0px;\"><br style=\"outline: 0px;\"  \/><\/p>\n<\/section>\n<p style=\"margin-bottom: 0px;outline: 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(136, 136, 136);font-size: 14px;background-color: rgb(255, 255, 255);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\"><br style=\"outline: 0px;\"  \/><\/p>\n<section style=\"margin-bottom: 0px;outline: currentcolor none 0px;letter-spacing: 0.544px;white-space: normal;color: rgb(136, 136, 136);font-size: 14px;background-color: rgb(255, 255, 255);font-family: -apple-system-font, system-ui, &quot;Helvetica Neue&quot;, &quot;PingFang SC&quot;, &quot;Hiragino Sans GB&quot;, &quot;Microsoft YaHei UI&quot;, &quot;Microsoft YaHei&quot;, Arial, sans-serif;\"><strong style=\"outline: 0px;letter-spacing: 0.544px;font-size: 15px;text-align: left;color: rgb(255, 255, 255);font-family: PingFangSC-light;\"><span style=\"outline: 0px;background-color: rgb(12, 130, 169);\">\u70b9\u51fb\u201c\u9605\u8bfb\u539f\u6587\u201d\uff0c\u62a5\u540d\u8bfb\u4e66\u4f1a<\/span><\/strong><\/section>\n<\/section>\n<\/section>\n<p style=\"display: none;\"><mp-style-type data-value=\"3\"><\/mp-style-type><\/p>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u5bfc\u8bed \u672c\u6587\u662f\u5317\u4eac\u5927\u5b66\u8ba1\u7b97\u673a\u56fe\u7075\u73ed\u672c\u79d1\u751f\u3001\u5317\u5927\u5bf9\u9f50\u5c0f\u7ec4 
(PKU-Alignment)\u6210\u5458\u90b1\u5929\u5f02\u64b0\u5199\u7684\u6587\u7ae0\uff0c\u4ecb\u7ecd\u4e86AI\u5bf9\u9f50\u95ee\u9898\u7684\u4e24\u79cd\u4e3b\u8981\u601d\u8def\uff1a\u57fa\u4e8e\u4eba\u7c7b\u53cd\u9988\u5f3a\u5316\u5b66\u4e60\uff08\u63a7\u5236\u8bba\u8fdb\u8def\uff09\uff0c\u5408\u4f5c\u9006\u5f3a\u5316\u5b66\u4e60\uff08\u535a\u5f08\u8bba\u8fdb\u8def\uff09\u3002\u90b1\u5929\u5f02\u5728\u5317\u5927PAIR-Lab\u8fdb\u884cAI\u5bf9\u9f50\u7814\u7a76\uff0c\u5173\u6ce8\u5bf9\u9f50\u4e0e\u9053\u5fb7\u4ef7\u503c\u7684\u4ea4\u53c9\uff0c\u5e76\u5408\u4f5c\u64b0\u5199\u7efc\u8ff0\u6587\u7ae0 A&#8230;<\/p>\n","protected":false},"author":0,"featured_media":48153,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[1],"tags":[],"special":[],"_links":{"self":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/48159"}],"collection":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/types\/post"}],"replies":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=48159"}],"version-history":[{"count":0,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/posts\/48159\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=\/wp\/v2\/media\/48153"}],"wp:attachment":[{"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=48159"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=48159"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=48159"},{"taxonomy":"special","embeddable":true,"href":"https:\/\/swarma.org\/index.php?rest_route=%2Fwp%2Fv2%2Fspecial&post=48159"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}
}