{"id":3826,"date":"2021-05-25T15:16:05","date_gmt":"2021-05-25T07:16:05","guid":{"rendered":"http:\/\/www.sniper97.cn\/?p=3826"},"modified":"2021-05-25T15:16:05","modified_gmt":"2021-05-25T07:16:05","slug":"%e3%80%90%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0%e7%ac%94%e8%ae%b0%e3%80%91%e9%97%a8%e6%8e%a7%e7%ba%bf%e6%80%a7%e5%8d%95%e5%85%83%ef%bc%88gated-linear-unit%ef%bc%89","status":"publish","type":"post","link":"http:\/\/www.sniper97.cn\/index.php\/note\/deep-learning\/note-deep-learning\/3826\/","title":{"rendered":"\u3010\u6df1\u5ea6\u5b66\u4e60\u7b14\u8bb0\u3011\u95e8\u63a7\u7ebf\u6027\u5355\u5143\uff08Gated Linear Unit\uff09"},"content":{"rendered":"\n<p>\u672c\u6587\u601d\u8def\u6765\u81ea\u8bba\u6587  <a rel=\"noreferrer noopener\" aria-label=\"GLU Variants Improve Transformer\uff08\u5728\u65b0\u7a97\u53e3\u6253\u5f00\uff09\" href=\"https:\/\/arxiv.org\/abs\/2002.05202\" target=\"_blank\">GLU Variants Improve Transformer<\/a> \uff0c\u800c\u8fd9\u7bc7\u8bba\u6587\u53c8\u501f\u7528\u4e86<a rel=\"noreferrer noopener\" aria-label=\" Language Modeling with Gated Convolutional Networks  \uff08\u5728\u65b0\u7a97\u53e3\u6253\u5f00\uff09\" href=\"https:\/\/arxiv.org\/abs\/1612.08083\" target=\"_blank\"> Language Modeling with Gated Convolutional Networks  <\/a>\u7684\u601d\u8def\u3002<\/p>\n\n\n<p>\u672c\u6587\u4e3b\u8981\u8bb2\u8ff0 <a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2002.05202\" target=\"_blank\">GLU Variants Improve Transformer<\/a> \u4ecb\u7ecd\u7684GLU\u3002<\/p>\n\n\n<p>\u4e3b\u8981\u5e94\u7528\u4e8eT5.1.1\u4e2d\u66ff\u6362FFN\u3002\u76ee\u524d\u4e0d\u6e05\u695a\u662f\u5426\u5728\u5176\u4ed6FFN\u7ed3\u6784\u4e2d\u4f9d\u7136\u6709\u589e\u76ca\uff08T5.1.1\u662f\u6709\u4e0d\u5c11\u7684\uff09\u3002<\/p>\n\n\n<p>\u5176\u5b9e\u6574\u4f53\u601d\u8def\u6bd4\u8f83\u7b80\u5355\uff0c\u901a\u8fc7\u5c06T5\u4e2d\u7684FFN\u4ece<\/p>\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img loading=\"lazy\" decoding=\"async\" width=\"380\" height=\"69\" src=\"\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-1.png\" alt=\"\" class=\"wp-image-3827\" srcset=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-1.png 380w, http:\/\/www.sniper97.cn\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-1-300x54.png 300w\" sizes=\"(max-width: 380px) 100vw, 380px\" \/><\/figure><\/div>\n\n\n<p>\u53d8\u6362\u4e3a\uff1a<\/p>\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img loading=\"lazy\" decoding=\"async\" width=\"555\" height=\"68\" src=\"\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-2.png\" alt=\"\" class=\"wp-image-3828\" srcset=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-2.png 555w, http:\/\/www.sniper97.cn\/wp-content\/uploads\/2021\/05\/\u56fe\u7247-2-300x37.png 300w\" sizes=\"(max-width: 555px) 100vw, 555px\" \/><\/figure><\/div>\n\n\n<p> \u4e5f\u5c31\u662f\u628a relu \u6fc0\u6d3b\u7684\u7b2c\u4e00\u4e2a\u53d8\u5316\u5c42\u6539\u4e3a\u4e86 gelu \u6fc0\u6d3b\u7684\u95e8\u63a7\u7ebf\u6027\u5355\u5143\uff0c\u8fd9\u6837 FFN \u5c42\u589e\u52a0\u4e86 50% \u53c2\u6570\uff08\u7531\u4e24\u4e2aDense\u7ec4\u6210FFN\u53d8\u4e3a\u4e09\u4e2aDense\u7ec4\u6210FFN\uff0c\u540c\u65f6\u7b2c\u4e00\u4e2aDense\u4f7f\u7528gelu\u6fc0\u6d3b\uff0c\u5176\u4f59\u4f7f\u7528linear\u6fc0\u6d3b\uff09\uff0c\u4f46\u662f\u4ece\u8bba\u6587\u6548\u679c\u770b\u6548\u679c\u660e\u663e\u589e\u52a0\u3002  <\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u6587\u601d\u8def\u6765\u81ea\u8bba\u6587 GLU Variants Improve Transformer \uff0c\u800c\u8fd9\u7bc7\u8bba\u6587\u53c8\u501f [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_mi_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[9],"tags":[37,39],"views":7533,"_links":{"self":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/3826"}],"collection":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/comments?post=3826"}],"version-history":[{"count":0,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/3826\/revisions"}],"wp:attachment":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/media?parent=3826"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/categories?post=3826"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/tags?post=3826"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}