{"id":3754,"date":"2021-03-13T16:08:09","date_gmt":"2021-03-13T08:08:09","guid":{"rendered":"http:\/\/www.sniper97.cn\/?p=3754"},"modified":"2021-03-13T16:08:09","modified_gmt":"2021-03-13T08:08:09","slug":"%e3%80%90%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0%e7%ac%94%e8%ae%b0%e3%80%91bert%e5%8f%82%e6%95%b0%e5%88%86%e6%9e%90","status":"publish","type":"post","link":"http:\/\/www.sniper97.cn\/index.php\/note\/deep-learning\/base\/3754\/","title":{"rendered":"\u3010\u6df1\u5ea6\u5b66\u4e60\u7b14\u8bb0\u3011Bert\u7684\u53c2\u6570\u5206\u6790"},"content":{"rendered":"\n<p>\u672c\u7bc7\u6587\u7ae0\u6211\u4eec\u5c06\u62c6\u5206Bert\uff0c\u7ec6\u7a76Bert\u7684\u7ed3\u6784\u4ee5\u53ca\u6bcf\u4e00\u5c42\u7684\u53c2\u6570\u4e2a\u6570<\/p>\n\n\n<p>\u6211\u4eec\u4ee5bert-base\u4e3a\u4f8b\uff08768\u7ef4\uff09\uff1a<\/p>\n\n\n<h4 class=\"wp-block-heading\">\u666e\u901abert\uff1a<\/h4>\n\n\n<p>bert\u7684\u6a21\u578b\u5982\u4e0b\uff08\u7701\u7565\u591a\u5c42\uff09\uff1a<\/p>\n\n\n<pre class=\"wp-block-code\"><code>Model: \"model\"\n__________________________________________________________________________________________________\nLayer (type)                    Output Shape         Param #     Connected to\n==================================================================================================\nInput-Token (InputLayer)        [(None, None)]       0\n__________________________________________________________________________________________________\nInput-Segment (InputLayer)      [(None, None)]       0\n__________________________________________________________________________________________________\nEmbedding-Token (Embedding)     multiple             16226304    Input-Token[0][0]\n                                                                 MLM-Norm[0][0]\n__________________________________________________________________________________________________\nEmbedding-Segment (Embedding)   (None, None, 768)    1536        
Input-Segment[0][0]\n__________________________________________________________________________________________________\nEmbedding-Token-Segment (Add)   (None, None, 768)    0           Embedding-Token[0][0]\n                                                                 Embedding-Segment[0][0]\n__________________________________________________________________________________________________\nEmbedding-Position (PositionEmb (None, None, 768)    393216      Embedding-Token-Segment[0][0]\n__________________________________________________________________________________________________\nEmbedding-Norm (LayerNormalizat (None, None, 768)    1536        Embedding-Position[0][0]\n__________________________________________________________________________________________________\nEmbedding-Dropout (Dropout)     (None, None, 768)    0           Embedding-Norm[0][0]\n__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    2362368     Embedding-Dropout[0][0]\n                                                                 Embedding-Dropout[0][0]\n                                                                 Embedding-Dropout[0][0]\n__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    0           Transformer-0-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    0           Embedding-Dropout[0][0]\n                                                                 Transformer-0-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    1536        
Transformer-0-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-0-FeedForward (Feed (None, None, 768)    4722432     Transformer-0-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-0-FeedForward-Dropo (None, None, 768)    0           Transformer-0-FeedForward[0][0]\n__________________________________________________________________________________________________\nTransformer-0-FeedForward-Add ( (None, None, 768)    0           Transformer-0-MultiHeadSelfAttent\n                                                                 Transformer-0-FeedForward-Dropout\n__________________________________________________________________________________________________\nTransformer-0-FeedForward-Norm  (None, None, 768)    1536        Transformer-0-FeedForward-Add[0][\n__________________________________________________________________________________________________\nTransformer-1-MultiHeadSelfAtte (None, None, 768)    2362368     Transformer-0-FeedForward-Norm[0]\n                                                                 Transformer-0-FeedForward-Norm[0]\n                                                                 Transformer-0-FeedForward-Norm[0]\n__________________________________________________________________________________________________\nTransformer-1-MultiHeadSelfAtte (None, None, 768)    0           Transformer-1-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-1-MultiHeadSelfAtte (None, None, 768)    0           Transformer-0-FeedForward-Norm[0]\n                                                                 Transformer-1-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-1-MultiHeadSelfAtte (None, None, 768)    1536        
Transformer-1-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-1-FeedForward (Feed (None, None, 768)    4722432     Transformer-1-MultiHeadSelfAttent\n__________________________________________________________________________________________________\nTransformer-1-FeedForward-Dropo (None, None, 768)    0           Transformer-1-FeedForward[0][0]\n__________________________________________________________________________________________________\nTransformer-1-FeedForward-Add ( (None, None, 768)    0           Transformer-1-MultiHeadSelfAttent\n                                                                 Transformer-1-FeedForward-Dropout\n__________________________________________________________________________________________________\nTransformer-1-FeedForward-Norm  (None, None, 768)    1536        Transformer-1-FeedForward-Add[0][\n__________________________________________________________________________________________________<\/code><\/pre>\n\n\n<p>\u4e0b\u9762\u6211\u4eec\u7b80\u5355\u5256\u6790\u4e00\u4e0b\u5404\u90e8\u5206\u53c2\u6570\uff1a<\/p>\n\n\n<p>\u9996\u5148\u662f\u8f93\u5165\uff1a<\/p>\n\n\n<p>embedding\u90e8\u5206\uff0cbert\u4f7f\u7528\u4e86embedding\u3001token type\uff08\u7528\u6765\u533a\u5206\u4e24\u4e2a\u53e5\u5b50\uff09\u548cposition embedding\u4e09\u90e8\u5206\u3002<\/p>\n\n\n<p>embedding\u5c31\u662f (\u8fd9\u91cc\u4ee5\u8bcd\u5178\u5927\u5c0f21128\u4e3a\u4f8b) \uff1a<\/p>\n\n\n<p>vocab size * embedding size = 21128*768=16226304\u3002<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nEmbedding-Token (Embedding)     multiple             16226304    Input-Token[0][0]\n                                                                 MLM-Norm[0][0]   <\/code><\/pre>\n\n\n<p>token 
type\uff1a<\/p>\n\n\n<p>\u4f7f\u75280\u548c1\u6807\u8bb0\u53e5\u5b50\uff08\u6bd4\u5982NSP\u4efb\u52a1\u65f6\u533a\u5206\u4e24\u4e2a\u53e5\u5b50\uff09\uff1a<\/p>\n\n\n<p>768*2=1536\u3002<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nEmbedding-Segment (Embedding)   (None, None, 768)    1536        Input-Segment[0][0]\n<\/code><\/pre>\n\n\n<p>position embedding\uff1a<\/p>\n\n\n<p>max length * embedding size = 512*768=393216<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nEmbedding-Position (PositionEmb (None, None, 768)    393216      Embedding-Token-Segment[0][0]\n_<\/code><\/pre>\n\n\n<p>\u7136\u540eBert\u5728embedding\u90e8\u5206\u8fd8\u6709\u4e00\u4e2alayer Normalization\uff0c\u56e0\u6b64\u8fd8\u8981\u6709768*2\u4e2a\u53c2\u6570\uff08  \u03b3 \u548c \u03b2 \uff09<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nEmbedding-Norm (LayerNormalizat (None, None, 768)    1536        Embedding-Position[0][0]               <\/code><\/pre>\n\n\n<p>\u641e\u61c2\u4e86embedding\u7684\u53c2\u6570\uff0c\u4e0b\u9762\u5c31\u662fTransformer\u7684\u53c2\u6570\uff0c\u4e3a\u4e86\u7b80\u4fbf\uff0c\u8fd9\u91cc\u53ea\u4ecb\u7ecd\u4e00\u5c42\u3002<\/p>\n\n\n<p>\u9996\u5148\u662f\u591a\u5934\u6ce8\u610f\u529b\uff1a<\/p>\n\n\n<p>bert base\u4f7f\u7528\u4e8612\u5934\u6ce8\u610f\u529b\u673a\u5236\uff0cQKV\u7ef4\u5ea6\u4e3a64\u7ef4\u5ea6\uff0c\u540c\u65f6\u6700\u540e\u8fd8\u9700\u8981\u4e00\u4e2aO\u77e9\u9635\uff0c\u5c0612\u5934\u6ce8\u610f\u529b\u7ed3\u5408\u3002<\/p>\n\n\n<p>\u56e0\u6b64\u603b\u53c2\u6570\u5c31\u662f\uff1a embedding size* head num  * qkv size * len(qkv)[\u4e09\u4e2a\u6620\u5c04\u77e9\u9635]  + \uff08head num* qkv size\uff09* embedding 
size[\u591a\u5934\u7ed3\u679c\u62fc\u63a5\u540e\u5904\u7406] + qkvo bias= 768*12*64*3 + 12*64*768+ 768*4 =2362368<\/p>\n\n\n<p>\uff08\u8fd9\u91cc\u6700\u540e\u7684768*4 \u5206\u522b\u4e3aQ\u77e9\u9635\u3001K\u77e9\u9635\u3001V\u77e9\u9635\u7684\u504f\u7f6e\u4ee5\u53ca\u6700\u540e\u7684O\u77e9\u9635\u7684\u504f\u7f6e\u3002\uff09<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    2362368     Embedding-Dropout[0][0]\n                                                                 Embedding-Dropout[0][0]\n                                                                 Embedding-Dropout[0][0]\n<\/code><\/pre>\n\n\n<p>\u968f\u540e\u662f\u591a\u5934\u6ce8\u610f\u529b\u673a\u5236\u7684LN\uff1a768*2=1536<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nTransformer-0-MultiHeadSelfAtte (None, None, 768)    1536        Transformer-0-MultiHeadSelfAttent\n<\/code><\/pre>\n\n\n<p>\u968f\u540e\uff0c\u662f\u5168\u8fde\u63a5\u5c42\uff1a<\/p>\n\n\n<p>Bert\u4e2d\u9690\u85cf\u5c42\u4e2a\u6570\u91c7\u7528\u4e86\u4f20\u7edf\u76844*input\u7684\u5927\u5c0f\uff0c\u56e0\u6b64\u4e3a4*768=3072\u3002<\/p>\n\n\n<p>\u56e0\u6b64\uff0c\u8fd9\u90e8\u5206\u53c2\u6570\u4e3a\uff1aembedding size * hidden size + bias + hidden size * embedding size + bias = 768*3072+3072+3072*768+768=4722432<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nTransformer-0-FeedForward (Feed (None, None, 768)    4722432     Transformer-0-MultiHeadSelfAttent<\/code><\/pre>\n\n\n<p>\u7136\u540e\u662fLN\uff1a768*2=1536<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nTransformer-0-FeedForward-Norm  
(None, None, 768)    1536        Transformer-0-FeedForward-Add[0][  <\/code><\/pre>\n\n\n<p>\u7136\u540e\u5c31\u662f\u4e0b\u4e00\u5c42Transformer \u4ee5\u6b64\u7c7b\u63a8\u3002<\/p>\n\n\n<h4 class=\"wp-block-heading\">bert\u7684Conditional Layer Normalization\uff1a<\/h4>\n\n\n<p>\u4f7f\u7528\u4e86<a rel=\"noreferrer noopener\" aria-label=\"Conditional Layer Normalization\uff08\u5728\u65b0\u7a97\u53e3\u6253\u5f00\uff09\" href=\"http:\/\/www.sniper97.cn\/index.php\/note\/deep-learning\/note-deep-learning\/3782\/\" target=\"_blank\">Conditional Layer Normalization<\/a>\u540e\uff0cbert\u7684LayerNormalization\u53c2\u6570\u53d8\u4e3a198144\u4e2a\u3002<\/p>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nEmbedding-Norm (LayerNormalizat (None, None, 768)    198144      Embedding-Position[0][0]\n                                                                 reshape[0][0]<\/code><\/pre>\n\n\n<p>\u7531\u4e8e \u03b2\u3001\u03b3 \u6ca1\u6709\u4efb\u4f55\u53d8\u5316\uff0c\u8fd8\u662f1536\u4e2a\u53c2\u6570\uff0c\u6211\u4eec\u53ef\u4ee5\u5206\u6790\u4e00\u4e0b\u8fd9\u591a\u51fa\u6765\u7684196608\u662f\u4ece\u54ea\u6765\u7684\u3002<\/p>\n\n\n<p>\u7531\u4e8e\u6211\u4eec\u9700\u8981\u5bf9  \u03b2\u3001\u03b3  \u8fdb\u884c\u76f8\u540c\u7684\u53d8\u6362\uff0c\u56e0\u6b64\u53c2\u6570\u4e2a\u6570\u4e5f\u662f\u76f8\u540c\u7684\uff0c\u6211\u4eec\u5206\u6790\u7684\u53c2\u6570\u4e2a\u6570\u53ef\u4ee5\u8fdb\u4e00\u6b65\u7f29\u5c0f\u4e3a98304\u4e2a\u3002<\/p>\n\n\n<p>\u800c\u6211\u4eec\u4e4b\u524d\u63d0\u5230\uff0c\u9700\u8981\u5c06c\u7684128\u7ef4\u5ea6\u5347\u5230768\u7ef4\uff0c\u5982\u679c\u4e0d\u8003\u8651bias\u504f\u7f6e\uff0c\u53ea\u505a\u77e9\u9635\u53d8\u6362\uff08\u6ca1\u6709\u504f\u7f6e\u7684\u5355\u5c42\u795e\u7ecf\u7f51\u7edc\u5b9e\u9645\u4e0a\u5c31\u662f\u77e9\u9635\u53d8\u6362\uff09\uff0c\u6070\u597d\u662f768*128=98304\u4e2a\u53c2\u6570\u3002<\/p>\n\n\n<p>\u8be6\u89c1 <a rel=\"noreferrer 
noopener\" href=\"http:\/\/www.sniper97.cn\/index.php\/note\/deep-learning\/note-deep-learning\/3782\/\" target=\"_blank\">Conditional Layer Normalization<\/a>\u3002<\/p>\n\n\n<h4 class=\"wp-block-heading\">bert\u7684mlm\u4efb\u52a1\uff1a<\/h4>\n\n\n<pre class=\"wp-block-code\"><code>__________________________________________________________________________________________________\nTransformer-11-FeedForward-Norm (None, None, 768)    1536        Transformer-11-FeedForward-Add[0]\n__________________________________________________________________________________________________\nMLM-Dense (Dense)               (None, None, 768)    590592      Transformer-11-FeedForward-Norm[0\n__________________________________________________________________________________________________\nMLM-Norm (LayerNormalization)   (None, None, 768)    1536        MLM-Dense[0][0]\n__________________________________________________________________________________________________\nMLM-Bias (BiasAdd)              (None, None, 21128)  21128       Embedding-Token[1][0]\n__________________________________________________________________________________________________\nMLM-Activation (Activation)     (None, None, 21128)  0           MLM-Bias[0][0]\n__________________________________________________________________________________________________\ncross_entropy (CrossEntropy)    (None, None, 21128)  0           Input-Token[0][0]\n                                                                 MLM-Activation[0][0]\n==================================================================================================     <\/code><\/pre>\n\n\n<p>\u8fd9\u91cc\u4e3b\u8981\u591a\u4e86MLM-Dense\u3001MLM-Norm \u3001MLM-Bias\u51e0\u5904\u3002<\/p>\n\n\n<p>MLM-Dense\u4e3a768*768+768=590592\u3002<\/p>\n\n\n<p>\u5176\u4ed6\u4e24\u4e2a\u5c31\u6ca1\u5565\u53ef\u8bf4\u7684\u4e86\u3002<\/p>\n\n\n<p>bert\u7684<a href=\"http:\/\/www.sniper97.cn\/index.php\/note\/deep-learning\/note-deep-learning\/3810\/\" 
target=\"_blank\" rel=\"noreferrer noopener\" aria-label=\"mlm\u4efb\u52a1\u5b9e\u73b0\u65b9\u5f0f\uff08\u5728\u65b0\u7a97\u53e3\u6253\u5f00\uff09\">mlm\u4efb\u52a1\u5b9e\u73b0\u65b9\u5f0f<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u672c\u7bc7\u6587\u7ae0\u6211\u4eec\u5c06\u62c6\u5206Bert\uff0c\u7ec6\u7a76Bert\u7684\u7ed3\u6784\u4ee5\u53ca\u6bcf\u4e00\u5c42\u7684\u53c2\u6570\u4e2a\u6570 \u6211\u4eec\u4ee5bert-base\u4e3a\u4f8b\uff087 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_mi_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[18,8,9],"tags":[31,37,39],"views":7903,"_links":{"self":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/3754"}],"collection":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/comments?post=3754"}],"version-history":[{"count":0,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/3754\/revisions"}],"wp:attachment":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/media?parent=3754"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/categories?post=3754"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/tags?post=3754"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}