{"id":1392,"date":"2019-09-01T13:28:45","date_gmt":"2019-09-01T05:28:45","guid":{"rendered":"http:\/\/www.sniper97.cn\/?p=1392"},"modified":"2019-09-01T13:28:45","modified_gmt":"2019-09-01T05:28:45","slug":"%e3%80%90%e6%9c%ba%e5%99%a8%e5%ad%a6%e4%b9%a0%e3%80%91%e7%a5%9e%e7%bb%8f%e7%bd%91%e7%bb%9c%e5%8f%8d%e5%90%91%e4%bc%a0%e6%92%ad","status":"publish","type":"post","link":"http:\/\/www.sniper97.cn\/index.php\/note\/machine-learning-in-action\/1392\/","title":{"rendered":"\u3010\u5434\u6069\u8fbe\u673a\u5668\u5b66\u4e60\u3011\u795e\u7ecf\u7f51\u7edc\u53cd\u5411\u4f20\u64ad"},"content":{"rendered":"\n<p> \u5434\u6069\u8fbeMachine-Learning \u7b2c\u4e94\u5468\uff1a\u795e\u7ecf\u7f51\u7edc\u53cd\u5411\u4f20\u64ad\uff08Neural Network back propagation\uff09 <\/p>\n\n\n<p>\u672c\u8282\u4f7f\u7528\u8d1f\u53cd\u9988\u65b9\u5f0f\u8bad\u7ec3\u795e\u7ecf\u7f51\u7edc\u3002<\/p>\n\n\n<p>\u4ee3\u4ef7\u51fd\u6570\u548c\u68af\u5ea6\u4e0b\u964d\u51fd\u6570\u5747\u6b63\u5219\u5316\u3002<\/p>\n\n\n<p>.<\/p>\n\n\n<p>\u6298\u817e\u4e86\u534a\u5929\uff0c\u53ef\u7b97\u8fd9\u5468\u7684\u8bfe\u7a0b\u548c\u4f5c\u4e1a\u90fd\u5b66\u5b8c\u4e86\u3002\u3002\u7279\u522b\u662f\u81ea\u5df1\u5b9e\u73b0\u7684\u90e8\u5206\uff0c\u8e29\u4e86\u597d\u591a\u8ddf\u7740\u6e90\u7801\u6765\u5b9e\u73b0\u9047\u4e0d\u5230\u7684\u5751\u3002\u3002\u3002<\/p>\n\n\n<p>\u753b\u51fa\u7684\u9690\u85cf\u5c42\u6570\u636e\u56fe\u5982\u4e0b<\/p>\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img loading=\"lazy\" decoding=\"async\" width=\"444\" height=\"442\" src=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2019\/09\/\u56fe\u7247.png\" alt=\"\" class=\"wp-image-1554\" srcset=\"http:\/\/www.sniper97.cn\/wp-content\/uploads\/2019\/09\/\u56fe\u7247.png 444w, http:\/\/www.sniper97.cn\/wp-content\/uploads\/2019\/09\/\u56fe\u7247-300x300.png 300w, http:\/\/www.sniper97.cn\/wp-content\/uploads\/2019\/09\/\u56fe\u7247-150x150.png 150w\" sizes=\"(max-width: 444px) 100vw, 444px\" 
\/><\/figure><\/div>\n\n\n<p>\u6e90\u4ee3\u7801\u52a0\u6ce8\u91ca\u52a0\u7406\u89e3\u7248\uff1a<\/p>\n\n\n<p>\u51c6\u786e\u7387\u57fa\u672c\u572890~\u00b795\u8fd9\u9644\u8fd1\u3002<\/p>\n\n\n<pre class=\"wp-block-code\"><code># -*- coding:utf-8 -*-\nimport matplotlib.pyplot as plt\nimport numpy\nimport numpy as np\nimport scipy.io as sio\nimport matplotlib\nimport scipy.optimize as opt\nfrom sklearn.metrics import classification_report\ndef load_data(path, transpose=True):\n    \"\"\"\n    \u8bfb\u53d6\u6570\u636e\n    :param path:\n    :param transpose:\n    :return:\n    \"\"\"\n    data = sio.loadmat(path)\n    y = data.get('y')\n    # \u5c06y\u7531\u5217\u5411\u91cf\u53d8\u6210\u884c\u5411\u91cf\n    y = y.reshape(y.shape[0])\n    X = data.get('X')  # 5000*400\n    if transpose:\n        # \u8f6c\u7f6e\u5f53\u524d\u6570\u636e\u4e2d,\u53ef\u4ee5\u7406\u89e3\u4e3a\u5c06\u56fe\u7247\u65cb\u8f6c,\u5c06\u6570\u636e\u5206\u4e3a5000\u4e2a20*20\u7684\u77e9\u9635\n        X = numpy.array([im.reshape((20, 20)).T for im in X])\n        # \u5c06\u65cb\u8f6c\u540e\u7684\u56fe\u7247\u5c55\u5f00\u6210\u4e00\u884c 5000*400\n        X = numpy.array([im.reshape(400) for im in X])\n    return X, y\nX, _ = load_data('.\/data\/ex4data1.mat', transpose=False)\ndef plot_100_image(X):\n    \"\"\"\n    #\u7ed8\u56fe\u51fd\u6570\uff0c\u753b100\u5f20\u56fe\u7247\n    X :\n    \"\"\"\n    # \u83b7\u5f97\u56fe\u7247\u5927\u5c0f\uff08\u6839\u53f7\u4e0b400\uff09\n    size = int(numpy.sqrt(X.shape[1]))\n    # \u968f\u673a\u4eceX\u4e2d\u9009\u62e9100\u7ec4\u6570\u636e\n    sample_idx = numpy.random.choice(numpy.arange(X.shape[0]), 100)\n    # \u53d6\u6570\u968f\u673a\u6570\u636e\n    sample_images = X[sample_idx, :]\n    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))\n    for r in range(10):\n        for c in range(10):\n            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),\n                                   
cmap=matplotlib.cm.binary)\n            plt.xticks(numpy.array([]))\n            plt.yticks(numpy.array([]))\n# \u968f\u673a\u7ed8\u5236100\u5f20\u56fe\u7247\n# plot_100_image(X)\n# plt.show()\n\"\"\"\n\u524d\u9988\u795e\u7ecf\u7f51\u7edc\u7b97\u6cd5\n\"\"\"\nX_raw, y_raw = load_data('.\/data\/ex4data1.mat', transpose=False)\n# \u589e\u52a0\u5168\u90e8\u4e3a0\u7684\u4e00\u5217  (5000, 401)\nX = numpy.insert(X_raw, 0, numpy.ones(X_raw.shape[0]), axis=1)\ndef expand_y(y):\n    \"\"\"\n    \u5904\u7406\u7ed3\u679c\u5411\u91cf\n    \u5c061 2 \u8fd9\u79cd\u7ed3\u679c\u8f6c\u5316\u4e3a 0 1 0 0 0 0 0  \u8fd9\u79cd\u5411\u91cf\n    :param y:\n    :return:\n    \"\"\"\n    res = []\n    for i in y:\n        y_array = numpy.zeros(10)\n        y_array[i - 1] = 1\n        res.append(y_array)\n    return numpy.array(res)\n# \u83b7\u5f97\u7ed3\u679c\u96c6\u5904\u7406\u540e\u7684\u76ee\u6807\u5411\u91cf\ny = expand_y(y_raw)\ndef load_weight(path):\n    \"\"\"\n    \u8bfb\u53d6\u6743\u91cd\n    :param path:\n    :return:\n    \"\"\"\n    data = sio.loadmat(path)\n    return data['Theta1'], data['Theta2']\nt1, t2 = load_weight('.\/data\/ex4weights.mat')\ndef serialize(a, b):\n    \"\"\"\n    \u5f53\u6211\u4eec\u4f7f\u7528\u9ad8\u7ea7\u4f18\u5316\u65b9\u6cd5\u6765\u4f18\u5316\u795e\u7ecf\u7f51\u7edc\u65f6\uff0c\u6211\u4eec\u9700\u8981\u5c06\u591a\u4e2a\u53c2\u6570\u77e9\u9635\u5c55\u5f00\uff0c\u624d\u80fd\u4f20\u5165\u4f18\u5316\u51fd\u6570\uff0c\u7136\u540e\u518d\u6062\u590d\u5f62\u72b6\u3002\n    \u5e8f\u5217\u53162\u77e9\u9635\n    \u8fd9\u4e2a\u65b9\u6cd5\u7684\u76ee\u7684\u5c31\u662f\u4f7f\u53c2\u6570\u6241\u5e73\u5316\n    # \u5728\u8fd9\u4e2ann\u67b6\u6784\u4e2d\uff0c\u6211\u4eec\u6709theta1\uff0825,401\uff09\uff0ctheta2\uff0810,26\uff09\uff0c\u5b83\u4eec\u7684\u68af\u5ea6\u662fdelta1\uff0cdelta2\n    :param a:\n    :param b:\n    :return:\n    \"\"\"\n    # concatenate \u65b9\u6cd5\u53ef\u4ee5\u5b8c\u6210\u6570\u7ec4\u62fc\u63a5\n    # ravel 
\u65b9\u6cd5\u53ef\u4ee5\u5c06\u6570\u636e\u6241\u5e73\u5316\n    # \u8fd9\u4e2a\u6709\u4e2a\u7c7b\u4f3c\u65b9\u6cd5\u53eb flatten \u8be5\u65b9\u6cd5\u4e5f\u53ef\u4ee5\u505a\u6570\u636e\u6241\u5e73\u5316\uff0c\u548cravel\u7684\u533a\u522b\u662fravel\u8fd4\u56de\u7684\u662f\u5f15\u7528\uff0c\u5bf9ravel\u540e\u7684\n    # \u5bf9\u8c61\u8fdb\u884c\u4fee\u6539\u4f1a\u5f71\u54cd\u5230\u539f\u6570\u636e\uff0c\u800cflatten\u5219\u4f1a\u5148copy \u4e0d\u4f1a\u5bf9\u539f\u6570\u636e\u4ea7\u751f\u5f71\u54cd\u3002\n    return numpy.concatenate((numpy.ravel(a), numpy.ravel(b)))\n# \u6241\u5e73\u5316\u53c2\u6570\uff0c25*401+10*26=10285\ntheta = serialize(t1, t2)\ndef feed_forward(theta, X):\n    \"\"\"apply to architecture 400+1 * 25+1 *10\n    X: 5000 * 401\n    \"\"\"\n    t1, t2 = deserialize(theta)  # t1: (25,401) t2: (10,26)\n    m = X.shape[0]\n    a1 = X  # 5000 * 401\n    z2 = a1 @ t1.T  # 5000 * 25\n    a2 = numpy.insert(sigmoid(z2), 0, numpy.ones(m), axis=1)  # 5000*26\n    z3 = a2 @ t2.T  # 5000 * 10\n    h = sigmoid(z3)  # 5000*10, this is h_theta(X)\n    return a1, z2, a2, z3, h  # you need all those for backprop\ndef sigmoid(z):\n    return 1 \/ (1 + numpy.exp(-z))\ndef deserialize(seq):\n    \"\"\"\n    \u5c06\u6241\u5e73\u5316\u5904\u7406\u4e4b\u540e\u7684\u6570\u636e\u6062\u590d\u5230\u4e4b\u524d\u7684\u6837\u5b50\uff08\u4e24\u4e2atheta\n    :param seq:\n    :return:\n    \"\"\"\n    #     \"\"\"into ndarray of (25, 401), (10, 26)\"\"\"\n    return seq[:25 * 401].reshape(25, 401), seq[25 * 401:].reshape(10, 26)\n# h\u662f\u8ba1\u7b97\u540e\u7684\u7ed3\u679c\u96c6\n# _, _, _, _, h = feed_forward(theta, X)\n# print(h)\ndef cost(theta, X, y):\n    \"\"\"\n    \u4ee3\u4ef7\u51fd\u6570\n    :param theta:\n    :param X:\n    :param y:\n    :return:\n    \"\"\"\n    m = X.shape[0]  # get the data size m\n    _, _, _, _, h = feed_forward(theta, X)\n    # h\u662f\u5df2\u7ecf\u6fc0\u6d3b\u4e4b\u540e\u7684\u7ed3\u679c\u96c6 
\u6240\u4ee5\u8fd9\u91cch\u4e0d\u9700\u8981\u8fdb\u884c\u6fc0\u6d3b\n    pair_computation = -numpy.multiply(y, numpy.log(h)) - numpy.multiply((1 - y), numpy.log(1 - h))\n    return pair_computation.sum() \/ m\n# cost(theta, X, y)\n# 0.287629165161\n# print(cost(theta, X, y))\n\"\"\"\n\u8d1f\u53cd\u9988\u795e\u7ecf\u7f51\u7edc\n\"\"\"\ndef regularized_cost(theta, X, y, l=1):\n    \"\"\"\n    \u6b63\u5219\u5316\u4ee3\u4ef7\u51fd\u6570\n    :param theta:\n    :param X:\n    :param y:\n    :param l:\n    :return:\n    \"\"\"\n    t1, t2 = deserialize(theta)  # t1: (25,401) t2: (10,26)\n    m = X.shape[0]\n    # \u5ffd\u7565\u7b2c\u4e00\u9879\uff0c\u8ba1\u7b97t1\u548ct2\u7684\u4ee3\u4ef7\n    reg_t1 = (l \/ (2 * m)) * numpy.power(t1[:, 1:], 2).sum()\n    reg_t2 = (l \/ (2 * m)) * numpy.power(t2[:, 1:], 2).sum()\n    cost1 = cost(theta, X, y)\n    a1 = cost(theta, X, y) + reg_t1 + reg_t2\n    return cost(theta, X, y) + reg_t1 + reg_t2\n# 0.383769859091\n# print(regularized_cost(theta, X, y))\ndef sigmoid_gradient(z):\n    \"\"\"\n    sigmoid\u7684\u5bfc\u51fd\u6570\uff0c\u68af\u5ea6\u4e0b\u964d\u65f6\u4f7f\u7528\n    \"\"\"\n    return numpy.multiply(sigmoid(z), 1 - sigmoid(z))\n# 0.25\n# print(sigmoid_gradient(0))\ndef gradient(theta, X, y):\n    \"\"\"\n    \u68af\u5ea6\u4e0b\u964d\n    :param theta:\n    :param X:\n    :param y:\n    :return:\n    \"\"\"\n    # \u5c06\u6241\u5e73\u5316\u7684\u6570\u636e\u62c6\u5206\n    t1, t2 = deserialize(theta)  # t1: (25,401) t2: (10,26)\n    m = X.shape[0]\n    # \u521d\u59cb\u5316\u4e24\u4e2adelta\n    delta1 = numpy.zeros(t1.shape)  # (25, 401)\n    delta2 = numpy.zeros(t2.shape)  # (10, 26)\n    # \u5206\u522b\u662f\u8f93\u5165\u5c42  \u9690\u85cf\u5c42\u6fc0\u6d3b\u524d \u9690\u85cf\u5c42\u6fc0\u6d3b\u540e  \u8f93\u51fa\u5c42\u6fc0\u6d3b\u524d \u8f93\u51fa\u5c42\u6fc0\u6d3b\u540e\n    a1, z2, a2, z3, h = feed_forward(theta, X)\n    # \u5bf9\u6bcf\u4e00\u7ec4\u6570\u636e\u8fdb\u884c\u904d\u5386\n    for i in range(m):\n        
a1i = a1[i, :]  # (1, 401)\n        z2i = z2[i, :]  # (1, 25)\n        a2i = a2[i, :]  # (1, 26)\n        hi = h[i, :]  # (1, 10)\n        yi = y[i, :]  # (1, 10)\n        # \u8f93\u51fa\u5c42\u8bef\u5dee\n        d3i = hi - yi  # (1, 10)\n        z2i = numpy.insert(z2i, 0, numpy.ones(1))  # make it (1, 26) to compute d2i\n        # \u9690\u85cf\u5c42\u8bef\u5dee\n        # d2i = numpy.multiply(t2.T @ d3i, sigmoid_gradient(z2i))  # (1, 26)\n        # \u548c\u4e0a\u9762\u7b49\u4ef7\n        #   \u6ce8\u610f\u77e9\u9635\u4e58\u6cd5\u548c\u77e9\u9635\u70b9\u4e58\u7684\u533a\u522b   1,26 * 1,26 = 1,26\n        d2i = (t2.T @ d3i) * sigmoid_gradient(z2i)\n        # careful with np vector transpose\n        # \u53c2\u6570\u8d1f\u53cd\u9988\uff0c\u6ce8\u610f\u77e9\u9635\u8f6c\u7f6e\n        delta2 += numpy.matrix(d3i).T @ numpy.matrix(a2i)  # (1, 10).T @ (1, 26) -> (10, 26)\n        delta1 += numpy.matrix(d2i[1:]).T @ numpy.matrix(a1i)  # (1, 25).T @ (1, 401) -> (25, 401)\n    delta1 = delta1 \/ m\n    delta2 = delta2 \/ m\n    return serialize(delta1, delta2)\n# d1, d2 = deserialize(gradient(theta, X, y))\ndef regularized_gradient(theta, X, y, l=1):\n    \"\"\"\n    \u6b63\u5219\u5316\u68af\u5ea6\u4e0b\u964d\n    don't regularize theta of bias terms\n    \u4e0d\u6b63\u5219theta\u7684\u504f\u7f6e\n    \"\"\"\n    m = X.shape[0]\n    # \u83b7\u53d6\u5230\u8d1f\u53cd\u9988\u4e4b\u540e\u8ba1\u7b97\u5230\u7684\u4e24\u4e2a\u7f51\u7edc\u53c2\u6570\n    # \u8fd9\u91ccdelta  \u4e3a\u7ecf\u8fc7\u68af\u5ea6\u4e0b\u964d\u8ba1\u7b97\u540e\u7684\u53c2\u6570\n    # t \u4e3a\u7ecf\u8fc7\u8ba1\u7b97\u524d\u7684\u53c2\u6570  \u7136\u540e\u7528delta\u548ct\u8ba1\u7b97\u8bef\u5dee\u9879\n    delta1, delta2 = deserialize(gradient(theta, X, y))\n    t1, t2 = deserialize(theta)\n    # \u4e0d\u60e9\u7f5a\u7b2c\u4e00\u9879\uff1a\u5c06\u6700\u524d\u9762\u6dfb\u52a0\u7684\u4e00\u5217\u7f6e0\uff0c\u4e0d\u52a0\u5165\u8ba1\u7b97\uff080*n\uff09\n    # \u8fd9\u91cc\u5bf9t1\u3001reg term d1 
\u7684\u64cd\u4f5c\u6211\u7406\u89e3\u662f\u5728\u505a\u6b63\u5219\u5316\uff0c\u52a0\u4e0a\u4e00\u4e2a\u539f\u6570\u636e\u7684\u6bd4\u4f8b\uff0c\u9632\u6b62\u6570\u636e\u5dee\u8ddd\u8fc7\u5927\n    # \u4f46\u662f\u8fd9\u91cc\u53bb\u6389\u52a0\u4e0areg term \uff0c\u6b63\u786e\u7387\u4e0d\u4f46\u6ca1\u6709\u4e0b\u964d\uff0c\u751a\u81f3\u89c9\u5f97\u5c0f\u6da8...\n    # \u53ef\u80fd\u8fd9\u4e2a\u6b63\u5219\u5316\u4e5f\u4e0d\u662f\u6bcf\u4e2a\u90fd\u9700\u8981\u7684\u5427\n    t1[:, 0] = 0\n    reg_term_d1 = (l \/ m) * t1\n    # \u4fee\u6539delta\u53c2\u6570\n    delta1 = delta1 + reg_term_d1\n    t2[:, 0] = 0\n    reg_term_d2 = (l \/ m) * t2\n    delta2 = delta2 + reg_term_d2\n    delta1_sum = numpy.sum(delta1)\n    delta2_sum = numpy.sum(delta2)\n    a1 = np.sum(serialize(delta1, delta2))\n    return serialize(delta1, delta2)\n# \u8fd9\u4e2a\u8fd0\u884c\u5f88\u6162\uff0c\u8c28\u614e\u8fd0\u884c\n# gradient_checking(theta, X, y, epsilon=0.0001, regularized=True)\ndef random_init(size):\n    \"\"\"\n    \u968f\u673a\u521d\u59cb\u5316\n    :param size:\n    :return:\n    \"\"\"\n    return np.random.uniform(-0.12, 0.12, size)\ndef nn_training(X, y):\n    \"\"\"regularized version\n    the architecture is hard coded here... 
won't generalize\n    \u7f51\u7edc\u8bad\u7ec3\n    \u6b63\u5219\u7248\u672c\n    \"\"\"\n    # \u521d\u59cb\u5316\u53c2\u6570\n    init_theta = random_init(10285)  # 25*401 + 10*26\n    # init_theta = np.zeros(10285)\n    # \u4f7f\u7528scipy\u8ba1\u7b97\u4ee3\u4ef7\u6700\u5c0f\u7684\u70b9\n    res = opt.minimize(fun=regularized_cost,\n                       x0=init_theta,\n                       args=(X, y, 1),\n                       jac=regularized_gradient,\n                       method='TNC')\n    return res\nres = nn_training(X, y)  # \u6162\nprint(res)\n_, y_answer = load_data('.\/data\/ex4data1.mat')\nprint(y_answer[:20])\nfinal_theta = res.x\ndef show_accuracy(theta, X, y):\n    \"\"\"\n    \u8f93\u51fa\u51c6\u786e\u7387\n    :param theta:\n    :param X:\n    :param y:\n    :return:\n    \"\"\"\n    _, _, _, _, h = feed_forward(theta, X)\n    y_pred = np.argmax(h, axis=1) + 1\n    print(classification_report(y, y_pred))\n\"\"\"\n             precision    recall  f1-score   support\n           1       0.97      0.99      0.98       500\n           2       0.99      0.94      0.96       500\n           3       0.90      0.98      0.94       500\n           4       1.00      0.68      0.81       500\n           5       1.00      0.46      0.63       500\n           6       0.90      0.99      0.94       500\n           7       1.00      0.78      0.88       500\n           8       0.78      1.00      0.88       500\n           9       0.63      0.99      0.77       500\n          10       0.94      1.00      0.97       500\n    accuracy                           0.88      5000\n   macro avg       0.91      0.88      0.88      5000\nweighted avg       0.91      0.88      0.88      5000\n              precision    recall  f1-score   support\n           1       0.98      0.97      0.97       500\n           2       0.90      0.99      0.94       500\n           3       0.95      0.94      0.94       500\n           4       0.98      0.96      0.97       500\n   
        5       1.00      0.81      0.89       500\n           6       0.99      0.96      0.98       500\n           7       0.98      0.94      0.96       500\n           8       0.79      0.99      0.88       500\n           9       0.98      0.91      0.94       500\n          10       0.97      0.99      0.98       500\n    accuracy                           0.95      5000\n   macro avg       0.95      0.95      0.95      5000\nweighted avg       0.95      0.95      0.95      5000\n\"\"\"\nshow_accuracy(final_theta, X, y_raw)\ndef plot_hidden_layer(theta):\n    \"\"\"\n    \u7ed8\u5236\u9690\u85cf\u5c42\n    theta: (10285, )\n    \"\"\"\n    final_theta1, _ = deserialize(theta)\n    hidden_layer = final_theta1[:, 1:]  # ger rid of bias term theta\n    fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(5, 5))\n    for r in range(5):\n        for c in range(5):\n            ax_array[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)),\n                                   cmap=matplotlib.cm.binary)\n            plt.xticks(np.array([]))\n            plt.yticks(np.array([]))\nplot_hidden_layer(final_theta)\nplt.show()\n<\/code><\/pre>\n\n\n<p>\u6574\u7406\u4e86\u4e00\u4e0b\u4ee3\u7801\u81ea\u5df1\u53c8\u91cd\u65b0\u6253\u4e86\u4e00\u904d\uff1a<\/p>\n\n\n<p>\u8fd9\u91cc\u9700\u8981\u6ce8\u610f\u7684\u5c31\u662f\u5728\u6b63\u5219\u5316\u7684\u65f6\u5019\u7684\u53c2\u6570l\u3002\u3002\u597d\u591a\u6b21\u90fd\u6253\u6210\u4e861\u3002<\/p>\n\n\n<p>\u8fd8\u6709\u5c31\u662f\u8ba1\u7b97\u957f\u5ea6\u4e0d\u80fd\u7528\u53c2\u6570t\u3002\u3002\u8981\u7528X\uff0c\u9519\u4e86\u597d\u51e0\u4e2a\u5730\u65b9<\/p>\n\n\n<p>\u81ea\u5df1\u5b9e\u73b0\u7684\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n\n\n<pre class=\"wp-block-code\"><code># -*- coding:utf-8 -*-\nimport matplotlib\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport scipy.io as sio\nimport scipy.optimize as opt\nfrom sklearn.metrics import 
classification_report\n\"\"\"\n\u76f8\u5173\u65b9\u6cd5\n\"\"\"\ndef load_data(path):\n    \"\"\"\n    \u8bfb\u53d6\u6570\u636e\n    :param path:\n    :param transpose:\n    :return:\n    \"\"\"\n    data = sio.loadmat(path)\n    y = data.get('y')\n    # \u5c06y\u7531\u5217\u5411\u91cf\u53d8\u6210\u884c\u5411\u91cf\n    y = y.reshape(y.shape[0])\n    X = data.get('X')  # 5000*400\n    return X, y\ndef sigmoid(Z):\n    \"\"\"\n    sigmoid \u51fd\u6570\n    :param Z:\n    :return:\n    \"\"\"\n    return 1 \/ (1 + np.exp(-Z))\ndef sigmoid_derivative(Z):\n    \"\"\"\n    sigmoid \u7684\u5bfc\u51fd\u6570\n    :param Z:\n    :return:\n    \"\"\"\n    # return sigmoid(Z) * (1 - sigmoid(Z))\n    return np.multiply(sigmoid(Z), 1 - sigmoid(Z))\ndef serialize(theta1, theta2):\n    \"\"\"\n    \u6241\u5e73\u5316\u53c2\u6570\n    :param theta1:\n    :param theta2:\n    :return:\n    \"\"\"\n    return np.concatenate((np.ravel(theta1), np.ravel(theta2)))\ndef deserialize(theta):\n    \"\"\"\n    \u53cd\u6241\u5e73\u5316\u53c2\u6570\n    :param theta:\n    :return:\n    \"\"\"\n    return theta[:25 * 401].reshape(25, 401), theta[25 * 401:].reshape(10, 26)\ndef feed_forward(theta, X):\n    \"\"\"\n    \u795e\u7ecf\u7f51\u7edc\u8ba1\u7b97\n    :param theta:\n    :param X:\n    :return:\n    \"\"\"\n    # \u53cd\u6241\u5e73\u5316\u83b7\u53d6\u5f53\u524d\u53c2\u6570\n    t1, t2 = deserialize(theta)\n    # \u8ba1\u7b97\u9690\u85cf\u5c42\u6fc0\u6d3b\u524d\u6570\u636e\n    hiden_layar_init_data = X @ t1.T\n    # \u6fc0\u6d3b\u9690\u85cf\u5c42\u51fd\u6570,\u5e76\u589e\u52a0\u4e00\u5217\u504f\u7f6e\n    hiden_layar_final_data = np.insert(sigmoid(hiden_layar_init_data), 0, np.ones(X.shape[0]), axis=1)\n    # \u8ba1\u7b97\u8f93\u51fa\u5c42\u6fc0\u6d3b\u524d\u6570\u636e\n    output_layar_init_data = hiden_layar_final_data @ t2.T\n    # \u6fc0\u6d3b\u8f93\u51fa\u5c42\u6570\u636e\n    output_layar_final_data = sigmoid(output_layar_init_data)\n    # 
\u5206\u522b\u8fd4\u56de\u8fd9\u56db\u4e2a\u6570\u636e\n    return hiden_layar_init_data, hiden_layar_final_data, output_layar_init_data, output_layar_final_data\ndef gradient(theta, X, y, l=1):\n    \"\"\"\n    \u68af\u5ea6\u4e0b\u964d\u51fd\u6570\n    :return:\n    \"\"\"\n    # \u53cd\u5e8f\u5217\u5316theta\n    t1, t2 = deserialize(theta)\n    # \u521d\u59cb\u5316\u4e24\u4e2a\u4e2at1,t2\u4e00\u6837\u5927\u7684\u53d8\u91cf\uff0c\u7528\u6765\u5b58\u653e\u9700\u8981\u53d8\u5316\u7684\u91cf\u4ee5\u8fdb\u884c\u8d1f\u53cd\u9988\n    delta1 = np.zeros(t1.shape)\n    delta2 = np.zeros(t2.shape)\n    # \u83b7\u53d6\u795e\u7ecf\u7f51\u7edc\u8ba1\u7b97\u7ed3\u679c\n    hiden_layar_init_data, hiden_layar_final_data, \\\n    output_layar_init_data, output_layar_final_data = feed_forward(theta, X)\n    data_length = X.shape[0]\n    for i in range(data_length):\n        # \u8ba1\u7b97\u8f93\u51fa\u5c42\u8bef\u5dee\n        output_layar_final_data_error = output_layar_final_data[i] - y[i]\n        # \u5bf9\u9690\u85cf\u5c42\u5904\u7406\u524d\u7684\u6570\u636e\u6dfb\u52a0\u504f\u7f6e\u5217\n        # hiden_layar_init_data_with_bias = np.insert(hiden_layar_init_data[i], 0, 1)\n        hiden_layar_init_data_with_bias = np.insert(hiden_layar_init_data[i], 0, np.ones(1))\n        # \u8ba1\u7b97\u9690\u85cf\u5c42\u7684\u8bef\u5dee\n        hiden_layar_final_data_error = (t2.T @ output_layar_final_data_error) \\\n                                       * sigmoid_derivative(hiden_layar_init_data_with_bias)\n        # \u5bf9delta\u8fdb\u884c\u53cd\u9988\u8ba1\u7b97\n        # delta1 += output_layar_final_data_error.T @ hiden_layar_final_data[i]\n        delta2 += np.matrix(output_layar_final_data_error).T @ np.matrix(hiden_layar_final_data[i])\n        delta1 += np.matrix(hiden_layar_final_data_error[1:]).T @ np.matrix(X[i])\n    delta1 = delta1 \/ data_length\n    delta2 = delta2 \/ data_length\n    return serialize(delta1, delta2)\ndef regularized_gradient(theta, X, y, l=1):\n    
\"\"\"\n    \u6b63\u5219\u5316\u68af\u5ea6\u4e0b\u964d\u51fd\u6570\n    :param theta:\n    :param X:\n    :param y:\n    :param l:\n    :return:\n    \"\"\"\n    # \u539f\u59cb\u53c2\u6570\u53bb\u6241\u5e73\u5316\n    t1, t2 = deserialize(theta)\n    # \u8fdb\u884c\u4e0b\u964d\u4e4b\u540e\u7684\u53c2\u6570\uff0c\u8fd9\u91cc\u4e3a\u4e86\u9632\u6b62delta\u7684\u53c2\u6570\u5dee\u8ddd\u8fc7\u5927\uff0c\u9700\u8981\u4f9d\u636e\u539f\u6570\u636e\uff08t1 t2\uff09\u5bf9\u53c2\u6570\u8fdb\u884c\u6b63\u5219\u5316\n    delta1, delta2 = deserialize(gradient(theta, X, y))\n    # \u6570\u636e\u4e2a\u6570\n    data_length = X.shape[0]\n    # \u4e0d\u60e9\u7f5a\u7b2c\u4e00\u9879 \u6240\u4ee5t1\u3001t2\u7684\u7b2c\u4e00\u9879\u8bbe\u4e3a0\n    t1[:, 0] = 0\n    t2[:, 0] = 0\n    reg_t1 = (l \/ data_length) * t1\n    reg_t2 = (l \/ data_length) * t2\n    delta1 = delta1 + reg_t1\n    delta2 = delta2 + reg_t2\n    delta1_sum = np.sum(delta1)\n    delta2_sum = np.sum(delta2)\n    a2 = np.sum(serialize(delta1, delta2))\n    return serialize(delta1, delta2)\ndef cost(theta, X, y, l=1):\n    \"\"\"\n    \u4ee3\u4ef7\u8ba1\u7b97\u51fd\u6570\n    :param theta:\n    :param X:\n    :param y:\n    :param l:\n    :return:\n    \"\"\"\n    data_length = X.shape[0]\n    _, _, _, output_layar_final_data = feed_forward(theta, X)\n    pair_computation = -np.multiply(y, np.log(output_layar_final_data)) - np.multiply((1 - y), np.log(\n        1 - output_layar_final_data))\n    return pair_computation.sum() \/ data_length\n    # return np.sum(\n    #     -(y * np.log(output_layar_final_data)) - ((1 - y) * np.log(1 - output_layar_final_data))) \/ data_length\ndef expand_y(y):\n    \"\"\"\n    \u5904\u7406\u7ed3\u679c\u5411\u91cf\n    \u5c061 2 \u8fd9\u79cd\u7ed3\u679c\u8f6c\u5316\u4e3a 0 1 0 0 0 0 0  \u8fd9\u79cd\u5411\u91cf\n    :param y:\n    :return:\n    \"\"\"\n    res = []\n    for i in y:\n        y_array = np.zeros(10)\n        y_array[i - 1] = 1\n        res.append(y_array)\n    return 
np.array(res)\ndef regularized_cost(theta, X, y, l=1):\n    \"\"\"\n    \u6b63\u5219\u5316\u4ee3\u4ef7\u8ba1\u7b97\u51fd\u6570\n    :param theta:\n    :param X:\n    :param y:\n    :param l:\n    :return:\n    \"\"\"\n    t1, t2 = deserialize(theta)\n    data_length = X.shape[0]\n    # \u4e0d\u60e9\u7f5a\u7b2c\u4e00\u9879\uff0c\u5c06\u7b2c\u4e00\u5217\u53bb\u6389\n    reg_t1 = (l \/ (2 * data_length)) * np.power(t1[:, 1:], 2).sum()\n    reg_t2 = (l \/ (2 * data_length)) * np.power(t2[:, 1:], 2).sum()\n    cost1 = cost(theta, X, y)\n    a2 = cost(theta, X, y, l) + reg_t1 + reg_t2\n    return cost(theta, X, y, l) + reg_t1 + reg_t2\ndef show_accuracy(theta, X, y):\n    \"\"\"\n    \u8f93\u51fa\u51c6\u786e\u7387\n    :param theta:\n    :param X:\n    :param y:\n    :return:\n    \"\"\"\n    _, _, _, h = feed_forward(theta, X)\n    y_pred = np.argmax(h, axis=1) + 1\n    print(classification_report(y, y_pred))\ndef random_init(size):\n    \"\"\"\n    \u968f\u673a\u521d\u59cb\u5316\n    :param size:\n    :return:\n    \"\"\"\n    return np.random.uniform(-0.12, 0.12, size)\ndef network_training(X, y):\n    \"\"\"\n    \u795e\u7ecf\u7f51\u7edc\u8bad\u7ec3\n    :param X:\n    :param y:\n    :return:\n    \"\"\"\n    init_theta = random_init(401 * 25 + 26 * 10)\n    # init_theta = np.zeros(10285)\n    res = opt.minimize(fun=regularized_cost,\n                       x0=init_theta,\n                       args=(X, y, 1),\n                       jac=regularized_gradient,\n                       method='TNC')\n    return res\ndef plot_hidden_layer(theta):\n    \"\"\"\n    \u7ed8\u5236\u9690\u85cf\u5c42\n    theta: (10285, )\n    \"\"\"\n    final_theta1, _ = deserialize(theta)\n    hidden_layer = final_theta1[:, 1:]  # ger rid of bias term theta\n    fig, ax_array = plt.subplots(nrows=5, ncols=5, sharey=True, sharex=True, figsize=(5, 5))\n    for r in range(5):\n        for c in range(5):\n            ax_array[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)),\n            
                       cmap=matplotlib.cm.binary)\n            plt.xticks(np.array([]))\n            plt.yticks(np.array([]))\nif __name__ == '__main__':\n    X, y = load_data('.\/data\/ex4data1.mat')\n    y_raw = expand_y(y)\n    X = np.insert(X, 0, np.ones(X.shape[0]), axis=1)\n    res = network_training(X, y_raw)\n    print(res)\n    show_accuracy(res.x, X, y)\n    plot_hidden_layer(res.x)\n    plt.show()\n<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u5434\u6069\u8fbeMachine-Learning \u7b2c\u4e94\u5468\uff1a\u795e\u7ecf\u7f51\u7edc\u53cd\u5411\u4f20\u64ad\uff08Neural Network ba [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_mi_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[6,10],"tags":[],"views":4870,"_links":{"self":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/1392"}],"collection":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/comments?post=1392"}],"version-history":[{"count":0,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/posts\/1392\/revisions"}],"wp:attachment":[{"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/media?parent=1392"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/categories?post=1392"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.sniper97.cn\/index.php\/wp-json\/wp\/v2\/tags?post=1392"}],"curies":[{"name":"wp","h
ref":"https:\/\/api.w.org\/{rel}","templated":true}]}}