...
 
Commits (2)
......@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
......@@ -324,7 +324,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
......@@ -337,7 +337,7 @@
" print(\"\")\n",
" with open(fn, 'w') as output_file:\n",
" for idx,x in enumerate(scores):\n",
" output_file.write(f\"{x}\\n\")"
" output_file.write(\"{}\\n\".format(x))"
]
},
{
......@@ -358,7 +358,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -418,7 +418,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
......@@ -558,8 +558,8 @@
"print(\"Best Parameters for {}\".format(kernel))\n",
"print(params_rq)\n",
"\n",
"\n",
"\n",
"y_pred_rq = model.predict(X_val)\n",
"pearson_rq = pearsonr(y_val, y_pred_rq)\n",
"print('RMSE: {} Pearson {}'.format(rmse(y_pred_rq, y_val), pearson_rq[0]))\n",
"\n",
"# Best Parameters for RationalQuadratic(alpha=1, length_scale=1)\n",
......@@ -588,41 +588,81 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "__init__() got an unexpected keyword argument 'alpha_bounds'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-15-53e8dffb97a1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_bert_en\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_bert_de\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mclf_t\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGaussianProcessRegressor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mparams_rq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0mclf_t\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0msub_predictions\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclf_t\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: __init__() got an unexpected keyword argument 'alpha_bounds'"
"name": "stdout",
"output_type": "stream",
"text": [
"Best Parameters for RationalQuadratic(alpha=1, length_scale=1)\n",
"{'alpha_bounds': (1e-05, 100000.0), 'length_scale': 0.026872765898750588, 'length_scale_bounds': (1e-05, 100000.0), 'alpha': 0.0821329110167268}\n",
"[ 0.13366104 -0.00518545 0.06639134 0.06469153 -0.1009511 ]\n"
]
}
],
"source": [
"## train Gaussian Process Regressor (kernel=RationalQuadratic) with train + dev\n",
"from sklearn.gaussian_process import GaussianProcessRegressor\n",
"from sklearn.gaussian_process.kernels import RationalQuadratic\n",
"\n",
"X_train = np.concatenate((combined_train_src, combined_train_mt), axis=1)\n",
"y_train = np.array(combined_train_scores).astype(float)\n",
"\n",
"X_test = np.concatenate((test_bert_en, test_bert_de), axis=1)\n",
"\n",
"clf_t = GaussianProcessRegressor(**params_rq)\n",
"clf_t.fit(X_train, y_train)\n",
"sub_predictions = clf_t.predict(X_test)"
"kernel = RationalQuadratic()\n",
"\n",
"model = GaussianProcessRegressor(kernel=kernel)\n",
"model.fit(X_train, y_train)\n",
"params_rq = model.kernel_.get_params()\n",
"\n",
"print(\"Best Parameters for {}\".format(kernel))\n",
"print(params_rq)\n",
"\n",
"y_pred_rq_test = model.predict(X_test)\n",
"print(y_pred_rq_test[:5])"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"#Generate zip file required for submission to challenge website\n",
"\n",
"from zipfile import ZipFile\n",
"\n",
"writeScores(y_pred_rq_test)\n",
"\n",
"with ZipFile(\"en-de_gprq.zip\",\"w\") as newzip:\n",
" newzip.write(\"predictions.txt\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hello\n"
]
}
],
"source": [
"print(sub_predictions[:10])"
"print(\"hello\")"
]
},
{
......
File added