From 773f032e01815dc45cf82d52c5dc01d8ec20ab09 Mon Sep 17 00:00:00 2001
From: efb4518 <efb4518@ic.ac.uk>
Date: Thu, 3 Mar 2022 12:01:12 +0000
Subject: [PATCH] Fix translate augmentation args; drop debug leftovers

---
 ...d_RoBERTa_baseline_train_dev_dataset.ipynb | 29 +++----------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb b/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb
index af9ee9b..2c8e565 100644
--- a/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb
+++ b/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb
@@ -1716,38 +1716,16 @@
    "source": [
     "def generate_new_sent(sent):\n",
     "    new_sent = sent.copy()\n",
-    "    # print(\"NEW SENT: \", new_sent)\n",
-    "    # print(\"before: \", new_sent.iloc[0,1])\n",
     "    new_sent.iloc[0, 1] = translate_sent(new_sent.iloc[0].at[\"text\"])\n",
-    "    # print(\"afer: \", new_sent.iloc[0,1])\n",
-    "    # new_sent[\"text\"] = translate_sent(new_sent[\"text\"])\n",
     "    return new_sent\n",
     "\n",
     "def apply_translate(data, percentage):\n",
-    "    # set_to_change = np.random.choice(data, len(data)*percentage)\n",
-    "    # print(len(data))\n",
     "    data_size = len(data)\n",
     "    inds_to_change = np.random.randint(low=0, high=data_size, size=int(data_size*percentage))\n",
-    "    # print(inds_to_change)\n",
-    "\n",
-    "    # print(\"data iloc: \", data.iloc[[0]])\n",
-    "    # print(\"data iloc text: \", data.iloc[[0]][\"text\"])\n",
     "    \n",
     "    translated_set = pd.concat([generate_new_sent(data.iloc[[i]]) for i in inds_to_change])\n",
-    "    # print(\"translated set: \", translated_set)\n",
     "    augmented_set = pd.concat([data,translated_set], ignore_index=True)\n",
-    "    return augmented_set\n",
-    "\n",
-    "\n",
-    "# data = [\"Cat in the wall, eh?! Okay, now youâ€™re talking my language.\",\n",
-    "#         \"You gotta pay the troll toll, if you wanna get into that boyâ€™s soul. \",\n",
-    "#         \"Mac, Iâ€™m gonna stop you right there. First of all, your breath smells like an old lady fart passing through an onion. Secondly, I know youâ€™re trying to manipulate me, and itâ€™s not going to work. Get your hand off my shoulder, because Iâ€™ve got a fatty to burn.\",\n",
-    "#         \"Hereâ€™s a confession: Iâ€™m in love with a man. What? Iâ€™m in love with a man. A man called God. Does that make me gay? Am I gay for God? You betcha!\",\n",
-    "#         \"Hi. Um, Iâ€™m a recovering crackhead. This is my retarded sister that I take care of. Iâ€™d like some welfare, please. \",\n",
-    "#         \"I got my Magnum condoms; I got my wad of hundreds. Iâ€™m ready to plow. \"\n",
-    "# ]\n",
-    "\n",
-    "# print(apply_translate(data, 1))"
+    "    return augmented_set\n"
    ]
   },
   {
@@ -1807,8 +1785,9 @@
     "        augmented_data, non_pat_data = apply_embedding(data, word_percent, data_precent)\n",
     "        new_df = pd.concat([new_df, augmented_data, non_pat_data], ignore_index=True)\n",
     "    if use_translate:\n",
-    "        augmented = apply_translate(data, percentage)\n",
-    "        new_df = pd.concat([new_df, augmented], ignore_index=True)\n",
+    "        augmented = apply_translate(data, translate_percent)\n",
+    "        new_df = augmented\n",
+    "        # new_df = pd.concat([new_df, augmented], ignore_index=True)\n",
     "\n",
     "    return new_df\n",
     "\n",
-- 
GitLab