From 773f032e01815dc45cf82d52c5dc01d8ec20ab09 Mon Sep 17 00:00:00 2001 From: efb4518 <efb4518@ic.ac.uk> Date: Thu, 3 Mar 2022 12:01:12 +0000 Subject: [PATCH] Translate fix --- ...d_RoBERTa_baseline_train_dev_dataset.ipynb | 29 +++---------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb b/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb index af9ee9b..2c8e565 100644 --- a/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb +++ b/Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb @@ -1716,38 +1716,16 @@ "source": [ "def generate_new_sent(sent):\n", " new_sent = sent.copy()\n", - " # print(\"NEW SENT: \", new_sent)\n", - " # print(\"before: \", new_sent.iloc[0,1])\n", " new_sent.iloc[0, 1] = translate_sent(new_sent.iloc[0].at[\"text\"])\n", - " # print(\"afer: \", new_sent.iloc[0,1])\n", - " # new_sent[\"text\"] = translate_sent(new_sent[\"text\"])\n", " return new_sent\n", "\n", "def apply_translate(data, percentage):\n", - " # set_to_change = np.random.choice(data, len(data)*percentage)\n", - " # print(len(data))\n", " data_size = len(data)\n", " inds_to_change = np.random.randint(low=0, high=data_size, size=int(data_size*percentage))\n", - " # print(inds_to_change)\n", - "\n", - " # print(\"data iloc: \", data.iloc[[0]])\n", - " # print(\"data iloc text: \", data.iloc[[0]][\"text\"])\n", " \n", " translated_set = pd.concat([generate_new_sent(data.iloc[[i]]) for i in inds_to_change])\n", - " # print(\"translated set: \", translated_set)\n", " augmented_set = pd.concat([data,translated_set], ignore_index=True)\n", - " return augmented_set\n", - "\n", - "\n", - "# data = [\"Cat in the wall, eh?! Okay, now you’re talking my language.\",\n", - "# \"You gotta pay the troll toll, if you wanna get into that boy’s soul. \",\n", - "# \"Mac, I’m gonna stop you right there. First of all, your breath smells like an old lady fart passing through an onion. Secondly, I know you’re trying to manipulate me, and it’s not going to work. Get your hand off my shoulder, because I’ve got a fatty to burn.\",\n", - "# \"Here’s a confession: I’m in love with a man. What? I’m in love with a man. A man called God. Does that make me gay? Am I gay for God? You betcha!\",\n", - "# \"Hi. Um, I’m a recovering crackhead. This is my retarded sister that I take care of. I’d like some welfare, please. \",\n", - "# \"I got my Magnum condoms; I got my wad of hundreds. I’m ready to plow. \"\n", - "# ]\n", - "\n", - "# print(apply_translate(data, 1))" + " return augmented_set\n" ] }, { @@ -1807,8 +1785,9 @@ " augmented_data, non_pat_data = apply_embedding(data, word_percent, data_precent)\n", " new_df = pd.concat([new_df, augmented_data, non_pat_data], ignore_index=True)\n", " if use_translate:\n", - " augmented = apply_translate(data, percentage)\n", - " new_df = pd.concat([new_df, augmented], ignore_index=True)\n", + " augmented = apply_translate(data, translate_percent)\n", + " new_df = augmented\n", + " # new_df = pd.concat([new_df, augmented], ignore_index=True)\n", "\n", " return new_df\n", "\n", -- GitLab