Skip to content
Snippets Groups Projects
Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb 332 KiB
Newer Older
  • Learn to ignore specific revisions
  • Ella's avatar
    Ella committed
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>15516</th>\n",
           "      <td>873</td>\n",
           "      <td>cite the fact that these kids world health org...</td>\n",
           "      <td>1</td>\n",
           "      <td>2</td>\n",
           "      <td>0.0</td>\n",
           "      <td>31.500000</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>15517</th>\n",
           "      <td>10070</td>\n",
           "      <td>Fern ? ndez was a well-known philanthropist wo...</td>\n",
           "      <td>1</td>\n",
           "      <td>2</td>\n",
           "      <td>0.0</td>\n",
           "      <td>19.500000</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>15518</th>\n",
           "      <td>6484</td>\n",
           "      <td>touch on a lot away their predicament , comman...</td>\n",
           "      <td>1</td>\n",
           "      <td>2</td>\n",
           "      <td>0.0</td>\n",
           "      <td>27.500000</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>15519</th>\n",
           "      <td>6249</td>\n",
           "      <td>She iterate her ministry 's commitment to put ...</td>\n",
           "      <td>1</td>\n",
           "      <td>2</td>\n",
           "      <td>0.0</td>\n",
           "      <td>18.500000</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>15520</th>\n",
           "      <td>5149</td>\n",
           "      <td>preach the sermon , the Dean of the St. Peter ...</td>\n",
           "      <td>1</td>\n",
           "      <td>4</td>\n",
           "      <td>0.0</td>\n",
           "      <td>15.750000</td>\n",
           "    </tr>\n",
           "  </tbody>\n",
           "</table>\n",
           "<p>15521 rows × 6 columns</p>\n",
           "</div>"
    
    Azhara's avatar
    Azhara committed
          ],
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "      par_id                                               text  label  \\\n",
           "0       4341  The scheme saw an estimated 150,000 children f...      1   \n",
           "1       4136  Durban 's homeless communities reconciliation ...      1   \n",
           "2      10352  The next immediate problem that cropped up was...      1   \n",
           "3       8279  Far more important than the implications for t...      1   \n",
           "4       1164  To strengthen child-sensitive social protectio...      1   \n",
           "...      ...                                                ...    ...   \n",
           "15516    873  cite the fact that these kids world health org...      1   \n",
           "15517  10070  Fern ? ndez was a well-known philanthropist wo...      1   \n",
           "15518   6484  touch on a lot away their predicament , comman...      1   \n",
           "15519   6249  She iterate her ministry 's commitment to put ...      1   \n",
           "15520   5149  preach the sermon , the Dean of the St. Peter ...      1   \n",
           "\n",
           "       num_sentences_in_paragraph  readability_score  avg_sentence_length  \n",
           "0                               2                0.0            18.500000  \n",
           "1                               1                0.0             6.000000  \n",
           "2                               3                0.0            24.666667  \n",
           "3                               2                0.0            23.000000  \n",
           "4                               2                0.0            25.000000  \n",
           "...                           ...                ...                  ...  \n",
           "15516                           2                0.0            31.500000  \n",
           "15517                           2                0.0            19.500000  \n",
           "15518                           2                0.0            27.500000  \n",
           "15519                           2                0.0            18.500000  \n",
           "15520                           4                0.0            15.750000  \n",
           "\n",
           "[15521 rows x 6 columns]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 72,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "training_set1_synonyms = trdf1_synonym\n",
        "training_set1_synonyms"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 73,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 538,
         "referenced_widgets": [
          "de026e2d1ec848fbb35faf1746e7579d",
          "7a0d26d292e54498a15bc2e54e6c3aee",
          "13bc966911b1433ab6f8245f88b86e2d",
          "ec69c0f693b74ac8b737816b1efaa8bf",
          "cf4fb870564043cfbc171ae05143ec5f",
          "abc4d60485bf40d7abf3cbcf212e39a2",
          "85ea942b0e194d2a9160f82494fff8e0",
          "6b2a3cde2cdb4bc6a9d0623363c66266",
          "a287d92682d644daa34030f58227540f",
          "2eea385dfcf745e6b92c37eb6cd901f4",
          "59c41a4934cf4ef2b467ae5d97b615f0",
          "a90282c995cc4c6db59eaa51e3414ad1",
          "d76e461fca524e04810c38b44ad51185",
          "b4d34c0c2e364f88b70036cbff6f780a",
          "4d22ac94c09d4802b4664e456278308a",
          "594d094f060e4481977901e11c2ddaf2",
          "b276fc7fd7954ec09fabbcad64b93508",
          "29ad13a85dc44e4e919bed1daddf0935",
          "c0b81615b8fc4b8d8a48232d029e717d",
          "c1dc117db5c444bd918b9ab70b587f48",
          "fc14f76deb2549b3a5c6bce9ea3dd7e3",
          "a8757d3393f04a55a5bea7f923944281",
          "df46941823a540f787e2e9dad52243d3",
          "904714fa04be424da224c801f06d52f6",
          "790e423a21b840978da497d9d35634a6",
          "6cc05e8394ac491999930b9493be855e",
          "8cefe2ef3c984e3a8ad1bb8ecb7f0dbb",
          "4ec670884e2247ec872defaa5bf7dcc5",
          "3d7e4c3c0af84bc4940d2e25ad8a5162",
          "4f8aa1c4c8914693a24523c1064ecf97",
          "ab2d97c595ff4e55969bc2f1415f747d",
          "60b8f43949894143a83263e96d0ac3fa",
          "703f69d98e9947f3aeaba07b51d64f3c",
          "59458022ec4547beba130ce755946427",
          "26178b1ed5164ca7bcea84b65fc95522",
          "ac45cf1a895f4ba39c5be0ad136c4591",
          "117489e4902040ae97cb222f8c4dda12",
          "ba09b68a05644dd1b7d12a19396fb3b8",
          "37531d1ef87f45c99df60fb6a8a66d78",
          "1c61f90f85a54cbb9d2ccdbf614eb8d5",
          "c11ef82d87114a59868d9d7810b936db",
          "a8de20f8116142f6bf25aef25f37ad85",
          "32fe020b5dc54143af2830ec578eecdf",
          "2fcffe85dbcc4c91b840c61799b82772",
          "2c3f59321aac4ea78e4c70618f907e08",
          "ff3206118d3043b3a9b757ec3ca01d9f",
          "d1343667008643c8a893bfae962503c1",
          "384f2a4804ed458498b35f04d32e41b5",
          "f7e1e00cad5449dfb55c6a920a292540",
          "ec68ce238bb04063a74afd67a4f1e877",
          "05e6b65f527c48e5ab8aaa8a220287c0",
          "ee3cdac0740d43188518e097ae5863a5",
          "6f8e6355fcb74fc98cc641bc36bd1707",
          "19628b1c68004e9a98ecb5a934d57d2d",
          "cafdb944408b42eaacec5b2e8566962b",
          "b8654137c93f4a70b0fb8f7e692f7673",
          "b088cb07387d4a46b3c7d4d7cc9a5aba",
          "e567e754183b42d69d3d200a4517b132",
          "e4c905c262994b7d9197f77b2da2c178",
          "f0b59d4d691c44a4a7dbb40991518d36",
          "fe372945c13845e5bd01ca4e153ed61c",
          "c3a17fa9e8da4d9fb571e307f75ac144",
          "23ee94259c41435ea19264f4a2937585",
          "c3cafc815e4444a8998e608cd9c9300d",
          "49fd3eaa74fa493fbe7324607888c542",
          "6870cb8717ba407c81d0b7c79ad27d26",
          "d32db030dff24fe3bd1983a44c703311",
          "ee1cc81a307a4ea6ac00b65a4d314877",
          "d16d01773a334ccb8a9231d3cae6e0f4",
          "c43f7ff02eb74e0d900029f97652a973",
          "d6a37bb80db94947810fa1d69b8b416b",
          "4f8b629635194d759b9c91f37a445632",
          "d48d137548464c3387dd51494cb969bf",
          "89f9d8cab0c44b58afee34914198a9e6",
          "3119accf975446eda52b0125571322a4",
          "bac5decd48b74036a270f70a05c3f80e",
          "2c646f33655d4646b70afef87232d5a1",
          "82f733de38ef4c5f8734cbdf47ec1a01",
          "818e5dbea2094a97a18974aebe4ddb86",
          "c2dae6a605f842d3aaad1f86b2e332b6",
          "197dca1cad8b425bb4622c9d5da461dc",
          "1cf670526680446db48fbf64b1cae0a6",
          "ece446ccb0d3496a81dc0250fff9039d",
          "d7d3e0bd4a9c43dd9946a8a63b12d1d7",
          "f879197b8c2443278e280f82f20cc609",
          "c983b2e75ec44867a14f6d3abdfff2ec",
          "d1df90f8eead4f5e8718fd42ec0d5e40",
          "b3f807cfae5a44fcbf1069817e6fb79d",
          "a308011d2efd403fb6b8d250d8f98fa9",
          "770ad4abfc4f4eeebefade5afcad22a3",
          "226d2e2955444f9ead05b5991d7939e5",
          "9f03facdd7704ab9be2bd65f1060f215",
          "fc122d487cbd4ab69e78f1b9425774ba",
          "21c2295bc2a54d979f2a5404f6a7ecb1",
          "b4bcc997cfa4444287370bbb4819cb43",
          "4fbbc5b908c14198b8c86ca2208bd890",
          "5bc982f17aec46b3b7f8308f567f6296",
          "f1fe964e4d2b4ebb8f76b3d8debdaab6",
          "7e3df3b58c7e4117b0a8b8fc45dffa01",
          "6851a6f291a04bd59da6cf04d6f762aa",
          "3cd79b7a96784504a28dce435f9de5b5",
          "d79bd01e1fe44c12ac530376324d9e0e",
          "4a03ac52afa84bfd82ced188892b1958",
          "8351a84b9c914d80a54ac57a2fb18304",
          "419ce58d2ed14589a3d2e1cd7b477e0f",
          "568d424c94094d669282ed72b4cf0be4",
          "7fe11f4ffd3747079b8de4453b29dbac",
          "22a8be9fae3245cca79dc7904ed4f49d",
          "ed1ad3ad89334675babf56f9737a17fb",
          "dba102bf2b3249c8907d4fe1d5cf0e82"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "PoW_s23AZ_DG",
        "outputId": "cae26801-a680-4d5b-9d76-d56872b5e611"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to acquire lock 139946483054384 on /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b.lock\n",
          "DEBUG:filelock:Lock 139946483054384 acquired on /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b.lock\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "8be5a63b87d546439dc5b2c102a53333",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to release lock 139946483054384 on /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b.lock\n",
          "DEBUG:filelock:Lock 139946483054384 released on /root/.cache/huggingface/transformers/733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b.lock\n",
          "DEBUG:filelock:Attempting to acquire lock 139946480692576 on /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7.lock\n",
          "DEBUG:filelock:Lock 139946480692576 acquired on /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7.lock\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "7de8d7262f2e4875a78e204e47c5c477",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Azhara's avatar
    Azhara committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to release lock 139946480692576 on /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7.lock\n",
          "DEBUG:filelock:Lock 139946480692576 released on /root/.cache/huggingface/transformers/51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7.lock\n",
          "Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias']\n",
          "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
          "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
          "Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight']\n",
          "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
          "DEBUG:filelock:Attempting to acquire lock 139948825858928 on /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock\n",
          "DEBUG:filelock:Lock 139948825858928 acquired on /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock\n"
         ]
    
    Azhara's avatar
    Azhara committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "8b6b8c768b80452f9702586768393fde",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Azhara's avatar
    Azhara committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to release lock 139948825858928 on /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock\n",
          "DEBUG:filelock:Lock 139948825858928 released on /root/.cache/huggingface/transformers/d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab.lock\n",
          "DEBUG:filelock:Attempting to acquire lock 139948825440848 on /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock\n",
          "DEBUG:filelock:Lock 139948825440848 acquired on /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock\n"
         ]
    
    Azhara's avatar
    Azhara committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "3809e72b9da5471d9b009e1c957e5d6e",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Azhara's avatar
    Azhara committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to release lock 139948825440848 on /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock\n",
          "DEBUG:filelock:Lock 139948825440848 released on /root/.cache/huggingface/transformers/cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b.lock\n",
          "DEBUG:filelock:Attempting to acquire lock 139946483054384 on /root/.cache/huggingface/transformers/d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock\n",
          "DEBUG:filelock:Lock 139946483054384 acquired on /root/.cache/huggingface/transformers/d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock\n"
         ]
    
    Azhara's avatar
    Azhara committed
        },
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "70ac437a0b2e457ba4cf52a91af4b433",
           "version_major": 2,
           "version_minor": 0
    
    Emily Haw's avatar
    Emily Haw committed
          },
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "DEBUG:filelock:Attempting to release lock 139946483054384 on /root/.cache/huggingface/transformers/d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock\n",
          "DEBUG:filelock:Lock 139946483054384 released on /root/.cache/huggingface/transformers/d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730.lock\n",
          "/opt/conda/lib/python3.8/site-packages/simpletransformers/classification/classification_model.py:585: UserWarning: Dataframe headers not specified. Falling back to using column 0 as text and column 1 as labels.\n",
          "  warnings.warn(\n",
          "INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "33ee72e416fc40338680953e5ae30ed2",
           "version_major": 2,
           "version_minor": 0
    
    Emily Haw's avatar
    Emily Haw committed
          },
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "  0%|          | 0/15521 [00:00<?, ?it/s]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
          "  warnings.warn(\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "dcc3d18209bb46c889cf017a9bcb732d",
           "version_major": 2,
           "version_minor": 0
    
    Emily Haw's avatar
    Emily Haw committed
          },
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "Epoch:   0%|          | 0/1 [00:00<?, ?it/s]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "7c6ec7ac84154b36b662a0d69f7e5726",
           "version_major": 2,
           "version_minor": 0
    
    Emily Haw's avatar
    Emily Haw committed
          },
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "Running Epoch 0 of 1:   0%|          | 0/1941 [00:00<?, ?it/s]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.\n",
          "INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "c45aceb741f34cd7b6c41375d72ac0ce",
           "version_major": 2,
           "version_minor": 0
          },
          "text/plain": [
           "  0%|          | 0/2094 [00:00<?, ?it/s]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
    
    Azhara's avatar
    Azhara committed
        },
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "application/vnd.jupyter.widget-view+json": {
           "model_id": "1a924855255f4f429268ead11535b7f7",
           "version_major": 2,
           "version_minor": 0
    
    Azhara's avatar
    Azhara committed
          },
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "  0%|          | 0/262 [00:00<?, ?it/s]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "metadata": {},
         "output_type": "display_data"
        }
       ],
       "source": [
        "task1_model_args = ClassificationArgs(num_train_epochs=1, \n",
        "                                      no_save=True, \n",
        "                                      no_cache=True, \n",
        "                                      overwrite_output_dir=True)\n",
        "task1_model = ClassificationModel(\"roberta\", \n",
        "                                  'roberta-base', \n",
        "                                  args = task1_model_args, \n",
        "                                  num_labels=2, \n",
        "                                  use_cuda=cuda_available)\n",
        "# train model\n",
        "task1_model.train_model(training_set1_synonyms[['text', 'label']])\n",
        "# run predictions\n",
        "preds_task1, _ = task1_model.predict(tedf1.text.tolist())"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 74,
       "metadata": {},
       "outputs": [
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/plain": [
           "Counter({0: 1996, 1: 98})"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 74,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "Counter(preds_task1)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 79,
       "metadata": {},
       "outputs": [
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/plain": [
           "0.9135625596943648"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 79,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "test_labels = tedf1.label.to_list()\n",
        "correct = 0\n",
        "for i in range(len(preds_task1)):\n",
        "    correct += preds_task1[i] == test_labels[i]\n",
        "correct / len(preds_task1)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "k7Cc_u5Oli7j"
       },
       "source": [
        "# Rebuild training set (Task 2)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "D2WLYT7wli7k"
       },
       "outputs": [],
       "source": [
        "rows2 = [] # will contain par_id, label and text\n",
        "for idx in range(len(trids)):  \n",
        "  parid = trids.par_id[idx]\n",
        "  label = trids.label[idx]\n",
        "  # select row from original dataset to retrieve the `text` value\n",
        "  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]\n",
        "  rows2.append({\n",
        "      'par_id':parid,\n",
        "      'text':text,\n",
        "      'label':label\n",
        "  })\n",
        "  "
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "LFqMMb5Jli7l"
       },
       "outputs": [],
       "source": [
        "trdf2 = pd.DataFrame(rows2)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 422
    
    Azhara's avatar
    Azhara committed
        },
    
    Ella's avatar
    Ella committed
        "id": "HayrC9q7mQPl",
        "outputId": "db5f1bdf-c09a-4a57-f81e-612100e32b44"
       },
       "outputs": [
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>text</th>\n",
           "      <th>label</th>\n",
           "    </tr>\n",
           "  </thead>\n",
           "  <tbody>\n",
           "    <tr>\n",
           "      <th>0</th>\n",
           "      <td>4341</td>\n",
           "      <td>the scheme saw an estimated 150,000 children f...</td>\n",
           "      <td>[1, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1</th>\n",
           "      <td>4136</td>\n",
           "      <td>durban 's homeless communities reconciliation ...</td>\n",
           "      <td>[0, 1, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2</th>\n",
           "      <td>10352</td>\n",
           "      <td>the next immediate problem that cropped up was...</td>\n",
           "      <td>[1, 0, 0, 0, 0, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>3</th>\n",
           "      <td>8279</td>\n",
           "      <td>far more important than the implications for t...</td>\n",
           "      <td>[0, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>4</th>\n",
           "      <td>1164</td>\n",
           "      <td>to strengthen child-sensitive social protectio...</td>\n",
           "      <td>[1, 0, 0, 1, 1, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>...</th>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>8370</th>\n",
           "      <td>8380</td>\n",
           "      <td>rescue teams search for survivors on the rubbl...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>8371</th>\n",
           "      <td>8381</td>\n",
           "      <td>the launch of ' happy birthday ' took place la...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>8372</th>\n",
           "      <td>8382</td>\n",
           "      <td>the unrest has left at least 20,000 people dea...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>8373</th>\n",
           "      <td>8383</td>\n",
           "      <td>you have to see it from my perspective . i may...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>8374</th>\n",
           "      <td>8384</td>\n",
           "      <td>yet there was one occasion when we went to the...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "  </tbody>\n",
           "</table>\n",
           "<p>8375 rows × 3 columns</p>\n",
           "</div>"
    
    Azhara's avatar
    Azhara committed
          ],
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "     par_id  ...                  label\n",
           "0      4341  ...  [1, 0, 0, 1, 0, 0, 0]\n",
           "1      4136  ...  [0, 1, 0, 0, 0, 0, 0]\n",
           "2     10352  ...  [1, 0, 0, 0, 0, 1, 0]\n",
           "3      8279  ...  [0, 0, 0, 1, 0, 0, 0]\n",
           "4      1164  ...  [1, 0, 0, 1, 1, 1, 0]\n",
           "...     ...  ...                    ...\n",
           "8370   8380  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "8371   8381  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "8372   8382  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "8373   8383  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "8374   8384  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "\n",
           "[8375 rows x 3 columns]"
    
    Azhara's avatar
    Azhara committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 26,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "trdf2"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "MxHLB_g0pfEb"
       },
       "outputs": [],
       "source": [
        "trdf2.label = trdf2.label.apply(literal_eval)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "Gukbmv0bli7l"
       },
       "source": [
        "# Rebuild test set (Task 2)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "gjH-AJK1li7m"
       },
       "outputs": [],
       "source": [
        "rows2 = [] # will contain par_id, text and label\n",
        "for idx in range(len(teids)):  \n",
        "  parid = teids.par_id[idx]\n",
        "  label = teids.label[idx]\n",
        "  #print(parid)\n",
        "  # select the first row with this par_id in the original Task 1 dataframe to access its `text` value\n",
        "  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]\n",
        "  rows2.append({\n",
        "      'par_id':parid,\n",
        "      'text':text,\n",
        "      'label':label\n",
        "  })\n",
        "  "
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "SRP-tn5wli7n"
       },
       "outputs": [],
       "source": [
        "tedf2 = pd.DataFrame(rows2)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 422
    
    Azhara's avatar
    Azhara committed
        },
    
    Ella's avatar
    Ella committed
        "id": "8U2lrfJiolku",
        "outputId": "6bf1181c-3e95-4913-cceb-9cc9e08b6c29"
       },
       "outputs": [
    
    Azhara's avatar
    Azhara committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>text</th>\n",
           "      <th>label</th>\n",
           "    </tr>\n",
           "  </thead>\n",
           "  <tbody>\n",
           "    <tr>\n",
           "      <th>0</th>\n",
           "      <td>4046</td>\n",
           "      <td>we also know that they can benefit by receivin...</td>\n",
           "      <td>[1, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1</th>\n",
           "      <td>1279</td>\n",
           "      <td>pope francis washed and kissed the feet of mus...</td>\n",
           "      <td>[0, 1, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2</th>\n",
           "      <td>8330</td>\n",
           "      <td>many refugees do n't want to be resettled anyw...</td>\n",
           "      <td>[0, 0, 1, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>3</th>\n",
           "      <td>4063</td>\n",
           "      <td>\"budding chefs , like \"\" fred \"\" , \"\" winston ...</td>\n",
           "      <td>[1, 0, 0, 1, 1, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>4</th>\n",
           "      <td>4089</td>\n",
           "      <td>\"in a 90-degree view of his constituency , one...</td>\n",
           "      <td>[1, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>...</th>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2089</th>\n",
           "      <td>10462</td>\n",
           "      <td>the sad spectacle , which occurred on saturday...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2090</th>\n",
           "      <td>10463</td>\n",
           "      <td>\"\"\" the pakistani police came to our house and...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2091</th>\n",
           "      <td>10464</td>\n",
           "      <td>\"when marie o'donoghue went looking for a spec...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2092</th>\n",
           "      <td>10465</td>\n",
           "      <td>\"sri lankan norms and culture inhibit women fr...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2093</th>\n",
           "      <td>10466</td>\n",
           "      <td>he added that the afp will continue to bank on...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "  </tbody>\n",
           "</table>\n",
           "<p>2094 rows × 3 columns</p>\n",
           "</div>"
    
    Emily Haw's avatar
    Emily Haw committed
          ],
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "     par_id  ...                  label\n",
           "0      4046  ...  [1, 0, 0, 1, 0, 0, 0]\n",
           "1      1279  ...  [0, 1, 0, 0, 0, 0, 0]\n",
           "2      8330  ...  [0, 0, 1, 0, 0, 0, 0]\n",
           "3      4063  ...  [1, 0, 0, 1, 1, 1, 0]\n",
           "4      4089  ...  [1, 0, 0, 0, 0, 0, 0]\n",
           "...     ...  ...                    ...\n",
           "2089  10462  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "2090  10463  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "2091  10464  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "2092  10465  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "2093  10466  ...  [0, 0, 0, 0, 0, 0, 0]\n",
           "\n",
           "[2094 rows x 3 columns]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 30,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "tedf2"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "81aQFjWqpbe2"
       },
       "outputs": [],
       "source": [
        "tedf2.label = tedf2.label.apply(literal_eval)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "YKFiVaslbAiC"
       },
       "source": [
        "# RoBERTa baseline for Task 2"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "id": "hmr5ZZf5Ik5T"
       },
       "outputs": [],
       "source": [
        "all_negs = trdf2[trdf2.label.apply(lambda x:sum(x) == 0)]\n",
        "all_pos = trdf2[trdf2.label.apply(lambda x:sum(x) > 0)]\n",
        "\n",
        "training_set2 = pd.concat([all_pos,all_negs[:round(len(all_pos)*0.5)]])"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 422
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "zyBcJoHtJHE2",
        "outputId": "983b27a6-3bec-47bc-e564-79face4b061c"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>text</th>\n",
           "      <th>label</th>\n",
           "    </tr>\n",
           "  </thead>\n",
           "  <tbody>\n",
           "    <tr>\n",
           "      <th>0</th>\n",
           "      <td>4341</td>\n",
           "      <td>the scheme saw an estimated 150,000 children f...</td>\n",
           "      <td>[1, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1</th>\n",
           "      <td>4136</td>\n",
           "      <td>durban 's homeless communities reconciliation ...</td>\n",
           "      <td>[0, 1, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2</th>\n",
           "      <td>10352</td>\n",
           "      <td>the next immediate problem that cropped up was...</td>\n",
           "      <td>[1, 0, 0, 0, 0, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>3</th>\n",
           "      <td>8279</td>\n",
           "      <td>far more important than the implications for t...</td>\n",
           "      <td>[0, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>4</th>\n",
           "      <td>1164</td>\n",
           "      <td>to strengthen child-sensitive social protectio...</td>\n",
           "      <td>[1, 0, 0, 1, 1, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>...</th>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "      <td>...</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1186</th>\n",
           "      <td>434</td>\n",
           "      <td>\"\"\" i was absolutely useless at school , hopel...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1187</th>\n",
           "      <td>435</td>\n",
           "      <td>i also noticed the change in socio-economic le...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1188</th>\n",
           "      <td>436</td>\n",
           "      <td>can donald trump win ? it 's possible , but ce...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1189</th>\n",
           "      <td>437</td>\n",
           "      <td>he added that any introduction of new law must...</td>\n",
           "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",