Skip to content
Snippets Groups Projects
Reconstruct_and_RoBERTa_baseline_train_dev_dataset.ipynb 332 KiB
Newer Older
  • Learn to ignore specific revisions
  • Azhara's avatar
    Azhara committed
    {
    
    Ella's avatar
    Ella committed
     "cells": [
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "H08esTFOYO99"
       },
       "source": [
        "# Main imports and code"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 1,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "EnHQoayhBYlm",
        "outputId": "eb747576-9a2a-474c-dbc7-4d6c042f68e6"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Sat Feb 26 13:12:45 2022       \n",
          "+-----------------------------------------------------------------------------+\n",
          "| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.4     |\n",
          "|-------------------------------+----------------------+----------------------+\n",
          "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
          "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
          "|                               |                      |               MIG M. |\n",
          "|===============================+======================+======================|\n",
          "|   0  Quadro P4000        Off  | 00000000:00:05.0 Off |                  N/A |\n",
          "| 46%   33C    P8     5W / 105W |      0MiB /  8119MiB |      0%      Default |\n",
          "|                               |                      |                  N/A |\n",
          "+-------------------------------+----------------------+----------------------+\n",
          "                                                                               \n",
          "+-----------------------------------------------------------------------------+\n",
          "| Processes:                                                                  |\n",
          "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
          "|        ID   ID                                                   Usage      |\n",
          "|=============================================================================|\n",
          "|  No running processes found                                                 |\n",
          "+-----------------------------------------------------------------------------+\n",
          "WARNING: infoROM is corrupted at gpu 0000:00:05.0\n"
         ]
        }
       ],
       "source": [
        "# check which gpu we're using\n",
        "!nvidia-smi"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 2,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 1000
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "hYhFR7nSYOjG",
        "outputId": "23ed0686-29d3-45ff-dc22-b2fe54e86ec4"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
          "Collecting simpletransformers\n",
          "  Downloading simpletransformers-0.63.4-py3-none-any.whl (248 kB)\n",
          "\u001B[K     |████████████████████████████████| 248 kB 16.4 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting tensorflow\n",
          "  Downloading tensorflow-2.8.0-cp38-cp38-manylinux2010_x86_64.whl (497.6 MB)\n",
          "\u001B[K     |████████████████████████████████| 497.6 MB 27.2 MB/s eta 0:00:01     |████████████▎                   | 190.3 MB 20.1 MB/s eta 0:00:16\n",
          "\u001B[?25hRequirement already satisfied: scikit-learn in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (1.0)\n",
          "Requirement already satisfied: scipy in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (1.6.3)\n",
          "Requirement already satisfied: regex in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (2021.10.8)\n",
          "Collecting tokenizers\n",
          "  Downloading tokenizers-0.11.5-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)\n",
          "\u001B[K     |████████████████████████████████| 6.8 MB 28.6 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (1.21.2)\n",
          "Collecting datasets\n",
          "  Downloading datasets-1.18.3-py3-none-any.whl (311 kB)\n",
          "\u001B[K     |████████████████████████████████| 311 kB 31.9 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting pandas\n",
          "  Downloading pandas-1.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)\n",
          "\u001B[K     |████████████████████████████████| 11.7 MB 27.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting wandb>=0.10.32\n",
          "  Downloading wandb-0.12.10-py2.py3-none-any.whl (1.7 MB)\n",
          "\u001B[K     |████████████████████████████████| 1.7 MB 32.9 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting transformers>=4.6.0\n",
          "  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)\n",
          "\u001B[K     |████████████████████████████████| 3.5 MB 29.6 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: tensorboard in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (2.6.0)\n",
          "Requirement already satisfied: requests in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (2.26.0)\n",
          "Collecting sentencepiece\n",
          "  Downloading sentencepiece-0.1.96-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
          "\u001B[K     |████████████████████████████████| 1.2 MB 29.2 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting seqeval\n",
          "  Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
          "\u001B[K     |████████████████████████████████| 43 kB 41.0 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting streamlit\n",
          "  Downloading streamlit-1.6.0-py2.py3-none-any.whl (9.7 MB)\n",
          "\u001B[K     |████████████████████████████████| 9.7 MB 33.1 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: tqdm>=4.47.0 in /opt/conda/lib/python3.8/site-packages (from simpletransformers) (4.62.3)\n",
          "Collecting gast>=0.2.1\n",
          "  Downloading gast-0.5.3-py3-none-any.whl (19 kB)\n",
          "Requirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.16.0)\n",
          "Requirement already satisfied: absl-py>=0.4.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (0.14.1)\n",
          "Collecting opt-einsum>=2.3.2\n",
          "  Downloading opt_einsum-3.3.0-py3-none-any.whl (65 kB)\n",
          "\u001B[K     |████████████████████████████████| 65 kB 31.3 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting keras-preprocessing>=1.1.1\n",
          "  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)\n",
          "\u001B[K     |████████████████████████████████| 42 kB 28.8 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.10.0.2)\n",
          "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.41.0)\n",
          "Collecting wrapt>=1.11.0\n",
          "  Downloading wrapt-1.13.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (84 kB)\n",
          "\u001B[K     |████████████████████████████████| 84 kB 46.4 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: setuptools in /opt/conda/lib/python3.8/site-packages (from tensorflow) (58.2.0)\n",
          "Collecting tensorboard\n",
          "  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)\n",
          "\u001B[K     |████████████████████████████████| 5.8 MB 28.6 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting astunparse>=1.6.0\n",
          "  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n",
          "Collecting h5py>=2.9.0\n",
          "  Downloading h5py-3.6.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.5 MB)\n",
          "\u001B[K     |████████████████████████████████| 4.5 MB 31.7 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting libclang>=9.0.1\n",
          "  Downloading libclang-13.0.0-py2.py3-none-manylinux1_x86_64.whl (14.5 MB)\n",
          "\u001B[K     |████████████████████████████████| 14.5 MB 68.4 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting termcolor>=1.1.0\n",
          "  Downloading termcolor-1.1.0.tar.gz (3.9 kB)\n",
          "Collecting tf-estimator-nightly==2.8.0.dev2021122109\n",
          "  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)\n",
          "\u001B[K     |████████████████████████████████| 462 kB 30.7 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting tensorflow-io-gcs-filesystem>=0.23.1\n",
          "  Downloading tensorflow_io_gcs_filesystem-0.24.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.1 MB)\n",
          "\u001B[K     |████████████████████████████████| 2.1 MB 32.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting google-pasta>=0.1.1\n",
          "  Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n",
          "\u001B[K     |████████████████████████████████| 57 kB 54.4 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting flatbuffers>=1.12\n",
          "  Downloading flatbuffers-2.0-py2.py3-none-any.whl (26 kB)\n",
          "Requirement already satisfied: protobuf>=3.9.2 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.18.1)\n",
          "Collecting keras<2.9,>=2.8.0rc0\n",
          "  Downloading keras-2.8.0-py2.py3-none-any.whl (1.4 MB)\n",
          "\u001B[K     |████████████████████████████████| 1.4 MB 35.4 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.8/site-packages (from astunparse>=1.6.0->tensorflow) (0.37.0)\n",
          "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (0.6.1)\n",
          "Requirement already satisfied: werkzeug>=0.11.15 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (2.0.2)\n",
          "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (3.3.4)\n",
          "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (0.4.6)\n",
          "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (1.8.0)\n",
          "Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.8/site-packages (from tensorboard->simpletransformers) (1.35.0)\n",
          "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->simpletransformers) (0.2.8)\n",
          "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->simpletransformers) (4.2.4)\n",
          "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard->simpletransformers) (4.7.2)\n",
          "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.8/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard->simpletransformers) (1.3.0)\n",
          "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->simpletransformers) (0.4.8)\n",
          "Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests->simpletransformers) (2.0.0)\n",
          "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests->simpletransformers) (1.26.7)\n",
          "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests->simpletransformers) (2021.5.30)\n",
          "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests->simpletransformers) (3.1)\n",
          "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard->simpletransformers) (3.1.1)\n",
          "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.8/site-packages (from transformers>=4.6.0->simpletransformers) (21.0)\n",
          "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.8/site-packages (from transformers>=4.6.0->simpletransformers) (5.4.1)\n",
          "Requirement already satisfied: sacremoses in /opt/conda/lib/python3.8/site-packages (from transformers>=4.6.0->simpletransformers) (0.0.46)\n",
          "Requirement already satisfied: filelock in /opt/conda/lib/python3.8/site-packages (from transformers>=4.6.0->simpletransformers) (3.3.0)\n",
          "Collecting huggingface-hub<1.0,>=0.1.0\n",
          "  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n",
          "\u001B[K     |████████████████████████████████| 67 kB 44.8 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging>=20.0->transformers>=4.6.0->simpletransformers) (2.4.7)\n",
          "Collecting promise<3,>=2.0\n",
          "  Downloading promise-2.3.tar.gz (19 kB)\n",
          "Requirement already satisfied: Click!=8.0.0,>=7.0 in /opt/conda/lib/python3.8/site-packages (from wandb>=0.10.32->simpletransformers) (8.0.1)\n",
          "Collecting sentry-sdk>=1.0.0\n",
          "  Downloading sentry_sdk-1.5.6-py2.py3-none-any.whl (144 kB)\n",
          "\u001B[K     |████████████████████████████████| 144 kB 38.2 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: python-dateutil>=2.6.1 in /opt/conda/lib/python3.8/site-packages (from wandb>=0.10.32->simpletransformers) (2.8.2)\n",
          "Collecting yaspin>=1.0.0\n",
          "  Downloading yaspin-2.1.0-py3-none-any.whl (18 kB)\n",
          "Collecting shortuuid>=0.5.0\n",
          "  Downloading shortuuid-1.0.8-py3-none-any.whl (9.5 kB)\n",
          "Requirement already satisfied: psutil>=5.0.0 in /opt/conda/lib/python3.8/site-packages (from wandb>=0.10.32->simpletransformers) (5.8.0)\n",
          "Collecting docker-pycreds>=0.4.0\n",
          "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
          "Collecting pathtools\n",
          "  Downloading pathtools-0.1.2.tar.gz (11 kB)\n",
          "Collecting GitPython>=1.0.0\n",
          "  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)\n",
          "\u001B[K     |████████████████████████████████| 181 kB 31.2 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting gitdb<5,>=4.0.1\n",
          "  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)\n",
          "\u001B[K     |████████████████████████████████| 63 kB 33.7 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting smmap<6,>=3.0.1\n",
          "  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n",
          "Collecting dill\n",
          "  Downloading dill-0.3.4-py2.py3-none-any.whl (86 kB)\n",
          "\u001B[K     |████████████████████████████████| 86 kB 34.0 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting fsspec[http]>=2021.05.0\n",
          "  Downloading fsspec-2022.2.0-py3-none-any.whl (134 kB)\n",
          "\u001B[K     |████████████████████████████████| 134 kB 33.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting xxhash\n",
          "  Downloading xxhash-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n",
          "\u001B[K     |████████████████████████████████| 212 kB 29.0 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting pyarrow!=4.0.0,>=3.0.0\n",
          "  Downloading pyarrow-7.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.7 MB)\n",
          "\u001B[K     |████████████████████████████████| 26.7 MB 19.7 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting aiohttp\n",
          "  Downloading aiohttp-3.8.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.3 MB)\n",
          "\u001B[K     |████████████████████████████████| 1.3 MB 29.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting multiprocess\n",
          "  Downloading multiprocess-0.70.12.2-py38-none-any.whl (128 kB)\n",
          "\u001B[K     |████████████████████████████████| 128 kB 30.7 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting async-timeout<5.0,>=4.0.0a3\n",
          "  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n",
          "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets->simpletransformers) (21.2.0)\n",
          "Collecting frozenlist>=1.1.1\n",
          "  Downloading frozenlist-1.3.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (158 kB)\n",
          "\u001B[K     |████████████████████████████████| 158 kB 36.8 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting yarl<2.0,>=1.0\n",
          "  Downloading yarl-1.7.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (308 kB)\n",
          "\u001B[K     |████████████████████████████████| 308 kB 30.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting multidict<7.0,>=4.5\n",
          "  Downloading multidict-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (121 kB)\n",
          "\u001B[K     |████████████████████████████████| 121 kB 30.2 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting aiosignal>=1.1.2\n",
          "  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n",
          "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.8/site-packages (from pandas->simpletransformers) (2021.3)\n",
          "Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from sacremoses->transformers>=4.6.0->simpletransformers) (1.1.0)\n",
          "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from scikit-learn->simpletransformers) (3.0.0)\n",
          "Collecting semver\n",
          "  Downloading semver-2.13.0-py2.py3-none-any.whl (12 kB)\n",
          "Collecting astor\n",
          "  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)\n",
          "Requirement already satisfied: pillow>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from streamlit->simpletransformers) (8.2.0)\n",
          "Collecting importlib-metadata>=1.4\n",
          "  Downloading importlib_metadata-4.11.1-py3-none-any.whl (17 kB)\n",
          "Collecting altair>=3.2.0\n",
          "  Downloading altair-4.2.0-py3-none-any.whl (812 kB)\n",
          "\u001B[K     |████████████████████████████████| 812 kB 35.6 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting pydeck>=0.1.dev5\n",
          "  Downloading pydeck-0.7.1-py2.py3-none-any.whl (4.3 MB)\n",
          "\u001B[K     |████████████████████████████████| 4.3 MB 29.4 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting tzlocal\n",
          "  Downloading tzlocal-4.1-py3-none-any.whl (19 kB)\n",
          "Requirement already satisfied: toml in /opt/conda/lib/python3.8/site-packages (from streamlit->simpletransformers) (0.10.2)\n",
          "Requirement already satisfied: tornado>=5.0 in /opt/conda/lib/python3.8/site-packages (from streamlit->simpletransformers) (6.1)\n",
          "Collecting pympler>=0.9\n",
          "  Downloading Pympler-1.0.1-py3-none-any.whl (164 kB)\n",
          "\u001B[K     |████████████████████████████████| 164 kB 30.0 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting validators\n",
          "  Downloading validators-0.18.2-py3-none-any.whl (19 kB)\n",
          "Collecting base58\n",
          "  Downloading base58-2.1.1-py3-none-any.whl (5.6 kB)\n",
          "Collecting blinker\n",
          "  Downloading blinker-1.4.tar.gz (111 kB)\n",
          "\u001B[K     |████████████████████████████████| 111 kB 74.6 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting watchdog\n",
          "  Downloading watchdog-2.1.6-py3-none-manylinux2014_x86_64.whl (76 kB)\n",
          "\u001B[K     |████████████████████████████████| 76 kB 44.0 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: entrypoints in /opt/conda/lib/python3.8/site-packages (from altair>=3.2.0->streamlit->simpletransformers) (0.3)\n",
          "Requirement already satisfied: jsonschema>=3.0 in /opt/conda/lib/python3.8/site-packages (from altair>=3.2.0->streamlit->simpletransformers) (4.0.1)\n",
          "Collecting toolz\n",
          "  Downloading toolz-0.11.2-py3-none-any.whl (55 kB)\n",
          "\u001B[K     |████████████████████████████████| 55 kB 45.7 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: jinja2 in /opt/conda/lib/python3.8/site-packages (from altair>=3.2.0->streamlit->simpletransformers) (3.0.1)\n",
          "Collecting zipp>=0.5\n",
          "  Downloading zipp-3.7.0-py3-none-any.whl (5.3 kB)\n",
          "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema>=3.0->altair>=3.2.0->streamlit->simpletransformers) (0.18.0)\n",
          "Collecting ipywidgets>=7.0.0\n",
          "  Downloading ipywidgets-7.6.5-py2.py3-none-any.whl (121 kB)\n",
          "\u001B[K     |████████████████████████████████| 121 kB 27.1 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: traitlets>=4.3.2 in /opt/conda/lib/python3.8/site-packages (from pydeck>=0.1.dev5->streamlit->simpletransformers) (5.1.0)\n",
          "Requirement already satisfied: ipykernel>=5.1.2 in /opt/conda/lib/python3.8/site-packages (from pydeck>=0.1.dev5->streamlit->simpletransformers) (6.4.1)\n",
          "Requirement already satisfied: ipython-genutils in /opt/conda/lib/python3.8/site-packages (from ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.2.0)\n",
          "Requirement already satisfied: debugpy<2.0,>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (1.5.0)\n",
          "Requirement already satisfied: jupyter-client<8.0 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (7.0.6)\n",
          "Requirement already satisfied: matplotlib-inline<0.2.0,>=0.1.0 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.1.3)\n",
          "Requirement already satisfied: ipython<8.0,>=7.23.1 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (7.28.0)\n",
          "Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.18.0)\n",
          "Requirement already satisfied: pickleshare in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.7.5)\n",
          "Requirement already satisfied: backcall in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.2.0)\n",
          "Requirement already satisfied: pexpect>4.3 in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (4.8.0)\n",
          "Requirement already satisfied: pygments in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (2.10.0)\n",
          "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (3.0.20)\n",
          "Requirement already satisfied: decorator in /opt/conda/lib/python3.8/site-packages (from ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (5.1.0)\n",
          "Requirement already satisfied: nbformat>=4.2.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (5.1.3)\n",
          "Collecting widgetsnbextension~=3.5.0\n",
          "  Downloading widgetsnbextension-3.5.2-py2.py3-none-any.whl (1.6 MB)\n",
          "\u001B[K     |████████████████████████████████| 1.6 MB 28.1 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting jupyterlab-widgets>=1.0.0\n",
          "  Downloading jupyterlab_widgets-1.0.2-py3-none-any.whl (243 kB)\n",
          "\u001B[K     |████████████████████████████████| 243 kB 28.5 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from jedi>=0.16->ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.8.2)\n",
          "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.8/site-packages (from jinja2->altair>=3.2.0->streamlit->simpletransformers) (2.0.1)\n",
          "Requirement already satisfied: jupyter-core>=4.6.0 in /opt/conda/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (4.8.1)\n",
          "Requirement already satisfied: nest-asyncio>=1.5 in /opt/conda/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (1.5.1)\n",
          "Requirement already satisfied: pyzmq>=13 in /opt/conda/lib/python3.8/site-packages (from jupyter-client<8.0->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (22.3.0)\n",
          "Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.8/site-packages (from pexpect>4.3->ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.7.0)\n",
          "Requirement already satisfied: wcwidth in /opt/conda/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython<8.0,>=7.23.1->ipykernel>=5.1.2->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.2.5)\n",
          "Requirement already satisfied: notebook>=4.4.1 in /opt/conda/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (6.4.1)\n",
          "Requirement already satisfied: argon2-cffi in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (21.1.0)\n",
          "Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.11.0)\n",
          "Requirement already satisfied: Send2Trash>=1.5.0 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (1.8.0)\n",
          "Requirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.12.1)\n",
          "Requirement already satisfied: nbconvert in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (6.2.0)\n",
          "Requirement already satisfied: cffi>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (1.14.6)\n",
          "Requirement already satisfied: pycparser in /opt/conda/lib/python3.8/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (2.20)\n",
          "Requirement already satisfied: defusedxml in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.7.1)\n",
          "Requirement already satisfied: jupyterlab-pygments in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.1.2)\n",
          "Requirement already satisfied: testpath in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.5.0)\n",
          "Requirement already satisfied: mistune<2,>=0.8.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.8.4)\n",
          "Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (1.5.0)\n",
          "Requirement already satisfied: bleach in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (4.1.0)\n",
          "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.5.4)\n",
          "Requirement already satisfied: webencodings in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->pydeck>=0.1.dev5->streamlit->simpletransformers) (0.5.1)\n",
          "Collecting backports.zoneinfo\n",
          "  Downloading backports.zoneinfo-0.2.1-cp38-cp38-manylinux1_x86_64.whl (74 kB)\n",
          "\u001B[K     |████████████████████████████████| 74 kB 32.2 MB/s eta 0:00:01\n",
          "\u001B[?25hCollecting pytz-deprecation-shim\n",
          "  Downloading pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl (15 kB)\n",
          "Collecting tzdata\n",
          "  Downloading tzdata-2021.5-py2.py3-none-any.whl (339 kB)\n",
          "\u001B[K     |████████████████████████████████| 339 kB 30.1 MB/s eta 0:00:01\n",
          "\u001B[?25hBuilding wheels for collected packages: termcolor, promise, pathtools, seqeval, blinker\n",
          "  Building wheel for termcolor (setup.py) ... \u001B[?25ldone\n",
          "\u001B[?25h  Created wheel for termcolor: filename=termcolor-1.1.0-py3-none-any.whl size=4847 sha256=067672b2f7f28ac472a6b1cd07b584b83890568140bc85c553cbaa0039a98989\n",
          "  Stored in directory: /tmp/pip-ephem-wheel-cache-r6lr5xzv/wheels/a0/16/9c/5473df82468f958445479c59e784896fa24f4a5fc024b0f501\n",
          "  Building wheel for promise (setup.py) ... \u001B[?25ldone\n",
          "\u001B[?25h  Created wheel for promise: filename=promise-2.3-py3-none-any.whl size=21502 sha256=504dc84248d0ec44574cc67d80dc391414827e3989a427d5413e1e9700612bc8\n",
          "  Stored in directory: /tmp/pip-ephem-wheel-cache-r6lr5xzv/wheels/54/aa/01/724885182f93150035a2a91bce34a12877e8067a97baaf5dc8\n",
          "  Building wheel for pathtools (setup.py) ... \u001B[?25ldone\n",
          "\u001B[?25h  Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8807 sha256=3cb49e1e70fc8dd8f3f0b0ddb00cc631f925b930abe92201655bbd0fc83e8a6c\n",
          "  Stored in directory: /tmp/pip-ephem-wheel-cache-r6lr5xzv/wheels/4c/8e/7e/72fbc243e1aeecae64a96875432e70d4e92f3d2d18123be004\n",
          "  Building wheel for seqeval (setup.py) ... \u001B[?25ldone\n",
          "\u001B[?25h  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16181 sha256=8d4e2b51b4af004a5815585543b1e879616e537a93a913477ffd89a206c5f552\n",
          "  Stored in directory: /tmp/pip-ephem-wheel-cache-r6lr5xzv/wheels/ad/5c/ba/05fa33fa5855777b7d686e843ec07452f22a66a138e290e732\n",
          "  Building wheel for blinker (setup.py) ... \u001B[?25ldone\n",
          "\u001B[?25h  Created wheel for blinker: filename=blinker-1.4-py3-none-any.whl size=13478 sha256=3c30aa2407f8981ced9bced863d768115b6e523fe33184a32b3585b400c9e006\n",
          "  Stored in directory: /tmp/pip-ephem-wheel-cache-r6lr5xzv/wheels/b7/a5/68/fe632054a5eadd531c7a49d740c50eb6adfbeca822b4eab8d4\n",
          "Successfully built termcolor promise pathtools seqeval blinker\n",
          "Installing collected packages: multidict, frozenlist, yarl, widgetsnbextension, tzdata, smmap, jupyterlab-widgets, backports.zoneinfo, async-timeout, aiosignal, zipp, toolz, termcolor, pytz-deprecation-shim, pandas, ipywidgets, gitdb, fsspec, dill, aiohttp, yaspin, xxhash, watchdog, validators, tzlocal, tokenizers, shortuuid, sentry-sdk, semver, pympler, pydeck, pyarrow, promise, pathtools, multiprocess, importlib-metadata, huggingface-hub, GitPython, docker-pycreds, blinker, base58, astor, altair, wrapt, wandb, transformers, tf-estimator-nightly, tensorflow-io-gcs-filesystem, tensorboard, streamlit, seqeval, sentencepiece, opt-einsum, libclang, keras-preprocessing, keras, h5py, google-pasta, gast, flatbuffers, datasets, astunparse, tensorflow, simpletransformers\n",
          "  Attempting uninstall: tensorboard\n",
          "    Found existing installation: tensorboard 2.6.0\n",
          "    Uninstalling tensorboard-2.6.0:\n",
          "      Successfully uninstalled tensorboard-2.6.0\n",
          "Successfully installed GitPython-3.1.27 aiohttp-3.8.1 aiosignal-1.2.0 altair-4.2.0 astor-0.8.1 astunparse-1.6.3 async-timeout-4.0.2 backports.zoneinfo-0.2.1 base58-2.1.1 blinker-1.4 datasets-1.18.3 dill-0.3.4 docker-pycreds-0.4.0 flatbuffers-2.0 frozenlist-1.3.0 fsspec-2022.2.0 gast-0.5.3 gitdb-4.0.9 google-pasta-0.2.0 h5py-3.6.0 huggingface-hub-0.4.0 importlib-metadata-4.11.1 ipywidgets-7.6.5 jupyterlab-widgets-1.0.2 keras-2.8.0 keras-preprocessing-1.1.2 libclang-13.0.0 multidict-6.0.2 multiprocess-0.70.12.2 opt-einsum-3.3.0 pandas-1.4.1 pathtools-0.1.2 promise-2.3 pyarrow-7.0.0 pydeck-0.7.1 pympler-1.0.1 pytz-deprecation-shim-0.1.0.post0 semver-2.13.0 sentencepiece-0.1.96 sentry-sdk-1.5.6 seqeval-1.2.2 shortuuid-1.0.8 simpletransformers-0.63.4 smmap-5.0.0 streamlit-1.6.0 tensorboard-2.8.0 tensorflow-2.8.0 tensorflow-io-gcs-filesystem-0.24.0 termcolor-1.1.0 tf-estimator-nightly-2.8.0.dev2021122109 tokenizers-0.11.5 toolz-0.11.2 transformers-4.16.2 tzdata-2021.5 tzlocal-4.1 validators-0.18.2 wandb-0.12.10 watchdog-2.1.6 widgetsnbextension-3.5.2 wrapt-1.13.3 xxhash-3.0.0 yarl-1.7.2 yaspin-2.1.0 zipp-3.7.0\n",
          "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\n",
          "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
          "Collecting tensorboardx\n",
          "  Downloading tensorboardX-2.5-py2.py3-none-any.whl (125 kB)\n",
          "\u001B[K     |████████████████████████████████| 125 kB 17.3 MB/s eta 0:00:01\n",
          "\u001B[?25hRequirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from tensorboardx) (1.16.0)\n",
          "Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from tensorboardx) (1.21.2)\n",
          "Requirement already satisfied: protobuf>=3.8.0 in /opt/conda/lib/python3.8/site-packages (from tensorboardx) (3.18.1)\n",
          "Installing collected packages: tensorboardx\n",
          "Successfully installed tensorboardx-2.5\n",
          "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\n"
         ]
        }
       ],
       "source": [
        "!pip install simpletransformers tensorflow\n",
        "!pip install tensorboardx"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 3,
       "metadata": {
        "id": "RJC8wj73Zd_p"
       },
       "outputs": [],
       "source": [
        "from simpletransformers.classification import ClassificationModel, ClassificationArgs, MultiLabelClassificationModel, MultiLabelClassificationArgs\n",
        "from urllib import request\n",
        "import pandas as pd\n",
        "import logging\n",
        "import torch\n",
        "from collections import Counter\n",
        "from ast import literal_eval"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 4,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "bsX3b7ZNYVZe",
        "outputId": "845660e8-c68b-4a52-d9ce-3c06bf7356d8"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Cuda available?  True\n"
         ]
        }
       ],
       "source": [
        "# prepare logger\n",
        "logging.basicConfig(level=logging.INFO)\n",
        "\n",
        "transformers_logger = logging.getLogger(\"transformers\")\n",
        "transformers_logger.setLevel(logging.WARNING)\n",
        "\n",
        "# check gpu\n",
        "cuda_available = torch.cuda.is_available()\n",
        "\n",
        "print('Cuda available? ',cuda_available)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 5,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
          "Requirement already satisfied: tensorflow in /opt/conda/lib/python3.8/site-packages (2.8.0)\n",
          "Requirement already satisfied: astunparse>=1.6.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.6.3)\n",
          "Requirement already satisfied: setuptools in /opt/conda/lib/python3.8/site-packages (from tensorflow) (58.2.0)\n",
          "Requirement already satisfied: typing-extensions>=3.6.6 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.10.0.2)\n",
          "Requirement already satisfied: libclang>=9.0.1 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (13.0.0)\n",
          "Requirement already satisfied: numpy>=1.20 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.21.2)\n",
          "Requirement already satisfied: keras-preprocessing>=1.1.1 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.1.2)\n",
          "Requirement already satisfied: h5py>=2.9.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.6.0)\n",
          "Requirement already satisfied: gast>=0.2.1 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (0.5.3)\n",
          "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (0.24.0)\n",
          "Requirement already satisfied: google-pasta>=0.1.1 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (0.2.0)\n",
          "Requirement already satisfied: flatbuffers>=1.12 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (2.0)\n",
          "Requirement already satisfied: termcolor>=1.1.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.1.0)\n",
          "Requirement already satisfied: tf-estimator-nightly==2.8.0.dev2021122109 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (2.8.0.dev2021122109)\n",
          "Requirement already satisfied: six>=1.12.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.16.0)\n",
          "Requirement already satisfied: keras<2.9,>=2.8.0rc0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (2.8.0)\n",
          "Requirement already satisfied: tensorboard<2.9,>=2.8 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (2.8.0)\n",
          "Requirement already satisfied: wrapt>=1.11.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.13.3)\n",
          "Requirement already satisfied: protobuf>=3.9.2 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.18.1)\n",
          "Requirement already satisfied: absl-py>=0.4.0 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (0.14.1)\n",
          "Requirement already satisfied: opt-einsum>=2.3.2 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (3.3.0)\n",
          "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /opt/conda/lib/python3.8/site-packages (from tensorflow) (1.41.0)\n",
          "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/conda/lib/python3.8/site-packages (from astunparse>=1.6.0->tensorflow) (0.37.0)\n",
          "Requirement already satisfied: markdown>=2.6.8 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (3.3.4)\n",
          "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (0.4.6)\n",
          "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (0.6.1)\n",
          "Requirement already satisfied: werkzeug>=0.11.15 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (2.0.2)\n",
          "Requirement already satisfied: requests<3,>=2.21.0 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (2.26.0)\n",
          "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (1.8.0)\n",
          "Requirement already satisfied: google-auth<3,>=1.6.3 in /opt/conda/lib/python3.8/site-packages (from tensorboard<2.9,>=2.8->tensorflow) (1.35.0)\n",
          "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (0.2.8)\n",
          "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (4.7.2)\n",
          "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (4.2.4)\n",
          "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.8/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow) (1.3.0)\n",
          "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/conda/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.9,>=2.8->tensorflow) (0.4.8)\n",
          "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (1.26.7)\n",
          "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (2021.5.30)\n",
          "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (3.1)\n",
          "Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.9,>=2.8->tensorflow) (2.0.0)\n",
          "Requirement already satisfied: oauthlib>=3.0.0 in /opt/conda/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.9,>=2.8->tensorflow) (3.1.1)\n",
          "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\n"
         ]
        }
       ],
       "source": [
        "!pip install tensorflow"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 6,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "HpRLLRzkTwdL",
        "outputId": "9dc072ea-e419-4bc1-ad99-507cdd4e1394"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Found GPU at: /device:GPU:0\n"
         ]
    
    Emily Haw's avatar
    Emily Haw committed
        },
        {
    
    Ella's avatar
    Ella committed
         "name": "stderr",
         "output_type": "stream",
         "text": [
          "2022-02-26 13:14:29.325853: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
          "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
          "2022-02-26 13:14:29.327245: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:29.328399: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:29.329348: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:32.919742: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:32.920448: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:32.921064: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
          "2022-02-26 13:14:32.921610: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /device:GPU:0 with 6966 MB memory:  -> device: 0, name: Quadro P4000, pci bus id: 0000:00:05.0, compute capability: 6.1\n"
         ]
        }
       ],
       "source": [
        "if cuda_available:\n",
        "  import tensorflow as tf\n",
        "  # Get the GPU device name.\n",
        "  device_name = tf.test.gpu_device_name()\n",
        "  # The device name should look like the following:\n",
        "  if device_name == '/device:GPU:0':\n",
        "      print('Found GPU at: {}'.format(device_name))\n",
        "  else:\n",
        "      raise SystemError('GPU device not found')"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "BMQDATlOZHxu"
       },
       "source": [
        "# Fetch Don't Patronize Me! data manager module"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 7,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "UW903YxwThrH",
        "outputId": "4dc91901-fa9f-446a-a883-dca331443d3d"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Fetching https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py\n"
         ]
        }
       ],
       "source": [
        "module_url = f\"https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py\"\n",
        "module_name = module_url.split('/')[-1]\n",
        "print(f'Fetching {module_url}')\n",
        "#with open(\"file_1.txt\") as f1, open(\"file_2.txt\") as f2\n",
        "with request.urlopen(module_url) as f, open(module_name,'w') as outf:\n",
        "  a = f.read()\n",
        "  outf.write(a.decode('utf-8'))"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 8,
       "metadata": {
        "id": "PRxm0179aqzw"
       },
       "outputs": [],
       "source": [
        "# helper function to save predictions to an output file\n",
        "def labels2file(p, outf_path):\n",
        "\twith open(outf_path,'w') as outf:\n",
        "\t\tfor pi in p:\n",
        "\t\t\toutf.write(','.join([str(k) for k in pi])+'\\n')"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 9,
       "metadata": {
        "id": "gcDThFWVBxGb"
       },
       "outputs": [],
       "source": [
        "from dont_patronize_me import DontPatronizeMe"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 10,
       "metadata": {
        "id": "3Ay5_5Y0ThrI"
       },
       "outputs": [],
       "source": [
        "dpm = DontPatronizeMe('.', '.')"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 11,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/"
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "2r3USK4eThrJ",
        "outputId": "53bbe18a-47df-4079-d28a-cf890c08b306"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Map of label to numerical label:\n",
          "{'Unbalanced_power_relations': 0, 'Shallow_solution': 1, 'Presupposition': 2, 'Authority_voice': 3, 'Metaphors': 4, 'Compassion': 5, 'The_poorer_the_merrier': 6}\n"
         ]
        }
       ],
       "source": [
        "dpm.load_task1()\n",
        "dpm.load_task2(return_one_hot=True)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "P0YcdU80IbiS"
       },
       "source": [
        "# Load paragraph IDs"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 12,
       "metadata": {
        "id": "8AReWYHYOUqx"
       },
       "outputs": [],
       "source": [
        "trids = pd.read_csv('./practice_splits/train_semeval_parids-labels.csv')\n",
        "teids = pd.read_csv('./practice_splits/dev_semeval_parids-labels.csv')"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 13,
       "metadata": {
        "colab": {
         "base_uri": "https://localhost:8080/",
         "height": 205
    
    Emily Haw's avatar
    Emily Haw committed
        },
    
    Ella's avatar
    Ella committed
        "id": "a-_ADoJAOWJA",
        "outputId": "85dbe757-4ee5-4887-deac-60185515e141"
       },
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>label</th>\n",
           "    </tr>\n",
           "  </thead>\n",
           "  <tbody>\n",
           "    <tr>\n",
           "      <th>0</th>\n",
           "      <td>4341</td>\n",
           "      <td>[1, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1</th>\n",
           "      <td>4136</td>\n",
           "      <td>[0, 1, 0, 0, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2</th>\n",
           "      <td>10352</td>\n",
           "      <td>[1, 0, 0, 0, 0, 1, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>3</th>\n",
           "      <td>8279</td>\n",
           "      <td>[0, 0, 0, 1, 0, 0, 0]</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>4</th>\n",
           "      <td>1164</td>\n",
           "      <td>[1, 0, 0, 1, 1, 1, 0]</td>\n",
           "    </tr>\n",
           "  </tbody>\n",
           "</table>\n",
           "</div>"
    
    Emily Haw's avatar
    Emily Haw committed
          ],
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "   par_id                  label\n",
           "0    4341  [1, 0, 0, 1, 0, 0, 0]\n",
           "1    4136  [0, 1, 0, 0, 0, 0, 0]\n",
           "2   10352  [1, 0, 0, 0, 0, 1, 0]\n",
           "3    8279  [0, 0, 0, 1, 0, 0, 0]\n",
           "4    1164  [1, 0, 0, 1, 1, 1, 0]"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 13,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "trids.head()"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 14,
       "metadata": {
        "id": "7IfCZjwQ16MS"
       },
       "outputs": [],
       "source": [
        "trids.par_id = trids.par_id.astype(str)\n",
        "teids.par_id = teids.par_id.astype(str)"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {
        "id": "8lXrNj_Ww_FC"
       },
       "source": [
        "\n",
        "\n",
        "# Rebuild training set (Task 1)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 15,
       "metadata": {
        "id": "BOxDR1H2g_3p"
       },
       "outputs": [],
       "source": [
        "rows = [] # will contain par_id, label and text\n",
        "for idx in range(len(trids)):  \n",
        "  parid = trids.par_id[idx]\n",
        "  #print(parid)\n",
        "  # select row from original dataset to retrieve `text` and binary label\n",
        "  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]\n",
        "  label = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].label.values[0]\n",
        "  rows.append({\n",
        "      'par_id':parid,\n",
        "      'text':text,\n",
        "      'label':label\n",
        "  })\n",
        "  "
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 16,
       "metadata": {
        "id": "8e3E08Yown5p"
       },
       "outputs": [],
       "source": [
        "trdf1 = pd.DataFrame(rows)"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 17,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>text</th>\n",
           "      <th>label</th>\n",
           "    </tr>\n",
           "  </thead>\n",
           "  <tbody>\n",
           "    <tr>\n",
           "      <th>0</th>\n",
           "      <td>4341</td>\n",
           "      <td>The scheme saw an estimated 150,000 children f...</td>\n",
           "      <td>1</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>1</th>\n",
           "      <td>4136</td>\n",
           "      <td>Durban 's homeless communities reconciliation ...</td>\n",
           "      <td>1</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>2</th>\n",
           "      <td>10352</td>\n",
           "      <td>The next immediate problem that cropped up was...</td>\n",
           "      <td>1</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>3</th>\n",
           "      <td>8279</td>\n",
           "      <td>Far more important than the implications for t...</td>\n",
           "      <td>1</td>\n",
           "    </tr>\n",
           "    <tr>\n",
           "      <th>4</th>\n",
           "      <td>1164</td>\n",
           "      <td>To strengthen child-sensitive social protectio...</td>\n",
           "      <td>1</td>\n",
           "    </tr>\n",
           "  </tbody>\n",
           "</table>\n",
           "</div>"
    
    Emily Haw's avatar
    Emily Haw committed
          ],
    
    Ella's avatar
    Ella committed
          "text/plain": [
           "  par_id                                               text  label\n",
           "0   4341  The scheme saw an estimated 150,000 children f...      1\n",
           "1   4136  Durban 's homeless communities reconciliation ...      1\n",
           "2  10352  The next immediate problem that cropped up was...      1\n",
           "3   8279  Far more important than the implications for t...      1\n",
           "4   1164  To strengthen child-sensitive social protectio...      1"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 17,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "trdf1.head()"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 18,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/plain": [
           "8375"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 18,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "trdf1.shape[0]"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 19,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/plain": [
           "0    7581\n",
           "1     794\n",
           "Name: label, dtype: int64"
    
    Emily Haw's avatar
    Emily Haw committed
          ]
    
    Ella's avatar
    Ella committed
         },
         "execution_count": 19,
         "metadata": {},
         "output_type": "execute_result"
        }
       ],
       "source": [
        "trdf1[\"label\"].value_counts()"
       ]
      },
      {
       "cell_type": "markdown",
       "metadata": {},
       "source": [
        "**Discussion regarding Analysis of class labels**\n",
        "\n",
        "The dataset is a skewed dataset, with 10 times more sentences not exhibiting pcl compared to sentences exhibiting pcl."
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 20,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "name": "stdout",
         "output_type": "stream",
         "text": [
          "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
          "Collecting py-readability-metrics\n",
          "  Downloading py_readability_metrics-1.4.5-py3-none-any.whl (26 kB)\n",
          "Requirement already satisfied: nltk in /opt/conda/lib/python3.8/site-packages (from py-readability-metrics) (3.6.4)\n",
          "Requirement already satisfied: tqdm in /opt/conda/lib/python3.8/site-packages (from nltk->py-readability-metrics) (4.62.3)\n",
          "Requirement already satisfied: regex in /opt/conda/lib/python3.8/site-packages (from nltk->py-readability-metrics) (2021.10.8)\n",
          "Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from nltk->py-readability-metrics) (1.1.0)\n",
          "Requirement already satisfied: click in /opt/conda/lib/python3.8/site-packages (from nltk->py-readability-metrics) (8.0.1)\n",
          "Installing collected packages: py-readability-metrics\n",
          "Successfully installed py-readability-metrics-1.4.5\n",
          "\u001B[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001B[0m\n",
          "/opt/conda/lib/python3.8/runpy.py:127: RuntimeWarning: 'nltk.downloader' found in sys.modules after import of package 'nltk', but prior to execution of 'nltk.downloader'; this may result in unpredictable behaviour\n",
          "  warn(RuntimeWarning(msg))\n",
          "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
          "[nltk_data]   Unzipping tokenizers/punkt.zip.\n"
         ]
        }
       ],
       "source": [
        "!pip install py-readability-metrics\n",
        "!python -m nltk.downloader punkt"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 21,
       "metadata": {},
       "outputs": [],
       "source": [
        "from readability import Readability\n",
        "\n",
        "def calculate_readability(text):\n",
        "    try:\n",
        "        r = Readability(text)\n",
        "        return r.flesch_kincaid().score\n",
        "    except:\n",
        "        return 0"
       ]
      },
      {
       "cell_type": "code",
       "execution_count": 22,
       "metadata": {},
       "outputs": [
    
    Emily Haw's avatar
    Emily Haw committed
        {
    
    Ella's avatar
    Ella committed
         "data": {
          "text/html": [
           "<div>\n",
           "<style scoped>\n",
           "    .dataframe tbody tr th:only-of-type {\n",
           "        vertical-align: middle;\n",
           "    }\n",
           "\n",
           "    .dataframe tbody tr th {\n",
           "        vertical-align: top;\n",
           "    }\n",
           "\n",
           "    .dataframe thead th {\n",
           "        text-align: right;\n",
           "    }\n",
           "</style>\n",
           "<table border=\"1\" class=\"dataframe\">\n",
           "  <thead>\n",
           "    <tr style=\"text-align: right;\">\n",
           "      <th></th>\n",
           "      <th>par_id</th>\n",
           "      <th>text</th>\n",