testing, logging, function organisation

45a09d45 · kmilicic · 086ec459 · 45a09d45 · 45a09d45 · 45a09d45
Commit 45a09d45 authored 1 month ago by kmilicic
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,6 +3,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -yq install python3
 RUN python3 -m venv /venv
 ENV PATH="/venv/bin:$PATH"
 COPY requirements.txt /model/
-RUN pip3 install -r /model/requirements.txt
-COPY model.py best_model.pth /model/
-CMD /model/model.py --input=/data/test.csv --output=/data/aki.csv
+WORKDIR /model
+RUN pip3 install -r requirements.txt
+COPY model.py best_model.pth ./
+CMD ./model.py --input=/data/test.csv --output=/data/aki.csv
--- a/aki.csv
+++ b/aki.csv
--- a/best_model.pth
+++ b/best_model.pth
--- a/model.py
+++ b/model.py
 #!/usr/bin/env python3

 import argparse
+import logging
 from pathlib import Path

 import pandas as pd
@@ -11,76 +12,35 @@ from sklearn.metrics import fbeta_score
 from sklearn.preprocessing import StandardScaler
 from tqdm import tqdm

-MODEL_PATH = Path("/model/best_model.pth")
+# Emit INFO-and-above records through a stream handler, prefixed with a
+# timestamp and the level name.
+# NOTE(review): this executes at import time, so `import model` (as the test
+# module does) also configures the root logger — confirm that is intended.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+logger = logging.getLogger(__name__)


-def preprocess_features(df: pd.DataFrame) -> pd.DataFrame:
-    df["sex"] = (df["sex"] == "M").astype(int)
-
-    date_cols = [col for col in df.columns if "date" in col]
-
-    for col in date_cols:
-        df[col] = pd.to_datetime(df[col]).dt.date
-
-    date_features = pd.DataFrame(index=df.index)
-
-    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing dates"):
-        current_date = pd.to_datetime(row[date_cols]).dropna().dt.date.max()
-        recent_dates = []
-        older_dates = []
-
-        # Get current value
-        c1 = None
-        for col in date_cols:
-            if pd.notna(df.at[idx, col]):
-                days_diff = (current_date - df.at[idx, col]).days
-                value = df.at[idx, col.replace("date", "result")]
-                if days_diff == 0:
-                    c1 = value
-                if 0 <= days_diff <= 7:
-                    recent_dates.append(value)
-                elif 8 <= days_diff <= 365:
-                    older_dates.append(value)
-
-        # Calculate ratios
-        rv1 = min(recent_dates) if recent_dates else None
-        rv2 = pd.Series(older_dates).median() if older_dates else None
-
-        date_features.at[idx, "ratio1"] = (
-            c1 / rv1 if (c1 is not None and rv1 is not None) else None
-        )
-        date_features.at[idx, "ratio2"] = (
-            c1 / rv2 if (c1 is not None and rv2 is not None) else None
-        )
-        date_features.at[idx, "has_recent"] = 1 if recent_dates else 0
-
-    df["ratio1"] = date_features["ratio1"]
-    df["ratio2"] = date_features["ratio2"]
-    df["has_recent"] = date_features["has_recent"]
-
-    selected_columns = ["age", "sex", "ratio1", "ratio2", "has_recent"]
-    df = df[selected_columns]
-
-    # Fill missing values with median
-    df = df.fillna(df.median())
-
-    # Scale features
-    scaler = StandardScaler()
-    df[["age", "ratio1", "ratio2"]] = scaler.fit_transform(
-        df[["age", "ratio1", "ratio2"]]
-    )
+def main():
+    """Command-line entry point: obtain a model, predict, and write a CSV.
+
+    Flags: ``--input`` (CSV to score, default ``test.csv``), ``--output``
+    (where predictions are written, default ``aki.csv``), ``--model_path``
+    (weights file, default ``best_model.pth``) and ``--train`` (train a new
+    model instead of loading the saved one).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input", default="test.csv")
+    parser.add_argument("--output", default="aki.csv")
+    parser.add_argument("--model_path", default="best_model.pth")
+    parser.add_argument("--train", action="store_true", help="Train the model")
+    args = parser.parse_args()

-    return df
+    # NOTE(review): args.model_path is a plain str here, while train_model and
+    # load_model annotate the parameter as Path.
+    model = train_model(args.model_path) if args.train else load_model(args.model_path)

+    test_df = preprocess_features(pd.read_csv(args.input))
+    predictions_df = predict(model, test_df)

-def preprocess_targets(df: pd.DataFrame) -> pd.Series:
-    return (df["aki"] == "y").astype(int)
+    logger.info(f"Saving model predictions to {args.output}")
+    # index=False keeps the row index out of the written file.
+    predictions_df.to_csv(args.output, index=False)


 class Net(nn.Module):
-    def __init__(self, input_size: int) -> None:
+    def __init__(self) -> None:
        super(Net, self).__init__()
-        self.layer1: nn.Linear = nn.Linear(input_size, 16)
+        self.layer1: nn.Linear = nn.Linear(5, 16)
        self.layer2: nn.Linear = nn.Linear(16, 8)
        self.layer3: nn.Linear = nn.Linear(8, 1)
        self.relu: nn.ReLU = nn.ReLU()
@@ -94,7 +54,8 @@ class Net(nn.Module):
        return x.squeeze()


-def train_model():
+def train_model(model_path: Path) -> Net:
+    logger.info("Training model")
    # Load preprocessed data
    train_df = pd.read_csv("training.csv")
    test_df = pd.read_csv("test.csv")
@@ -112,12 +73,13 @@ def train_model():
    y_test_tensor = torch.FloatTensor(y_test.values)

    # Initialize model, loss and optimizer
-    model = Net(X_train.shape[1])
+    model = Net()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters())

    # Training loop
-    n_epochs = 500
+    n_epochs = 100
+    # n_epochs = 500
    batch_size = 32
    best_f3 = 0
    best_model_state = None
@@ -167,21 +129,100 @@ def train_model():
    # Load best model state
    assert best_model_state is not None
    model.load_state_dict(best_model_state)
-    torch.save(best_model_state, MODEL_PATH)
+    torch.save(best_model_state, model_path)
    print(f"Best F3 score: {best_f3:.3f}")

    return model


-def predict(X_test: pd.DataFrame) -> pd.DataFrame:
-    X_test = preprocess_features(X_test)
-    X_test_tensor = torch.FloatTensor(X_test.values)
+def preprocess_features(df: pd.DataFrame) -> pd.DataFrame:
+    """Turn a raw patient frame into the five model input columns.
+
+    The frame is first checked with ``validate_data``. Every column whose
+    name contains ``"date"`` is treated as a measurement date, and its value
+    is read from the column named by replacing ``"date"`` with ``"result"``.
+
+    Per row, relative to the latest non-null date:
+
+    * ``c1``  – the result taken on the latest date;
+    * ``ratio1`` – ``c1`` divided by the minimum result from the last 0-7 days
+      (the latest result itself is part of that window);
+    * ``ratio2`` – ``c1`` divided by the median result from 8-365 days ago;
+    * ``has_recent`` – 1 if any result falls in the 0-7 day window, else 0.
+
+    Results older than 365 days are ignored.
+
+    Returns a frame with columns ``age``, ``sex``, ``ratio1``, ``ratio2`` and
+    ``has_recent``; missing values are filled with the column median and
+    ``age``/``ratio1``/``ratio2`` are standardised.
+
+    Note: the ``sex``, date and ratio columns are assigned on the frame that
+    was passed in, so the caller's DataFrame is modified.
+    """
+    validate_data(df)
+
+    # Encode sex as 1 for "M" and 0 for every other value.
+    df["sex"] = (df["sex"] == "M").astype(int)
+
+    date_cols = [col for col in df.columns if "date" in col]

-    # Load the trained model
-    model = Net(input_size=X_test_tensor.shape[1])
-    model.load_state_dict(torch.load(MODEL_PATH, weights_only=True))
+    # Reduce timestamps to calendar dates so differences are whole days.
+    for col in date_cols:
+        df[col] = pd.to_datetime(df[col]).dt.date
+
+    date_features = pd.DataFrame(index=df.index)
+
+    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing dates"):
+        # Latest non-null measurement date in this row.
+        current_date = pd.to_datetime(row[date_cols]).dropna().dt.date.max()
+        recent_dates = []
+        older_dates = []
+
+        # Get current value
+        c1 = None
+        for col in date_cols:
+            if pd.notna(df.at[idx, col]):
+                days_diff = (current_date - df.at[idx, col]).days
+                value = df.at[idx, col.replace("date", "result")]
+                # If several columns share the latest date, the last one in
+                # column order ends up as c1.
+                if days_diff == 0:
+                    c1 = value
+                # Note: despite their names these lists hold result values,
+                # bucketed by how old the matching date is.
+                if 0 <= days_diff <= 7:
+                    recent_dates.append(value)
+                elif 8 <= days_diff <= 365:
+                    older_dates.append(value)
+
+        # Calculate ratios
+        rv1 = min(recent_dates) if recent_dates else None
+        rv2 = pd.Series(older_dates).median() if older_dates else None
+
+        date_features.at[idx, "ratio1"] = (
+            c1 / rv1 if (c1 is not None and rv1 is not None) else None
+        )
+        date_features.at[idx, "ratio2"] = (
+            c1 / rv2 if (c1 is not None and rv2 is not None) else None
+        )
+        date_features.at[idx, "has_recent"] = 1 if recent_dates else 0
+
+    df["ratio1"] = date_features["ratio1"]
+    df["ratio2"] = date_features["ratio2"]
+    df["has_recent"] = date_features["has_recent"]
+
+    selected_columns = ["age", "sex", "ratio1", "ratio2", "has_recent"]
+    df = df[selected_columns]
+
+    # Fill missing values with median
+    df = df.fillna(df.median())
+
+    # Scale features
+    # NOTE(review): the scaler is fitted on whatever frame is passed in, so
+    # training and inference data are each scaled with their own statistics —
+    # confirm this is intended.
+    scaler = StandardScaler()
+    df[["age", "ratio1", "ratio2"]] = scaler.fit_transform(
+        df[["age", "ratio1", "ratio2"]]
+    )
+
+    return df
+
+
+def validate_data(df: pd.DataFrame) -> None:
+    """Reject frames that lack required columns or contain a negative age.
+
+    Raises:
+        ValueError: if any of ``age``, ``sex`` or ``aki`` is missing from
+            ``df.columns``, or if the smallest ``age`` value is below zero.
+    """
+    # NOTE(review): "aki" is the label column, yet preprocess_features calls
+    # this check and main() runs preprocess_features on the --input file as
+    # well — confirm that inference input always carries an "aki" column.
+    required_columns = {"age", "sex", "aki"}
+    if not required_columns.issubset(df.columns):
+        raise ValueError(
+            f"Missing required columns: {required_columns - set(df.columns)}"
+        )
+
+    if df["age"].min() < 0:
+        raise ValueError("Age cannot be negative")
+
+def preprocess_targets(df: pd.DataFrame) -> pd.Series:
+    """Return the ``aki`` column as integers: 1 where it equals "y", else 0."""
+    return (df["aki"] == "y").astype(int)
+
+
+def load_model(model_path: Path) -> Net:
+    """Build a ``Net`` and restore its saved weights from *model_path*.
+
+    The returned model has already been switched to evaluation mode.
+    """
+    logger.info("Loading model")
+    model = Net()
+    # weights_only=True restricts unpickling to tensors and other allow-listed
+    # plain types instead of arbitrary pickled objects.
+    model.load_state_dict(torch.load(model_path, weights_only=True))
    model.eval()

+    return model
+
+
+def predict(model: Net, X_test: pd.DataFrame) -> pd.DataFrame:
+    X_test_tensor = torch.FloatTensor(X_test.values)
+
    # Get predictions
    with torch.no_grad():
        outputs = model(X_test_tensor)
@@ -189,20 +230,5 @@ def predict(X_test: pd.DataFrame) -> pd.DataFrame:
        return pd.DataFrame(predictions, columns=["aki"])


-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--input", default="test.csv")
-    parser.add_argument("--output", default="aki.csv")
-    parser.add_argument("--train", action="store_true", help="Train the model")
-    args = parser.parse_args()
-
-    if args.train:
-        train_model()
-
-    test_df = pd.read_csv(args.input)
-    predictions_df = predict(test_df)
-    predictions_df.to_csv(args.output, index=False)
-
-
 if __name__ == "__main__":
    main()
--- a/pyproject.toml
+++ b/pyproject.toml
 [project]
 dependencies = [
  "pandas>=2.2.3",
+  "pytest>=8.3.4",
  "scikit-learn>=1.6.1",
  "torch>=2.5.1",
  "tqdm>=4.67.1",

--- a/final_score_speculation.py
+++ b/final_score_speculation.py
--- a/test/__init__.py
+++ b/test/__init__.py
--- a/test/test_model.py
+++ b/test/test_model.py
+from pathlib import Path
+
+import pandas as pd
+import pytest
+import torch
+from sklearn.metrics import fbeta_score
+
+import model
+
+
+def test_preprocess_features():
+    """preprocess_features keeps an integer ``sex`` column.
+
+    The input frame has no date columns, so only the sex encoding is checked.
+    """
+    test_data = pd.DataFrame({"age": [30, 40], "sex": ["M", "F"], "aki": ["n", "y"]})
+    result = model.preprocess_features(test_data)
+    assert "sex" in result.columns
+    assert result["sex"].dtype == int
+
+
+def test_validate_data_missing_columns():
+    """validate_data raises ValueError when the ``aki`` column is absent."""
+    test_data = pd.DataFrame({"age": [30, 40], "sex": ["M", "F"]})
+    with pytest.raises(ValueError):
+        model.validate_data(test_data)
+
+
+def test_validate_data_negative_age():
+    """validate_data raises ValueError when any age is negative."""
+    test_data = pd.DataFrame({"age": [-1, 40], "sex": ["M", "F"], "aki": ["n", "y"]})
+    with pytest.raises(ValueError):
+        model.validate_data(test_data)
+
+
+def test_preprocess_targets():
+    """preprocess_targets maps "y" to 1 and "n" to 0, preserving row order."""
+    test_data = pd.DataFrame({"aki": ["y", "n", "y"]})
+    result = model.preprocess_targets(test_data)
+    assert result.tolist() == [1, 0, 1]
+
+
+def test_net_forward_pass():
+    """A freshly built Net maps a (3, 5) batch to three values in [0, 1]."""
+    net = model.Net()
+    test_tensor = torch.randn(3, 5)  # Batch of 3 samples with 5 features
+    output = net(test_tensor)
+    assert output.shape == torch.Size([3])  # Should return 1 prediction per sample
+    assert (output >= 0).all() and (
+        output <= 1
+    ).all()  # Outputs should be between 0 and 1
+
+
+def test_model_performance():
+    """Check the saved model exceeds 0.95 accuracy and F3 on ``test.csv``.
+
+    Both the weights file and the labelled CSV are looked up relative to the
+    current working directory; the test is skipped, rather than failing with
+    FileNotFoundError, when either of them is absent.
+    """
+    model_path = Path("best_model.pth")
+    if not model_path.exists():
+        pytest.skip("Model weights not found, skipping performance test")

+    # Guard the data file the same way as the weights file.
+    data_path = Path("test.csv")
+    if not data_path.exists():
+        pytest.skip("Test data not found, skipping performance test")

+    test_df = pd.read_csv(data_path)
+    X_test = model.preprocess_features(test_df)
+    y_test = model.preprocess_targets(test_df)

+    net = model.load_model(model_path)

+    with torch.no_grad():
+        X_test_tensor = torch.FloatTensor(X_test.values)
+        outputs = net(X_test_tensor)
+        # Threshold the model outputs at 0.5 to obtain class labels.
+        predictions = (outputs > 0.5).float()

+        accuracy = (predictions == torch.FloatTensor(y_test.values)).float().mean()
+        f3_score = fbeta_score(y_test.values.astype(int), predictions.numpy(), beta=3)

+        assert accuracy > 0.95, f"Model accuracy {accuracy:.3f} below 95% threshold"
+        assert f3_score > 0.95, f"Model F3 score {f3_score:.3f} below 95% threshold"
--- a/train.py
+++ b/train.py
-import pandas as pd
-import torch
-from tqdm import tqdm
-import torch.nn as nn
-import torch.optim as optim
-from sklearn.preprocessing import StandardScaler
-from sklearn.metrics import fbeta_score
-
-
-def preprocess_features(df: pd.DataFrame) -> pd.DataFrame:
-    df["sex"] = (df["sex"] == "M").astype(int)
-
-    date_cols = [col for col in df.columns if "date" in col]
-
-    for col in date_cols:
-        df[col] = pd.to_datetime(df[col]).dt.date
-
-    date_features = pd.DataFrame(index=df.index)
-
-    for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing dates"):
-        current_date = pd.to_datetime(row[date_cols]).dropna().dt.date.max()
-        recent_dates = []
-        older_dates = []
-
-        # Get current value
-        c1 = None
-        for col in date_cols:
-            if pd.notna(df.at[idx, col]):
-                days_diff = (current_date - df.at[idx, col]).days
-                value = df.at[idx, col.replace("date", "result")]
-                if days_diff == 0:
-                    c1 = value
-                if 0 <= days_diff <= 7:
-                    recent_dates.append(value)
-                elif 8 <= days_diff <= 365:
-                    older_dates.append(value)
-
-        # Calculate ratios
-        rv1 = min(recent_dates) if recent_dates else None
-        rv2 = pd.Series(older_dates).median() if older_dates else None
-
-        date_features.at[idx, "ratio1"] = (
-            c1 / rv1 if (c1 is not None and rv1 is not None) else None
-        )
-        date_features.at[idx, "ratio2"] = (
-            c1 / rv2 if (c1 is not None and rv2 is not None) else None
-        )
-        date_features.at[idx, "has_recent"] = 1 if recent_dates else 0
-
-    df["ratio1"] = date_features["ratio1"]
-    df["ratio2"] = date_features["ratio2"]
-    df["has_recent"] = date_features["has_recent"]
-
-    selected_columns = ["age", "sex", "ratio1", "ratio2", "has_recent"]
-    df = df[selected_columns]
-
-    # Fill missing values with median
-    df = df.fillna(df.median())
-
-    # Scale features
-    scaler = StandardScaler()
-    df[["age", "ratio1", "ratio2"]] = scaler.fit_transform(
-        df[["age", "ratio1", "ratio2"]]
-    )
-
-    return df
-
-
-def preprocess_targets(df: pd.DataFrame) -> pd.Series:
-    return (df["aki"] == "y").astype(int)
-
-
-class Net(nn.Module):
-    def __init__(self, input_size: int) -> None:
-        super(Net, self).__init__()
-        self.layer1: nn.Linear = nn.Linear(input_size, 16)
-        self.layer2: nn.Linear = nn.Linear(16, 8)
-        self.layer3: nn.Linear = nn.Linear(8, 1)
-        self.relu: nn.ReLU = nn.ReLU()
-        self.dropout: nn.Dropout = nn.Dropout(0.2)
-        self.sigmoid: nn.Sigmoid = nn.Sigmoid()
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.dropout(self.relu(self.layer1(x)))
-        x = self.relu(self.layer2(x))
-        x = self.sigmoid(self.layer3(x))
-        return x.squeeze()
-
-
-def train_model():
-    # Load preprocessed data
-    # train_df = pd.read_csv("training.csv")
-    # test_df = pd.read_csv("test.csv")
-    train_df = pd.read_csv("training.csv").head(100)
-    test_df = pd.read_csv("test.csv").head(100)
-
-    # Prepare features and target
-    X_train = preprocess_features(train_df)
-    y_train = preprocess_targets(train_df)
-    X_test = preprocess_features(test_df)
-    y_test = preprocess_targets(test_df)
-
-    # Convert to PyTorch tensors
-    X_train_tensor = torch.FloatTensor(X_train.values)
-    y_train_tensor = torch.FloatTensor(y_train.values)
-    X_test_tensor = torch.FloatTensor(X_test.values)
-    y_test_tensor = torch.FloatTensor(y_test.values)
-
-    # Initialize model, loss and optimizer
-    model = Net(X_train.shape[1])
-    criterion = nn.BCELoss()
-    optimizer = optim.Adam(model.parameters())
-
-    # Training loop
-    n_epochs = 500
-    batch_size = 32
-    best_f3 = 0
-    best_model_state = None
-
-    for epoch in range(n_epochs):
-        model.train()
-        for i in range(0, len(X_train_tensor), batch_size):
-            batch_X = X_train_tensor[i : i + batch_size]
-            batch_y = y_train_tensor[i : i + batch_size]
-
-            optimizer.zero_grad()
-            outputs = model(batch_X)
-            loss = criterion(outputs.squeeze(), batch_y)
-            loss.backward()
-            optimizer.step()
-
-        # Validation
-        model.eval()
-        with torch.no_grad():
-            # Training metrics
-            train_outputs = model(X_train_tensor)
-            train_predictions = (train_outputs > 0.5).float()
-            train_accuracy = (train_predictions == y_train_tensor).float().mean()
-            train_f3 = fbeta_score(
-                y_train_tensor.numpy(), train_predictions.numpy(), beta=3
-            )
-
-            # Test metrics
-            test_outputs = model(X_test_tensor)
-            test_predictions = (test_outputs > 0.5).float()
-            test_accuracy = (test_predictions == y_test_tensor).float().mean()
-            test_f3 = fbeta_score(
-                y_test_tensor.numpy(), test_predictions.numpy(), beta=3
-            )
-
-            print(
-                f"Epoch {epoch + 1}/{n_epochs}, "
-                f"Train Accuracy: {train_accuracy:.3f}, Train F3: {train_f3:.3f}, "
-                f"Test Accuracy: {test_accuracy:.3f}, Test F3: {test_f3:.3f}"
-            )
-
-            # Save best model
-            if test_f3 > best_f3:
-                best_f3 = test_f3
-                best_model_state = model.state_dict()
-
-    # Load best model state
-    assert best_model_state is not None
-    model.load_state_dict(best_model_state)
-    torch.save(best_model_state, "best_model.pth")
-    print(f"Best F3 score: {best_f3:.3f}")
-
-    return model
-
-
-if __name__ == "__main__":
-    train_model()
--- a/uv.lock
+++ b/uv.lock
@@ -28,6 +28,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862 },
 ]

+[[package]]
+name = "iniconfig"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.5"
@@ -297,6 +306,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
 ]

+[[package]]
+name = "packaging"
+version = "24.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 },
+]
+
 [[package]]
 name = "pandas"
 version = "2.2.3"
@@ -331,6 +349,30 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436 },
 ]

+[[package]]
+name = "pluggy"
+version = "1.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/96/2d/02d4312c973c6050a18b314a5ad0b3210edb65a906f868e31c111dede4a6/pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1", size = 67955 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
+]
+
+[[package]]
+name = "pytest"
+version = "8.3.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+    { name = "iniconfig" },
+    { name = "packaging" },
+    { name = "pluggy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -463,6 +505,7 @@ version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
    { name = "pandas" },
+    { name = "pytest" },
    { name = "scikit-learn" },
    { name = "torch" },
    { name = "tqdm" },
@@ -477,6 +520,7 @@ dev = [
 [package.metadata]
 requires-dist = [
    { name = "pandas", specifier = ">=2.2.3" },
+    { name = "pytest", specifier = ">=8.3.4" },
    { name = "scikit-learn", specifier = ">=1.6.1" },
    { name = "torch", specifier = ">=2.5.1" },
    { name = "tqdm", specifier = ">=4.67.1" },