Skip to content
Snippets Groups Projects
Commit 34ff75d3 authored by mmzk1526's avatar mmzk1526
Browse files

Auto format

parent d362f66a
No related branches found
No related tags found
No related merge requests found
......@@ -13,7 +13,8 @@ from transformers import Trainer
def get_optimal_hyperparameters(env: Env) -> dict[str: any]:
print(f"device = {env.device}")
train_data, test_data = tools.preprocess_train_data("data/train.csv", env, upscale_factor=7)
train_data, test_data = tools.preprocess_train_data(
"data/train.csv", env, upscale_factor=7)
# validate_data = tools.preprocess_test_data("data/dev.csv", env)
with env.tuning_toggle:
......@@ -60,7 +61,8 @@ if __name__ == '__main__':
logger.addHandler(logging.FileHandler(log_path, mode="w+"))
optuna.logging.enable_propagation() # Propagate logs to the root logger.
optuna.logging.disable_default_handler() # Stop showing logs in sys.stderr.
# Stop showing logs in sys.stderr.
optuna.logging.disable_default_handler()
# Perform tuning
configs_optim: dict[str, any] = get_optimal_hyperparameters(env)
......
......@@ -19,10 +19,14 @@ def preprocess_train_data(path: str, env: Env, upscale_factor: int = 7) -> tuple
dataset = Dataset.from_pandas(df[['label', 'text']]).map(lambda d: env.tokeniser(str(d['text']), truncation=True),
batched=False)
dataset_0_split = dataset.filter(lambda x: x['label'] == 0).train_test_split(test_size=0.2)
dataset_1_split = dataset.filter(lambda x: x['label'] == 1).train_test_split(test_size=0.2)
dataset_train = ds.concatenate_datasets([dataset_0_split['train']] + [dataset_1_split['train']] * upscale_factor)
dataset_test = ds.concatenate_datasets([dataset_0_split['test'], dataset_1_split['test']])
dataset_0_split = dataset.filter(
lambda x: x['label'] == 0).train_test_split(test_size=0.2)
dataset_1_split = dataset.filter(
lambda x: x['label'] == 1).train_test_split(test_size=0.2)
dataset_train = ds.concatenate_datasets(
[dataset_0_split['train']] + [dataset_1_split['train']] * upscale_factor)
dataset_test = ds.concatenate_datasets(
[dataset_0_split['test'], dataset_1_split['test']])
return dataset_train, dataset_test
......
......@@ -8,7 +8,8 @@ from transformers import Trainer
def train(env: Env) -> None:
print(f"device = {env.device}")
train_data, _ = tools.preprocess_train_data("data/train.csv", env, upscale_factor=7)
train_data, _ = tools.preprocess_train_data(
"data/train.csv", env, upscale_factor=7)
validate_data = tools.preprocess_test_data("data/dev.csv", env)
trainer = Trainer(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment