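"""Evaluate a DeBERTa prediction file against the golden dev split.

Assumes, from the column names used below, that the golden CSV carries a binary
`is_patronising` column plus a fine-grained 0-4 `label` column, and that the
prediction CSV carries a binary `pcl` column plus a fine-grained `label` column.
Reports the binary F1 score and per-class recall/accuracy breakdowns.
"""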
import pandas as pd
from evaluate import load
golden = pd.read_csv("data/dev.csv", sep=",", escapechar="\\")
prediction = pd.read_csv("output/complete/deberta-base_f1_multiclass_2_chktp794_dev.csv", sep=",", escapechar="\\")
# Rename the prediction's "label" column so it does not clash with the golden
# "label" column once the two frames are concatenated column-wise.
prediction = prediction.rename(columns={"label": "label_prediction"})
# Rows are assumed to be in the same order in both files, so a column-wise
# concat lines each prediction up with its golden annotation.
combined = pd.concat([golden, prediction], axis=1)
def f1_score() -> float:
    """Binary F1 of the predicted `pcl` labels against the golden `is_patronising` labels."""
    golden_labels = golden["is_patronising"].to_list()
    prediction_labels = prediction["pcl"].to_list()
    f1 = load("f1")
    # `compute` returns a dict such as {"f1": 0.87}; unwrap it to match the return annotation.
    return f1.compute(predictions=prediction_labels, references=golden_labels)["f1"]
def recall() -> dict[int, float]:
    """Per-class recall: for each fine-grained golden label 0-4, the fraction of
    examples whose binary prediction matches (labels 0-1 count as not PCL,
    labels 2-4 as PCL)."""
    golden_labels_0 = combined[combined["label"] == 0]
    golden_labels_1 = combined[combined["label"] == 1]
    golden_labels_2 = combined[combined["label"] == 2]
    golden_labels_3 = combined[combined["label"] == 3]
    golden_labels_4 = combined[combined["label"] == 4]
    recalls = {0: len(golden_labels_0[golden_labels_0["pcl"] == 0]) / len(golden_labels_0),
               1: len(golden_labels_1[golden_labels_1["pcl"] == 0]) / len(golden_labels_1),
               2: len(golden_labels_2[golden_labels_2["pcl"] == 1]) / len(golden_labels_2),
               3: len(golden_labels_3[golden_labels_3["pcl"] == 1]) / len(golden_labels_3),
               4: len(golden_labels_4[golden_labels_4["pcl"] == 1]) / len(golden_labels_4)}
    return recalls
def accuracy() -> dict[int, float]:
    """Per-class accuracy of the fine-grained predictions: for each predicted
    label 0-4, the fraction of those examples whose golden binary label agrees
    (predicted 0-1 should not be patronising, predicted 2-4 should be)."""
    prediction_labels_0 = combined[combined["label_prediction"] == 0]
    prediction_labels_1 = combined[combined["label_prediction"] == 1]
    prediction_labels_2 = combined[combined["label_prediction"] == 2]
    prediction_labels_3 = combined[combined["label_prediction"] == 3]
    prediction_labels_4 = combined[combined["label_prediction"] == 4]
    accuracies = {0: len(prediction_labels_0[prediction_labels_0["is_patronising"] == 0]) / len(prediction_labels_0),
                  1: len(prediction_labels_1[prediction_labels_1["is_patronising"] == 0]) / len(prediction_labels_1),
                  2: len(prediction_labels_2[prediction_labels_2["is_patronising"] == 1]) / len(prediction_labels_2),
                  3: len(prediction_labels_3[prediction_labels_3["is_patronising"] == 1]) / len(prediction_labels_3),
                  4: len(prediction_labels_4[prediction_labels_4["is_patronising"] == 1]) / len(prediction_labels_4)}
    return accuracies
if __name__ == '__main__':
    print("F1:", f1_score())
    print("Recall:", recall())
    print("Accuracy:", accuracy())