Skip to content
Snippets Groups Projects
Commit c4b6adf7 authored by OnurZa's avatar OnurZa
Browse files

Add comments to the code

parent e1102ad8
Branches master
No related tags found
No related merge requests found
......@@ -9,31 +9,44 @@ import xgboost as xbg
def preprocess_data(data):
    """Derive the model's feature table from raw patient records.

    The following columns are computed from ``data``:

    * ``last_createnine_result`` - the last non-missing value, in column
      order, among the ``creatinine_result_*`` columns (missing when a row
      has no results at all).
    * ``sex_encoded`` - ``sex`` mapped to 0 for ``'f'`` and 1 for ``'m'``;
      any other value becomes missing.
    * ``creatinine_avg_excluding_last`` - mean of the non-missing results
      except the last one, or 0 when a row has fewer than two results.
    * ``creatinine_min`` - smallest non-missing result, or 0 when a row has
      no results.

    Args:
        data: DataFrame with ``age``, ``sex`` and ``creatinine_result_*``
            columns, and optionally an ``aki`` label column. The input is
            not modified; all derived columns are added to a copy.

    Returns:
        DataFrame containing ``age`` and the four derived columns, followed
        by ``aki`` when the input has that column.
    """
    # Work on a copy so the caller's DataFrame does not gain extra columns.
    data = data.copy()

    # Identify all the creatinine result columns.
    # NOTE(review): "last" below means last in column order - this assumes
    # the columns are laid out chronologically; confirm against the data.
    createnine_result_columns = [
        col for col in data.columns if col.startswith("creatinine_result_")
    ]
    results = data[createnine_result_columns]

    def last_result(row):
        # Most recent non-missing result in the row, or None if there is none.
        observed = row.dropna()
        return observed.iloc[-1] if not observed.empty else None

    def average_excluding_last(row):
        # Mean of every non-missing result except the final one.
        values = row.dropna()
        if len(values) > 1:
            # .iloc makes the slice explicitly positional (labels are names).
            return values.iloc[:-1].astype(float).mean()
        return 0

    def calculate_min_value(row):
        # Smallest non-missing result, or 0 when the row has no results.
        values = row.dropna().astype(float)
        return values.min() if not values.empty else 0

    # The column name keeps its original spelling because callers select on it.
    data["last_createnine_result"] = results.apply(last_result, axis=1)
    data["sex_encoded"] = data["sex"].map({"f": 0, "m": 1})
    data["creatinine_avg_excluding_last"] = results.apply(
        average_excluding_last, axis=1
    )
    data["creatinine_min"] = results.apply(calculate_min_value, axis=1)

    # Keep the label column only when the input actually carries it.
    # NOTE(review): the diff view this was recovered from elides one line
    # between the two assignments; it is reconstructed here as the `else`
    # branch of the "aki" check - confirm against the repository.
    columns_to_select = [
        "age",
        "last_createnine_result",
        "sex_encoded",
        "creatinine_avg_excluding_last",
        "creatinine_min",
    ]
    if "aki" in data.columns:
        columns_to_select.append("aki")

    return data[columns_to_select]
......@@ -55,24 +66,28 @@ def main():
parser.add_argument("--output", default="aki.csv")
flags = parser.parse_args()
# Read the train and test data
training_data = pd.read_csv("data/training.csv")
test_data = pd.read_csv(flags.input)
# Write the predictions to the output file
w = csv.writer(open(flags.output, "w"))
w.writerow(("aki",))
# Preprocess training and testing data
training_data = preprocess_data(training_data)
testing_data = preprocess_data(test_data)
# Prepare the training features and labels
X_train = training_data.drop(columns=["aki"])
target = "aki"
y_train = training_data[target].map({'y': 1, 'n': 0})
# Use XGBoost to train the model
model = xbg.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
model.fit(X_train, y_train)
# Handle test data based on whether "aki" is present
if "aki" in testing_data.columns:
testing_data = testing_data.dropna(subset=["aki"])
y_test = testing_data["aki"].map({'y': 1, 'n': 0}).values
......@@ -81,6 +96,7 @@ def main():
y_test = None
X_test = testing_data
# Predict AKI outcomes
y_pred = model.predict(X_test)
for i in y_pred:
if i == 0:
......@@ -93,7 +109,6 @@ def main():
f3_score = fbeta_score(y_test, y_pred, beta=3)
print("F3 score: ", f3_score)
#w.writerow((random.choice(["y", "n"]),))
if __name__ == "__main__":
main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment