From b4ee7bb339bd73aa0302dd753f256db764443af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=BCnyamin=20Dincer?= <bunyamin.dincer2003@gmail.com> Date: Mon, 15 Jul 2024 17:44:01 +0200 Subject: [PATCH] Update breast_cancer_queries.py --- breast_cancer_queries.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/breast_cancer_queries.py b/breast_cancer_queries.py index 79372f8..5c67bae 100644 --- a/breast_cancer_queries.py +++ b/breast_cancer_queries.py @@ -4,10 +4,12 @@ import csv import sys from hle import high_level_single # our code +# Load dataset from CSV file with open('data/breast-cancer.csv', 'r') as f: reader = csv.reader(f, delimiter=';') full_dataset = list(reader) +# Define feature names features = { 'clumpThickness': 'numeric', 'uniformityCellSize': 'numeric', @@ -20,16 +22,18 @@ features = { 'mitoses': 'numeric', } +# Define Class Names class_names = ['benign', 'melignant'] feature_names = list(features.keys()) feature_types = list(features.values()) -# because of binary features with values that are not 0 or 1. +# because of binary features with values that are not 0 or 1. (not needed here, leaving it just in case we need it) feature_mapping = { } +# Process row of features from dataset def process_features_student(row): to_delete = [0] cpy = [] @@ -43,21 +47,26 @@ def process_features_student(row): assert len(cpy) == len(feature_names) return cpy + +# Process Class Label def process_class(val): if float(val) >= 3: # good grade is a grade in [10, 20]. Bad grade is [0, 10) return 0 else: return 1 +# Prepare dataset by splitting features and labels dataset = full_dataset[1:] X = [ process_features_student(data[:-1]) for data in dataset] y = [ process_class(data[-1]) for data in dataset] +# Init and Train decision tree classifier cancer_clf = DecisionTreeClassifier(max_leaf_nodes=400, random_state=0) cancer_clf.fit(X, y) print('DecisionTreeClassifier has been trained') +# Example Queries (feel free to add more) q1 = 'exists p1, exists p2, benign(p1) implies benign(p2)' q2 = 'exists p1, exists p2, p1.blandChromatin > 3 and p2.marginalAdhesion <= 3 and melignant(p1) implies benign(p2)' q3 = 'for every patient, patient.blandChromatin > 4 implies melignant(patient)' @@ -70,7 +79,7 @@ q6 = ('exists p1, exists p2, p1.mitoses <= 2 implies melignant(p1)' 'and p2.blandChromatin > 9 implies p1.blandChromatin <= 3') - +# Eval Example Queries def example_queries(): queries = [q1,q2,q3,q4,q5,q6] avg = 0 -- GitLab