-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTensorFlow.py
More file actions
116 lines (63 loc) · 2.1 KB
/
TensorFlow.py
File metadata and controls
116 lines (63 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# coding: utf-8
# In[8]:
get_ipython().system('pip install numpy pandas tensorflow tensorflow_hub scikit-learn openpyxl')
get_ipython().system('pip install --upgrade tensorflow tensorflow_hub')
# In[15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
# Step 1: Load the data
data = pd.read_excel("Medical_data.xlsx")
# Step 2: Preprocess the data
X_text = data['Gender'].values
y = data['Condition'].values
# Step 3: Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# Step 4: Tokenize text data
max_words = 1000 # Maximum number of words to keep
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_text)
X_seq = tokenizer.texts_to_sequences(X_text)
# Pad sequences to ensure uniform length
max_len = max(len(seq) for seq in X_seq)
X_pad = pad_sequences(X_seq, maxlen=max_len)
# Step 5: Build the neural network model
embedding_dim = 50
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Step 6: Train the model
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
# Step 7: Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)
# In[16]:
model.summary()
# In[27]:
data.head(25)
# In[25]:
data.shape
# In[26]:
data.info()
# In[29]:
data.tail(15)
# In[30]:
data.columns
# In[32]:
data.isnull().sum()
# In[33]:
data.duplicated().sum
# In[35]:
data['Condition'].value_counts()