Skip to content

THIS IS SPAM paper2025 #4057

@ahmedmmana

Description

@ahmedmmana

!pip install pandas numpy scikit-learn xgboost seaborn matplotlib tensorflow -q

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import random
import warnings
warnings.filterwarnings('ignore')

# ===================== 1. Tabular part (UCI) =====================

# Download the UCI concrete compressive-strength workbook into a DataFrame.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
df = pd.read_excel(url)

# Swap in short column names by position; this assumes the sheet holds exactly
# these nine columns in this order.
df.columns = [
    'cement',
    'slag',
    'fly_ash',
    'water',
    'superplasticizer',
    'coarse_aggregate',
    'fine_aggregate',
    'age',
    'strength',
]

# Derived feature: water-to-cement ratio.
df['w_c_ratio'] = df['water'].div(df['cement'])

print(f"عدد العينات الكلي (UCI): {len(df)}")

# Fixed-seed 500-row sample, used here for the preview and the summary table.
df_sample = df.sample(n=500, random_state=42)

print("\nعينة عشوائية من 500 صف:")
print(df_sample.head(10))

print("\nإحصائيات العينة العشوائية (500 صف):")
print(df_sample.describe().round(2))

def label_crack(row):
    """Assign a rule-based binary label to one concrete-mix record.

    The record is labelled 0 only when all four thresholds are met:
    water/cement ratio <= 0.45, age >= 14, strength >= 35 and
    superplasticizer > 3. Any other record is labelled 1. The plots in
    this file display class 0 as 'No-Crack' and class 1 as 'Crack'.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing the
            keys 'w_c_ratio', 'age', 'strength' and 'superplasticizer'.

    Returns:
        int: 0 when every threshold is satisfied, otherwise 1.
    """
    meets_all_thresholds = (
        row['w_c_ratio'] <= 0.45
        and row['age'] >= 14
        and row['strength'] >= 35
        and row['superplasticizer'] > 3
    )
    return 0 if meets_all_thresholds else 1

# Rule-based label for every row (0 or 1, as returned by label_crack).
df['label'] = df.apply(label_crack, axis=1)

# Inject label noise: with a fixed seed, each row's label is flipped with
# probability 0.1.
np.random.seed(42)
flip_mask = np.random.choice([True, False], size=len(df), p=[0.1, 0.9])
flipped_labels = 1 - df.loc[flip_mask, 'label']
df.loc[flip_mask, 'label'] = flipped_labels

# Class balance after the noise injection, in percent.
print("\nتوزيع الفئات (جدولي):")
print(df['label'].value_counts(normalize=True).mul(100))

# Model inputs: every mix column plus the derived ratio. 'strength' is left out
# of the feature list (it is only used by the labelling rule).
features = [
    'cement',
    'slag',
    'fly_ash',
    'water',
    'superplasticizer',
    'coarse_aggregate',
    'fine_aggregate',
    'age',
    'w_c_ratio',
]

X_tab = df[features]
y = df['label']

# Stratified 80/20 split with a fixed seed.
X_train_tab, X_test_tab, y_train, y_test = train_test_split(
    X_tab,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply them to
# both splits.
scaler = StandardScaler()
scaler.fit(X_train_tab)
X_train_tab_scaled = scaler.transform(X_train_tab)
X_test_tab_scaled = scaler.transform(X_test_tab)

# Hyper-parameters of the tabular gradient-boosted classifier.
tab_params = {
    'n_estimators': 600,
    'max_depth': 6,
    'learning_rate': 0.05,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    'reg_lambda': 1.0,
    'objective': 'binary:logistic',
    'random_state': 42,
    'eval_metric': 'logloss',
}
model_tab = xgb.XGBClassifier(**tab_params)
model_tab.fit(X_train_tab_scaled, y_train)

# Column 1 of predict_proba is the probability of class 1; threshold at 0.5.
tab_proba = model_tab.predict_proba(X_test_tab_scaled)
y_prob_tab = tab_proba[:, 1]
y_pred_tab = (y_prob_tab >= 0.5).astype(int)
acc_tab = accuracy_score(y_test, y_pred_tab)

# Report the tabular-only results.
print(f"\nنتائج الجزء الجدولي فقط:")
print(f"Accuracy: {acc_tab*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred_tab))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_tab))

# Plot the confusion matrix

# Heat-map of the tabular model's confusion matrix (rows: actual, cols: predicted).
class_names = ['No-Crack', 'Crack']
plt.figure(figsize=(6, 5))
ax = sns.heatmap(
    confusion_matrix(y_test, y_pred_tab),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_title('Confusion Matrix - Tabular-only Model')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

# ===================== 2. Vision part (CCIC + EfficientNetB0) =====================

# Assumes the image folder has already been uploaded (change the path if needed)

# Root of the crack-image dataset; "Positive" holds crack images and "Negative"
# holds crack-free images.
base_dir = "/content/Concrete Crack Images for Classification"

crack_dir = os.path.join(base_dir, "Positive")
no_crack_dir = os.path.join(base_dir, "Negative")

# os.listdir returns entries in arbitrary order, so the listings are sorted
# first; otherwise the seeded sampling below would not be reproducible.
crack_files = sorted(os.listdir(crack_dir))
no_crack_files = sorted(os.listdir(no_crack_dir))

print("\nتأكيد الصور:")
print("Crack:", len(crack_files), "صورة")
print("No-Crack:", len(no_crack_files), "صورة")

# Select 1000 images (500 per class). random.sample raises ValueError if a
# folder holds fewer than n_per_class entries.
n_per_class = 500
random.seed(42)
selected_crack = random.sample(crack_files, n_per_class)
selected_no_crack = random.sample(no_crack_files, n_per_class)

# Crack images first, then crack-free images; the labels follow the same order
# (1 = crack, 0 = no crack).
all_paths = (
    [os.path.join(crack_dir, img) for img in selected_crack]
    + [os.path.join(no_crack_dir, img) for img in selected_no_crack]
)
labels_vision = [1] * len(selected_crack) + [0] * len(selected_no_crack)

print(f"\nعدد الصور المختارة (بصري): {len(all_paths)}")

# ImageNet-pretrained EfficientNetB0 used as a frozen feature extractor: the
# classification head is dropped and global average pooling is applied to the
# final feature map.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg',
)
# Freeze the backbone; it is only used for inference here.
base_model.trainable = False

def extract_embeddings(paths, batch_size=32):
    """Compute one embedding vector per image with the module-level base_model.

    Images are loaded at 224x224, preprocessed with the EfficientNet
    preprocess_input and pushed through base_model in batches, which avoids
    the overhead of one predict() call per image.

    Args:
        paths: Iterable of image file paths.
        batch_size: Number of images loaded and predicted together. Lower it
            if memory is tight. Must be >= 1.

    Returns:
        numpy.ndarray: Embeddings stacked along axis 0 in the order of
        ``paths``. An empty input yields an empty array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    paths = list(paths)
    if not paths:
        # Nothing to embed; avoid calling the model with an empty batch.
        return np.array([])

    embeddings = []
    for start in range(0, len(paths), batch_size):
        batch = np.stack([
            img_to_array(load_img(path, target_size=(224, 224)))
            for path in paths[start:start + batch_size]
        ])
        batch = preprocess_input(batch)
        embeddings.append(
            base_model.predict(batch, batch_size=batch_size, verbose=0)
        )
    return np.concatenate(embeddings, axis=0)

print("\nاستخراج الميزات البصرية...")
vision_embeddings = extract_embeddings(all_paths)

# Train a simple vision model (XGBoost on the image embeddings).
X_train_vis, X_test_vis, y_train_vis, y_test_vis = train_test_split(
    vision_embeddings, labels_vision, test_size=0.2, random_state=42, stratify=labels_vision)

model_vis = xgb.XGBClassifier(
    n_estimators=300,  # slightly fewer trees for the images
    max_depth=4,
    learning_rate=0.05,
    random_state=42
)

model_vis.fit(X_train_vis, y_train_vis)

# Column 1 of predict_proba is the probability of class 1 (crack); threshold at 0.5.
y_prob_vis = model_vis.predict_proba(X_test_vis)[:, 1]
y_pred_vis = (y_prob_vis >= 0.5).astype(int)
acc_vis = accuracy_score(y_test_vis, y_pred_vis)

print(f"\nنتائج الجزء البصري فقط:")
print(f"Accuracy: {acc_vis*100:.2f}%")

# ===================== 3. Late Fusion =====================

# Use the test-set probabilities (from the tabular and vision models)

alpha = 0.4  # weight of the tabular probability (changed from 0.35 to 0.4)

# The tabular and vision test sets come from two unrelated datasets with
# different sizes, so their probability vectors cannot be blended element-wise
# (NumPy raises a broadcast error) and y_test is not aligned with y_prob_vis.
# Build an explicit synthetic pairing instead: every tabular test sample is
# matched with a randomly drawn vision test sample that has the same label.
# NOTE(review): the pairing is conditioned on the true label, so the hybrid
# metrics describe synthetic pairs, not genuinely paired observations.
y_test_arr = np.asarray(y_test)
y_test_vis_arr = np.asarray(y_test_vis)
pair_rng = np.random.default_rng(42)
paired_idx = np.empty(len(y_test_arr), dtype=int)
for cls in np.unique(y_test_arr):
    candidates = np.flatnonzero(y_test_vis_arr == cls)
    if candidates.size == 0:
        raise ValueError(
            f"No vision test sample with label {cls} to pair with the tabular test set"
        )
    rows = np.flatnonzero(y_test_arr == cls)
    paired_idx[rows] = pair_rng.choice(candidates, size=rows.size, replace=True)
y_prob_vis_paired = np.asarray(y_prob_vis)[paired_idx]

# Late fusion: convex combination of the two class-1 probabilities.
hybrid_probs = alpha * y_prob_tab + (1 - alpha) * y_prob_vis_paired
hybrid_preds = (hybrid_probs >= 0.5).astype(int)

acc_hybrid = accuracy_score(y_test, hybrid_preds)

print(f"\nنتائج Hybrid (Late Fusion):")
print(f"Accuracy: {acc_hybrid*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, hybrid_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, hybrid_preds))

# Plot the confusion matrix for the hybrid model

# Heat-map of the hybrid model's confusion matrix (rows: actual, cols: predicted).
class_names = ['No-Crack', 'Crack']
plt.figure(figsize=(6, 5))
ax = sns.heatmap(
    confusion_matrix(y_test, hybrid_preds),
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
ax.set_title('Confusion Matrix - Hybrid Model (Late Fusion)')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

# ===================== 4. Additional plots =====================

# ROC curve for the hybrid model

# ROC points and area under the curve for the fused probabilities.
fpr, tpr, _ = roc_curve(y_test, hybrid_probs)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
ax = plt.gca()
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line (chance level).
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve - Hybrid Model (Late Fusion)')
ax.legend(loc="lower right")
ax.grid(True)
plt.show()

# Comparison bar chart of accuracy

# Accuracy of the three models, in the same order as their display names.
models = ['Tabular-only', 'Vision-only', 'Hybrid (late fusion)']
accuracies = [acc_tab, acc_vis, acc_hybrid]

plt.figure(figsize=(8, 5))
ax = sns.barplot(x=models, y=accuracies, palette='viridis')
ax.set_ylim(0, 1)
ax.set_title('Comparison of Accuracy Between Models')
ax.set_xlabel('Model')
ax.set_ylabel('Accuracy')
# Annotate the bars of the first container with their two-decimal accuracy.
ax.bar_label(ax.containers[0], fmt='%.2f')
ax.grid(True, axis='y')
plt.show()

print("\nالكود الكامل مع النتايج مدمجة - جاهز للـ GitHub أو Supplementary Material")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions