diff --git a/results/config_snapshot_20250718_001955.py b/results/config_snapshot_20250718_001955.py new file mode 100644 index 0000000..1ab2ee5 --- /dev/null +++ b/results/config_snapshot_20250718_001955.py @@ -0,0 +1,58 @@ +class Config: + # Models to train + MODELS_TO_TRAIN = ["MobileNetV2"] + + # Data parameters + IMG_SIZE = (224, 224) + BATCH_SIZE = 32 + TRAIN_SPLIT = 0.80 + VALIDATION_SPLIT = 0.25 + + # Training parameters + INITIAL_EPOCHS = 20 + FINE_TUNE_EPOCHS = 10 + INITIAL_LEARNING_RATE = 0.0005 + FINE_TUNE_LEARNING_RATE = 0.0001 + + # Model parameters + NUM_CLASSES = 23 + DROPOUT_RATE = 0.5 + NUM_FROZEN_LAYERS = 2 # Number of layers to freeze in feature extraction + + # Enhanced Architecture Features (set to True to enable) + USE_ENHANCED_ARCHITECTURE = False # Multi-branch architecture with attention + USE_PROGRESSIVE_UNFREEZING = False # Progressive unfreezing strategy + USE_ADVANCED_AUGMENTATION = False # Advanced data augmentation + USE_ENSEMBLE_LEARNING = False # Ensemble learning + USE_TEST_TIME_AUGMENTATION = False # Test-time augmentation + + # Progressive unfreezing parameters + PROGRESSIVE_PHASES = 3 # Number of progressive unfreezing phases + + # Ensemble learning parameters + ENSEMBLE_STRATEGY = 'soft_voting' # 'soft_voting', 'hard_voting', 'weighted_voting', 'stacking' + ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"] + META_LEARNER_EPOCHS = 50 + + # Test-time augmentation parameters + TTA_AUGMENTATIONS = 5 # Number of augmentations per test image + + # Advanced training parameters + USE_MIXUP = False # Mixup data augmentation + USE_CUTMIX = False # CutMix data augmentation + USE_LABEL_SMOOTHING = False # Label smoothing + LABEL_SMOOTHING_FACTOR = 0.1 + + # Learning rate scheduling + USE_COSINE_ANNEALING = False # Cosine annealing scheduler + USE_WARM_RESTARTS = False # Warm restarts + + # Regularization + USE_DROPOUT_SCHEDULING = False # Adaptive dropout scheduling + USE_WEIGHT_DECAY = False # L2 weight decay + 
WEIGHT_DECAY_FACTOR = 1e-4 + + # Paths + DATA_DIR = "data" + MODEL_SAVE_DIR = "saved_models" + RESULTS_DIR = "results" diff --git a/results/model_comparison_20250718_001955.csv b/results/model_comparison_20250718_001955.csv new file mode 100644 index 0000000..840ad19 --- /dev/null +++ b/results/model_comparison_20250718_001955.csv @@ -0,0 +1,2 @@ +,Accuracy (%),Loss,Size (MB),Parameters +MobileNetV2,49.59,1.6897,9.3222,2287447 diff --git a/results/training_comparison_20250718_001955.png b/results/training_comparison_20250718_001955.png new file mode 100644 index 0000000..6636470 Binary files /dev/null and b/results/training_comparison_20250718_001955.png differ diff --git a/results/training_history_20250718_001955.csv b/results/training_history_20250718_001955.csv new file mode 100644 index 0000000..e126c47 --- /dev/null +++ b/results/training_history_20250718_001955.csv @@ -0,0 +1,31 @@ +model,epoch,train_accuracy,val_accuracy,train_loss,val_loss +MobileNetV2_feature_extraction,1,0.10285714268684387,0.1804979294538498,3.2679028511047363,2.7532925605773926 +MobileNetV2_feature_extraction,2,0.22207792103290558,0.28215768933296204,2.6492979526519775,2.5107262134552 +MobileNetV2_feature_extraction,3,0.32181817293167114,0.3381742835044861,2.3156282901763916,2.323220729827881 +MobileNetV2_feature_extraction,4,0.37584415078163147,0.3838174343109131,2.1061758995056152,2.228482484817505 +MobileNetV2_feature_extraction,5,0.4171428680419922,0.3921161890029907,1.9495772123336792,2.1282544136047363 +MobileNetV2_feature_extraction,6,0.4527272582054138,0.4066390097141266,1.840901255607605,2.0412991046905518 +MobileNetV2_feature_extraction,7,0.47220778465270996,0.4273858964443207,1.7568695545196533,1.970870852470398 +MobileNetV2_feature_extraction,8,0.5070129632949829,0.4585062265396118,1.6532152891159058,1.9253556728363037 +MobileNetV2_feature_extraction,9,0.5210389494895935,0.46265560388565063,1.5946297645568848,1.8834987878799438 
+MobileNetV2_feature_extraction,10,0.5425974130630493,0.47095435857772827,1.5383957624435425,1.8493201732635498 +MobileNetV2_feature_extraction,11,0.5488311648368835,0.46265560388565063,1.49072265625,1.7981990575790405 +MobileNetV2_feature_extraction,12,0.5703896284103394,0.4605809152126312,1.4195979833602905,1.7736471891403198 +MobileNetV2_feature_extraction,13,0.5807791948318481,0.4564315378665924,1.4124072790145874,1.7660542726516724 +MobileNetV2_feature_extraction,14,0.5774025917053223,0.46887966990470886,1.3677922487258911,1.7547590732574463 +MobileNetV2_feature_extraction,15,0.6015584468841553,0.46680498123168945,1.3222650289535522,1.734666347503662 +MobileNetV2_feature_extraction,16,0.6051948070526123,0.46473029255867004,1.3051408529281616,1.7301479578018188 +MobileNetV2_feature_extraction,17,0.6038960814476013,0.4730290472507477,1.2859197854995728,1.7186366319656372 +MobileNetV2_feature_extraction,18,0.6283116936683655,0.4792531132698059,1.2456966638565063,1.712315559387207 +MobileNetV2_feature_extraction,19,0.6168830990791321,0.4792531132698059,1.2730422019958496,1.7302953004837036 +MobileNetV2_feature_extraction,20,0.621558427810669,0.4792531132698059,1.2226208448410034,1.705318808555603 +MobileNetV2_fine_tuned,1,0.635064959526062,0.48547717928886414,1.1813465356826782,1.7019144296646118 +MobileNetV2_fine_tuned,2,0.6475324630737305,0.48755186796188354,1.172446846961975,1.6987230777740479 +MobileNetV2_fine_tuned,3,0.6496104001998901,0.4834024906158447,1.1667516231536865,1.7070080041885376 +MobileNetV2_fine_tuned,4,0.6407791972160339,0.4958506226539612,1.1737589836120605,1.6940451860427856 +MobileNetV2_fine_tuned,5,0.6376623511314392,0.48962655663490295,1.1516849994659424,1.7008932828903198 +MobileNetV2_fine_tuned,6,0.6342856884002686,0.4937759339809418,1.1716210842132568,1.6919862031936646 +MobileNetV2_fine_tuned,7,0.6249350905418396,0.4958506226539612,1.1754311323165894,1.6897497177124023 
+MobileNetV2_fine_tuned,8,0.649350643157959,0.4958506226539612,1.1520062685012817,1.701900601387024 +MobileNetV2_fine_tuned,9,0.6594805121421814,0.49170124530792236,1.1290615797042847,1.6990609169006348 +MobileNetV2_fine_tuned,10,0.6503896117210388,0.4813278019428253,1.1408472061157227,1.6991528272628784 diff --git a/src/config.py b/src/config.py index 4ee4302..1ab2ee5 100644 --- a/src/config.py +++ b/src/config.py @@ -19,6 +19,39 @@ class Config: DROPOUT_RATE = 0.5 NUM_FROZEN_LAYERS = 2 # Number of layers to freeze in feature extraction + # Enhanced Architecture Features (set to True to enable) + USE_ENHANCED_ARCHITECTURE = False # Multi-branch architecture with attention + USE_PROGRESSIVE_UNFREEZING = False # Progressive unfreezing strategy + USE_ADVANCED_AUGMENTATION = False # Advanced data augmentation + USE_ENSEMBLE_LEARNING = False # Ensemble learning + USE_TEST_TIME_AUGMENTATION = False # Test-time augmentation + + # Progressive unfreezing parameters + PROGRESSIVE_PHASES = 3 # Number of progressive unfreezing phases + + # Ensemble learning parameters + ENSEMBLE_STRATEGY = 'soft_voting' # 'soft_voting', 'hard_voting', 'weighted_voting', 'stacking' + ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"] + META_LEARNER_EPOCHS = 50 + + # Test-time augmentation parameters + TTA_AUGMENTATIONS = 5 # Number of augmentations per test image + + # Advanced training parameters + USE_MIXUP = False # Mixup data augmentation + USE_CUTMIX = False # CutMix data augmentation + USE_LABEL_SMOOTHING = False # Label smoothing + LABEL_SMOOTHING_FACTOR = 0.1 + + # Learning rate scheduling + USE_COSINE_ANNEALING = False # Cosine annealing scheduler + USE_WARM_RESTARTS = False # Warm restarts + + # Regularization + USE_DROPOUT_SCHEDULING = False # Adaptive dropout scheduling + USE_WEIGHT_DECAY = False # L2 weight decay + WEIGHT_DECAY_FACTOR = 1e-4 + # Paths DATA_DIR = "data" MODEL_SAVE_DIR = "saved_models" diff --git a/src/data_processor.py b/src/data_processor.py 
index c068a9e..845e453 100644
--- a/src/data_processor.py
+++ b/src/data_processor.py
@@ -12,12 +12,11 @@ def create_data_generators(self, train_dir, validation_dir=None):
         If validation_dir is not provided, split train_dir using validation_split.
         """
 
-        # Data augmentation pipeline
-        data_augmentation = tf.keras.Sequential([
-            layers.RandomFlip("horizontal"),
-            layers.RandomRotation(0.1),
-            layers.RandomZoom(0.1),
-        ])
+        # Choose augmentation strategy based on config
+        if getattr(self.config, 'USE_ADVANCED_AUGMENTATION', False):
+            data_augmentation = self._create_advanced_augmentation()
+        else:
+            data_augmentation = self._create_basic_augmentation()
 
         if validation_dir:
             train_ds = tf.keras.utils.image_dataset_from_directory(
@@ -75,6 +74,83 @@ def create_data_generators(self, train_dir, validation_dir=None):
 
         return train_ds, val_ds
 
+    def _create_basic_augmentation(self):
+        """Create basic data augmentation pipeline."""
+        return tf.keras.Sequential([
+            layers.RandomFlip("horizontal"),
+            layers.RandomRotation(0.1),
+            layers.RandomZoom(0.1),
+        ])
+
+    def _create_advanced_augmentation(self):
+        """Create advanced data augmentation pipeline with more sophisticated techniques."""
+        # NOTE(review): the tf.image ops wrapped in Lambda layers below are not
+        # training-aware, so they stay random whenever this pipeline is called.
+        # Per the TensorFlow docs, tf.image.random_jpeg_quality expects a single
+        # 3-D image and float values in [0, 1) - confirm that it behaves as
+        # intended on the batched tensors this pipeline receives.
+        return tf.keras.Sequential([
+            layers.RandomFlip("horizontal"),
+            layers.RandomRotation(0.2),
+            layers.RandomZoom(0.15),
+            layers.RandomContrast(0.2),
+            layers.RandomBrightness(0.2),
+            layers.RandomTranslation(0.1, 0.1),
+            # Custom augmentation for better leaf/tree feature learning
+            layers.Lambda(lambda x: tf.image.random_hue(x, 0.1)),
+            layers.Lambda(lambda x: tf.image.random_saturation(x, 0.8, 1.2)),
+            layers.Lambda(lambda x: tf.image.random_jpeg_quality(x, 85, 100)),
+        ])
+
+    def create_test_time_augmentation_dataset(self, test_ds, n_augmentations=5):
+        """
+        Create test-time augmentation dataset for improved inference accuracy.
+
+        Each batch of test_ds is expanded into n_augmentations randomly
+        augmented copies per image. The copies of one image are stored
+        contiguously, and the labels are repeated to match.
+
+        Args:
+            test_ds: Batched dataset yielding (images, labels) pairs.
+            n_augmentations: Number of augmented copies per image (>= 1).
+
+        Returns:
+            Tuple of (augmented dataset, n_augmentations).
+
+        Raises:
+            ValueError: If n_augmentations is smaller than 1.
+        """
+        if n_augmentations < 1:
+            raise ValueError("n_augmentations must be at least 1")
+
+        augmentation = tf.keras.Sequential([
+            layers.RandomFlip("horizontal"),
+            layers.RandomRotation(0.05),
+            layers.RandomZoom(0.05),
+            layers.RandomBrightness(0.1),
+        ])
+
+        def augment_batch(x, y):
+            # Keras random preprocessing layers are identity functions at
+            # inference time, so training=True is required for them to augment.
+            augmented_images = [
+                augmentation(x, training=True) for _ in range(n_augmentations)
+            ]
+
+            # Stack all augmented versions
+            stacked = tf.stack(augmented_images, axis=1)  # Shape: (batch, n_aug, height, width, channels)
+
+            # Reshape to treat each augmentation as a separate sample
+            reshaped = tf.reshape(stacked, (-1, *x.shape[1:]))
+
+            # Repeat labels for each augmentation
+            repeated_labels = tf.repeat(y, n_augmentations, axis=0)
+
+            return reshaped, repeated_labels
+
+        tta_ds = test_ds.map(augment_batch)
+        return tta_ds, n_augmentations
+
     def prepare_dataset_from_directory(self, data_dir):
         """
         Prepare dataset from a directory structure with subdirectories for each class.
diff --git a/src/enhanced_config.py b/src/enhanced_config.py
new file mode 100644
index 0000000..1f0d7cf
--- /dev/null
+++ b/src/enhanced_config.py
@@ -0,0 +1,128 @@
+class EnhancedConfig:
+    """
+    Enhanced configuration demonstrating all the new architectural improvements.
+    This configuration enables all advanced features for maximum performance.
+    """
+
+    # Models to train
+    MODELS_TO_TRAIN = ["MobileNetV2"]
+
+    # Data parameters
+    IMG_SIZE = (224, 224)
+    BATCH_SIZE = 32
+    TRAIN_SPLIT = 0.80
+    VALIDATION_SPLIT = 0.25
+
+    # Training parameters
+    INITIAL_EPOCHS = 25
+    FINE_TUNE_EPOCHS = 15
+    INITIAL_LEARNING_RATE = 0.0005
+    FINE_TUNE_LEARNING_RATE = 0.0001
+
+    # Model parameters
+    NUM_CLASSES = 23
+    DROPOUT_RATE = 0.5
+    NUM_FROZEN_LAYERS = 2
+
+    # Enhanced Architecture Features - ALL ENABLED
+    USE_ENHANCED_ARCHITECTURE = True  # Multi-branch architecture with attention
+    USE_PROGRESSIVE_UNFREEZING = True  # Progressive unfreezing strategy
+    USE_ADVANCED_AUGMENTATION = True  # Advanced data augmentation
+    USE_ENSEMBLE_LEARNING = True  # Ensemble learning
+    USE_TEST_TIME_AUGMENTATION = True  # Test-time augmentation
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3
+
+    # Ensemble learning parameters
+    ENSEMBLE_STRATEGY = 'stacking'  # Use stacking for best performance
+    ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"]
+    META_LEARNER_EPOCHS = 50
+
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 7  # More augmentations for better accuracy
+
+    # Advanced training parameters
+    USE_MIXUP = True
+    USE_CUTMIX = True
+    USE_LABEL_SMOOTHING = True
+    LABEL_SMOOTHING_FACTOR = 0.1
+
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = True
+    USE_WARM_RESTARTS = True
+
+    # Regularization
+    USE_DROPOUT_SCHEDULING = True
+    USE_WEIGHT_DECAY = True
+    WEIGHT_DECAY_FACTOR = 1e-4
+
+    # Paths
+    DATA_DIR = "data"
+    MODEL_SAVE_DIR = "saved_models"
+    RESULTS_DIR = "results"
+
+
+class ConservativeConfig:
+    """
+    Conservative configuration that enables only the most proven improvements.
+    Good for users who want better performance without experimental features.
+    """
+
+    # Models to train
+    MODELS_TO_TRAIN = ["MobileNetV2"]
+
+    # Data parameters
+    IMG_SIZE = (224, 224)
+    BATCH_SIZE = 32
+    TRAIN_SPLIT = 0.80
+    VALIDATION_SPLIT = 0.25
+
+    # Training parameters
+    INITIAL_EPOCHS = 20
+    FINE_TUNE_EPOCHS = 10
+    INITIAL_LEARNING_RATE = 0.0005
+    FINE_TUNE_LEARNING_RATE = 0.0001
+
+    # Model parameters
+    NUM_CLASSES = 23
+    DROPOUT_RATE = 0.5
+    NUM_FROZEN_LAYERS = 2
+
+    # Enhanced Architecture Features - CONSERVATIVE SELECTION
+    USE_ENHANCED_ARCHITECTURE = False  # Keep original architecture
+    USE_PROGRESSIVE_UNFREEZING = True  # Proven to work well
+    USE_ADVANCED_AUGMENTATION = True  # Almost always beneficial
+    USE_ENSEMBLE_LEARNING = False  # Keep it simple
+    USE_TEST_TIME_AUGMENTATION = True  # Low risk, good reward
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3
+
+    # Ensemble learning parameters (not used)
+    ENSEMBLE_STRATEGY = 'soft_voting'
+    ENSEMBLE_MODELS = ["MobileNetV2"]
+    META_LEARNER_EPOCHS = 50
+
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 5
+
+    # Advanced training parameters
+    USE_MIXUP = False
+    USE_CUTMIX = False
+    USE_LABEL_SMOOTHING = True  # Usually beneficial
+    LABEL_SMOOTHING_FACTOR = 0.1
+
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = False
+    USE_WARM_RESTARTS = False
+
+    # Regularization
+    USE_DROPOUT_SCHEDULING = False
+    USE_WEIGHT_DECAY = True  # Standard regularization
+    WEIGHT_DECAY_FACTOR = 1e-4
+
+    # Paths
+    DATA_DIR = "data"
+    MODEL_SAVE_DIR = "saved_models"
+    RESULTS_DIR = "results"
\ No newline at end of file
diff --git a/src/ensemble_learner.py b/src/ensemble_learner.py
new file mode 100644
index 0000000..2cfe51f
--- /dev/null
+++ b/src/ensemble_learner.py
@@ -0,0 +1,252 @@
+import tensorflow as tf
+from tensorflow.keras import layers, models
+import numpy as np
+from typing import List, Dict, Tuple
+
+class EnsembleLearner:
+    """
+    Ensemble learning implementation for combining multiple models.
+    Supports various ensemble strategies including voting, averaging, and stacking.
+    """
+
+    def __init__(self, config):
+        self.config = config
+        self.models = []
+        self.model_weights = []
+        self.ensemble_strategy = getattr(config, 'ENSEMBLE_STRATEGY', 'soft_voting')
+
+    def add_model(self, model, weight=1.0):
+        """Add a trained model to the ensemble."""
+        self.models.append(model)
+        self.model_weights.append(weight)
+
+    def create_stacked_ensemble(self, models_dict: Dict, train_data, val_data):
+        """
+        Create a stacked ensemble using a meta-learner.
+        The meta-learner learns to combine predictions from base models.
+        """
+        print("Creating stacked ensemble...")
+
+        # Generate meta-features from base models
+        meta_train_features, meta_train_labels = self._generate_meta_features(
+            models_dict, train_data
+        )
+        meta_val_features, meta_val_labels = self._generate_meta_features(
+            models_dict, val_data
+        )
+
+        # Create meta-learner
+        meta_learner = self._create_meta_learner(len(models_dict))
+
+        # Train meta-learner
+        meta_learner.compile(
+            optimizer='adam',
+            loss='categorical_crossentropy',
+            metrics=['accuracy']
+        )
+
+        history = meta_learner.fit(
+            meta_train_features, meta_train_labels,
+            validation_data=(meta_val_features, meta_val_labels),
+            epochs=getattr(self.config, 'META_LEARNER_EPOCHS', 50),
+            batch_size=32,
+            verbose=1
+        )
+
+        # Create final ensemble model
+        ensemble_model = self._create_ensemble_model(models_dict, meta_learner)
+
+        return ensemble_model, history
+
+    def _generate_meta_features(self, models_dict: Dict, dataset):
+        """
+        Generate meta-features by collecting predictions from all base models.
+
+        The dataset is traversed exactly once and every base model predicts on
+        the same batch, so features and labels stay aligned even when the
+        dataset yields its batches in a different order on each iteration.
+
+        Args:
+            models_dict: Mapping of model name to trained base model.
+            dataset: Iterable yielding (images, labels) batches.
+
+        Returns:
+            Tuple (meta_features, meta_labels): the base-model predictions
+            stacked horizontally, and the labels of the same samples.
+
+        Raises:
+            ValueError: If models_dict is empty or dataset yields no batches.
+        """
+        if not models_dict:
+            raise ValueError("At least one base model is required")
+
+        base_models = list(models_dict.values())
+        per_model_predictions = [[] for _ in base_models]
+        label_batches = []
+
+        for batch_images, batch_labels in dataset:
+            label_batches.append(np.asarray(batch_labels))
+            for collected, model in zip(per_model_predictions, base_models):
+                collected.append(model.predict_on_batch(batch_images))
+
+        if not label_batches:
+            raise ValueError("Cannot generate meta-features from an empty dataset")
+
+        # Stack predictions horizontally
+        meta_features = np.hstack(
+            [np.concatenate(collected, axis=0) for collected in per_model_predictions]
+        )
+        meta_labels = np.concatenate(label_batches, axis=0)
+
+        return meta_features, meta_labels
+
+    def _create_meta_learner(self, num_base_models):
+        """Create the meta-learner neural network."""
+        input_dim = num_base_models * self.config.NUM_CLASSES
+
+        meta_learner = models.Sequential([
+            layers.Dense(256, activation='relu', input_shape=(input_dim,)),
+            layers.Dropout(0.3),
+            layers.Dense(128, activation='relu'),
+            layers.Dropout(0.3),
+            layers.Dense(64, activation='relu'),
+            layers.Dropout(0.2),
+            layers.Dense(self.config.NUM_CLASSES, activation='softmax')
+        ])
+
+        return meta_learner
+
+    def _create_ensemble_model(self, base_models: Dict, meta_learner):
+        """Create the final ensemble model that combines base models with meta-learner."""
+
+        # Create inputs for the ensemble
+        input_layer = layers.Input(shape=(*self.config.IMG_SIZE, 3))
+
+        # Get predictions from all base models
+        base_predictions = []
+        for model_name, model in base_models.items():
+            # Make base model non-trainable
+            model.trainable = False
+            pred = model(input_layer)
+            base_predictions.append(pred)
+
+        # Concatenate all base predictions
+        combined_predictions = layers.Concatenate()(base_predictions)
+
+        # Pass through meta-learner
+        final_prediction = meta_learner(combined_predictions)
+
+        # Create final ensemble model
+        ensemble_model = models.Model(inputs=input_layer, outputs=final_prediction)
+
+        return ensemble_model
+
+    def predict_with_ensemble(self, models_dict: Dict, test_data, strategy='soft_voting'):
+        """
+        Make predictions using ensemble strategy.
+
+        Args:
+            models_dict: Dictionary of trained models
+            test_data: Test dataset
+            strategy: 'soft_voting', 'hard_voting', 'weighted_voting'
+
+        Returns:
+            Averaged model predictions for 'soft_voting' and
+            'weighted_voting'; predicted class indices for 'hard_voting'.
+
+        Raises:
+            ValueError: If the strategy is unknown, or if 'weighted_voting' is
+                requested without exactly one registered weight per model.
+        """
+        print(f"Making ensemble predictions with strategy: {strategy}")
+
+        # Collect predictions from all models
+        # NOTE(review): every model iterates test_data separately, so the rows
+        # only line up if test_data yields samples in a fixed order - confirm
+        # that callers pass an unshuffled dataset.
+        all_predictions = [
+            model.predict(test_data, verbose=0) for model in models_dict.values()
+        ]
+
+        # Apply ensemble strategy
+        if strategy == 'soft_voting':
+            # Average the probabilities
+            ensemble_pred = np.mean(all_predictions, axis=0)
+        elif strategy == 'hard_voting':
+            # Majority voting on predicted classes
+            hard_preds = [np.argmax(pred, axis=1) for pred in all_predictions]
+            ensemble_pred = np.array([
+                np.bincount(votes).argmax()
+                for votes in zip(*hard_preds)
+            ])
+        elif strategy == 'weighted_voting':
+            # Weighted average based on model performance
+            weights = np.array(self.model_weights)
+            if len(weights) != len(all_predictions):
+                raise ValueError(
+                    f"weighted_voting needs one weight per model: got {len(weights)} "
+                    f"weights for {len(all_predictions)} models"
+                )
+            weights = weights / weights.sum()  # Normalize weights
+            ensemble_pred = np.average(all_predictions, axis=0, weights=weights)
+        else:
+            raise ValueError(f"Unknown ensemble strategy: {strategy}")
+
+        return ensemble_pred
+
+    def evaluate_ensemble_diversity(self, models_dict: Dict, test_data):
+        """
+        Evaluate the diversity of models in the ensemble.
+        Higher diversity often leads to better ensemble performance.
+
+        Returns:
+            Tuple (disagreements, avg_disagreement): the pairwise disagreement
+            rate per model pair, and their mean (0.0 when there are fewer than
+            two models and therefore no pairs).
+        """
+        print("Evaluating ensemble diversity...")
+
+        predictions = {}
+        for model_name, model in models_dict.items():
+            pred = model.predict(test_data, verbose=0)
+            predictions[model_name] = np.argmax(pred, axis=1)
+
+        # Calculate pairwise disagreement
+        model_names = list(predictions.keys())
+        disagreements = {}
+
+        for i, model1 in enumerate(model_names):
+            for model2 in model_names[i + 1:]:
+                disagreement = np.mean(predictions[model1] != predictions[model2])
+                disagreements[f"{model1}_vs_{model2}"] = disagreement
+
+        # Calculate average disagreement; np.mean of an empty list would be NaN
+        if disagreements:
+            avg_disagreement = np.mean(list(disagreements.values()))
+        else:
+            avg_disagreement = 0.0
+
+        print(f"Average pairwise disagreement: {avg_disagreement:.4f}")
+        print("Pairwise disagreements:")
+        for pair, disagreement in disagreements.items():
+            print(f" {pair}: {disagreement:.4f}")
+
+        return disagreements, avg_disagreement
+
+    def create_model_ensemble_config(self, base_models: List[str]):
+        """
+        Create configuration for training multiple models for ensemble.
+        """
+        ensemble_configs = []
+
+        for i, model_name in enumerate(base_models):
+            config = {
+                'model_name': model_name,
+                'seed': 42 + i,  # Different seeds for diversity
+                'dropout_rate': self.config.DROPOUT_RATE + (i * 0.1),  # Varying dropout
+                'data_augmentation_strength': 1.0 + (i * 0.2),  # Varying augmentation
+                'initial_lr': self.config.INITIAL_LEARNING_RATE * (0.8 ** i),  # Varying LR
+            }
+            ensemble_configs.append(config)
+
+        return ensemble_configs
\ No newline at end of file
diff --git a/src/mobilenet_transfer_learning.py b/src/mobilenet_transfer_learning.py
index c5947c8..aa7bea1 100644
--- a/src/mobilenet_transfer_learning.py
+++ b/src/mobilenet_transfer_learning.py
@@ -44,9 +44,9 @@ def create_base_model(self, model_name):
     def build_model(self, model_name):
         """
         Build the complete model with the base MobileNet and additional layers.
-        Uses a sequential model which means the input is a single tensor.
-        Global average pooling is applied to reduce the spatial dimensions,
-        followed by a dropout layer and a dense output layer with softmax activation.
+        By default uses a sequential model: global average pooling, dropout and a
+        dense softmax output layer. When config.USE_ENHANCED_ARCHITECTURE is set,
+        the multi-branch architecture with attention mechanism is built instead.
         """
         base_model = self.create_base_model(model_name)
 
@@ -58,15 +58,78 @@ def build_model(self, model_name):
         for layer in base_model.layers[num_frozen:]:
             layer.trainable = True
 
-        model = models.Sequential([
-            base_model,
-            layers.GlobalAveragePooling2D(),
-            layers.Dropout(self.config.DROPOUT_RATE),
-            layers.Dense(self.config.NUM_CLASSES, activation='softmax')
-        ])
+        # Check if enhanced architecture is enabled
+        if getattr(self.config, 'USE_ENHANCED_ARCHITECTURE', False):
+            return self._build_enhanced_model(base_model, model_name)
+        else:
+            # Original sequential model
+            model = models.Sequential([
+                base_model,
+                layers.GlobalAveragePooling2D(),
+                layers.Dropout(self.config.DROPOUT_RATE),
+                layers.Dense(self.config.NUM_CLASSES, activation='softmax')
+            ])
+            return model
 
+    def _build_enhanced_model(self, base_model, model_name):
+        """
+        Build enhanced multi-branch architecture with attention mechanism.
+        """
+        inputs = base_model.input
+        base_features = base_model(inputs)
+
+        # Branch 1: Global Average Pooling (original approach)
+        branch1 = layers.GlobalAveragePooling2D(name='global_avg_pool')(base_features)
+        branch1 = layers.Dropout(self.config.DROPOUT_RATE)(branch1)
+        branch1 = layers.Dense(256, activation='relu', name='branch1_dense')(branch1)
+
+        # Branch 2: Global Max Pooling for complementary features
+        branch2 = layers.GlobalMaxPooling2D(name='global_max_pool')(base_features)
+        branch2 = layers.Dropout(self.config.DROPOUT_RATE)(branch2)
+        branch2 = layers.Dense(256, activation='relu', name='branch2_dense')(branch2)
+
+        # Branch 3: Attention mechanism
+        attention_features = self._create_attention_branch(base_features)
+
+        # Combine branches
+        combined = layers.Concatenate(name='combine_branches')([branch1, branch2, attention_features])
+
+        # Additional processing layers
+        x = layers.Dense(512, activation='relu', name='combined_dense1')(combined)
+        x = layers.Dropout(self.config.DROPOUT_RATE)(x)
+        x = layers.Dense(256, activation='relu', name='combined_dense2')(x)
+        x = layers.Dropout(self.config.DROPOUT_RATE / 2)(x)
+
+        # Output layer
+        outputs = layers.Dense(self.config.NUM_CLASSES, activation='softmax', name='predictions')(x)
+
+        model = models.Model(inputs=inputs, outputs=outputs, name=f'{model_name}_enhanced')
         return model
 
+    def _create_attention_branch(self, base_features):
+        """
+        Create attention mechanism branch for enhanced feature extraction.
+        """
+        # Spatial attention
+        spatial_attention = layers.Conv2D(1, kernel_size=1, activation='sigmoid', name='spatial_attention')(base_features)
+        attended_features = layers.Multiply(name='apply_spatial_attention')([base_features, spatial_attention])
+
+        # Channel attention
+        gap = layers.GlobalAveragePooling2D(name='attention_gap')(attended_features)
+        channel_attention = layers.Dense(base_features.shape[-1] // 8, activation='relu', name='channel_attention_1')(gap)
+        channel_attention = layers.Dense(base_features.shape[-1], activation='sigmoid', name='channel_attention_2')(channel_attention)
+        channel_attention = layers.Reshape((1, 1, base_features.shape[-1]), name='reshape_channel_attention')(channel_attention)
+
+        # Apply channel attention
+        final_attention = layers.Multiply(name='apply_channel_attention')([attended_features, channel_attention])
+
+        # Pool and process
+        attention_pooled = layers.GlobalAveragePooling2D(name='attention_pool')(final_attention)
+        attention_processed = layers.Dense(256, activation='relu', name='attention_dense')(attention_pooled)
+        attention_processed = layers.Dropout(self.config.DROPOUT_RATE)(attention_processed)
+
+        return attention_processed
+
     def compile_model(self, model, model_name, learning_rate=None):
         """
         Compile the model to prepare it for training.
@@ -142,8 +205,18 @@ def fine_tune_model(self, model, model_name, train_data, val_data):
         print(f"Fine-tuning {model_name}")
         print(f"{'='*50}")
 
+        # Check if progressive unfreezing is enabled
+        if getattr(self.config, 'USE_PROGRESSIVE_UNFREEZING', False):
+            return self._progressive_fine_tune(model, model_name, train_data, val_data)
+        else:
+            return self._standard_fine_tune(model, model_name, train_data, val_data)
+
+    def _standard_fine_tune(self, model, model_name, train_data, val_data):
+        """Standard fine-tuning approach (original method)."""
         # Unfreeze the base model and set the first half of its layers to be trainable
-        base_model = model.layers[0]
+        # In the functional (enhanced) model layers[0] is typically the InputLayer, so
+        # pick the first nested model; Sequential models still resolve to layers[0].
+        base_model = next((layer for layer in model.layers if hasattr(layer, 'layers')), model.layers[0])
         base_model.trainable = True
         fine_tune_at = len(base_model.layers) // 2
         for layer in base_model.layers[:fine_tune_at]:
@@ -179,3 +252,116 @@ def fine_tune_model(self, model, model_name, train_data, val_data):
         self.histories[f"{model_name}_fine_tuned"] = history
 
         return model, history, fine_tune_time
+
+    def _progressive_fine_tune(self, model, model_name, train_data, val_data):
+        """
+        Fine-tune with progressive unfreezing over several phases.
+
+        Each phase unfreezes a larger tail of the base model's layers and
+        recompiles the model with a halved learning rate. FINE_TUNE_EPOCHS is
+        split across the phases; earlier phases receive any remainder.
+
+        Returns:
+            Tuple of (model, combined history, total fine-tuning time in seconds).
+        """
+        print("Using progressive unfreezing strategy...")
+
+        # Find base model layer
+        base_model = None
+        for layer in model.layers:
+            if hasattr(layer, 'layers') and len(layer.layers) > 20:  # Likely the base model
+                base_model = layer
+                break
+
+        if base_model is None:
+            print("Warning: Could not find base model for progressive unfreezing. Using standard method.")
+            return self._standard_fine_tune(model, model_name, train_data, val_data)
+
+        total_layers = len(base_model.layers)
+        # Guard against a zero or negative phase count (division by zero below).
+        phases = max(1, getattr(self.config, 'PROGRESSIVE_PHASES', 3))
+        # Spread the epoch budget over the phases without dropping the remainder
+        # (e.g. 10 epochs over 3 phases -> 4, 3, 3 instead of 3, 3, 3).
+        base_epochs, extra_epochs = divmod(self.config.FINE_TUNE_EPOCHS, phases)
+
+        # Per-layer flags are ignored while the containing model is frozen, so
+        # make the base model trainable before toggling individual layers.
+        base_model.trainable = True
+
+        all_histories = []
+        total_time = 0
+
+        for phase in range(phases):
+            print(f"\n--- Progressive Fine-tuning Phase {phase + 1}/{phases} ---")
+
+            # Calculate which layers to unfreeze for this phase
+            unfreeze_from = total_layers - ((phase + 1) * total_layers // phases)
+
+            # Set trainability
+            for i, layer in enumerate(base_model.layers):
+                layer.trainable = i >= unfreeze_from
+
+            # Adjust learning rate for each phase
+            phase_lr = self.config.FINE_TUNE_LEARNING_RATE * (0.5 ** phase)
+            model = self.compile_model(model, model_name, learning_rate=phase_lr)
+
+            print(f"Unfreezing layers from index {unfreeze_from} onwards ({total_layers - unfreeze_from} layers)")
+            print(f"Learning rate: {phase_lr:.6f}")
+
+            callbacks = [
+                EarlyStopping(patience=5, restore_best_weights=True),
+                ReduceLROnPlateau(factor=0.5, patience=3),
+                ModelCheckpoint(
+                    f"{self.config.MODEL_SAVE_DIR}/{model_name}_progressive_phase_{phase+1}.h5",
+                    save_best_only=True
+                )
+            ]
+
+            epochs_this_phase = base_epochs + (1 if phase < extra_epochs else 0)
+            start_time = time.time()
+            history = model.fit(
+                train_data,
+                epochs=epochs_this_phase,
+                validation_data=val_data,
+                callbacks=callbacks,
+                verbose=1
+            )
+
+            phase_time = time.time() - start_time
+            total_time += phase_time
+            all_histories.append(history)
+            print(f"Phase {phase + 1} time: {phase_time:.2f} seconds")
+
+        # Combine all histories
+        combined_history = self._combine_histories(all_histories)
+
+        # Save final model
+        model.save(f"{self.config.MODEL_SAVE_DIR}/{model_name}_fine_tuned.h5")
+
+        print(f"Total fine-tuning time: {total_time:.2f} seconds")
+        self.models[f"{model_name}_fine_tuned"] = model
+        self.histories[f"{model_name}_fine_tuned"] = combined_history
+
+        return model, combined_history, total_time
+
+    def _combine_histories(self, histories):
+        """
+        Combine multiple training histories into one.
+
+        Returns None for an empty list; otherwise an object whose `history`
+        dict concatenates the per-epoch values of every metric in order.
+        """
+        if not histories:
+            return None
+
+        combined = {}
+        for history in histories:
+            # Tolerates metrics that are missing from some of the histories.
+            for key, values in history.history.items():
+                combined.setdefault(key, []).extend(values)
+
+        class CombinedHistory:
+            def __init__(self, history_dict):
+                self.history = history_dict
+
+        return CombinedHistory(combined)
diff --git a/src/model_evaluator.py b/src/model_evaluator.py
index 943e33c..8d2d69d 100644
--- a/src/model_evaluator.py
+++ b/src/model_evaluator.py
@@ -23,7 +23,12 @@ def evaluate_model(self, model, test_data, model_name):
         """
 
         print(f"\nEvaluating {model_name}...")
-        predictions = model.predict(test_data)
+
+        # Check if test-time augmentation is enabled
+        if getattr(self.config, 'USE_TEST_TIME_AUGMENTATION', False):
+            predictions = self._evaluate_with_tta(model, test_data, model_name)
+        else:
+            predictions = model.predict(test_data)
 
         import numpy as np
         predicted_classes = np.argmax(predictions, axis=1)
@@ -47,6 +52,55 @@ def evaluate_model(self, model, test_data, model_name):
 
         return predicted_classes, true_classes
 
+    def _evaluate_with_tta(self, model, test_data, model_name):
+        """
+        Evaluate model with test-time augmentation for improved accuracy.
+
+        Predicts on several randomly augmented copies of every test image and
+        averages the predictions that belong to the same original image.
+
+        Returns:
+            Array with one averaged prediction row per original test image.
+
+        Raises:
+            ValueError: If the number of predictions is not a multiple of the
+                number of augmentations.
+        """
+        # Imported here because the only numpy import visible in evaluate_model is
+        # local to that function and therefore not in scope in this method.
+        import numpy as np
+
+        from .data_processor import DataProcessor
+
+        print(f"Using test-time augmentation for {model_name}...")
+
+        # Create data processor for TTA
+        data_processor = DataProcessor(self.config)
+
+        # Create TTA dataset
+        tta_dataset, n_augmentations = data_processor.create_test_time_augmentation_dataset(
+            test_data, getattr(self.config, 'TTA_AUGMENTATIONS', 5)
+        )
+
+        # Get predictions on augmented data
+        tta_predictions = model.predict(tta_dataset, verbose=0)
+
+        # Reshape predictions to group by original image
+        if len(tta_predictions) % n_augmentations != 0:
+            raise ValueError(
+                f"Got {len(tta_predictions)} predictions, which is not a multiple "
+                f"of {n_augmentations} augmentations"
+            )
+        n_original_samples = len(tta_predictions) // n_augmentations
+        reshaped_predictions = tta_predictions.reshape(n_original_samples, n_augmentations, -1)
+
+        # Average predictions across augmentations
+        final_predictions = np.mean(reshaped_predictions, axis=1)
+
+        print(f"TTA completed with {n_augmentations} augmentations per image")
+
+        return final_predictions
+
     def get_model_size(self, model):
         """
         Calculate the size of the model in MB.