diff --git a/results/config_snapshot_20250718_001955.py b/results/config_snapshot_20250718_001955.py
new file mode 100644
index 0000000..1ab2ee5
--- /dev/null
+++ b/results/config_snapshot_20250718_001955.py
@@ -0,0 +1,58 @@
+class Config:
+    # Models to train
+    MODELS_TO_TRAIN = ["MobileNetV2"]
+
+    # Data parameters
+    IMG_SIZE = (224, 224)
+    BATCH_SIZE = 32
+    TRAIN_SPLIT = 0.80
+    VALIDATION_SPLIT = 0.25
+
+    # Training parameters
+    INITIAL_EPOCHS = 20
+    FINE_TUNE_EPOCHS = 10
+    INITIAL_LEARNING_RATE = 0.0005
+    FINE_TUNE_LEARNING_RATE = 0.0001
+
+    # Model parameters
+    NUM_CLASSES = 23
+    DROPOUT_RATE = 0.5
+    NUM_FROZEN_LAYERS = 2  # Number of layers to freeze in feature extraction
+
+    # Enhanced Architecture Features (set to True to enable)
+    USE_ENHANCED_ARCHITECTURE = False  # Multi-branch architecture with attention
+    USE_PROGRESSIVE_UNFREEZING = False  # Progressive unfreezing strategy
+    USE_ADVANCED_AUGMENTATION = False  # Advanced data augmentation
+    USE_ENSEMBLE_LEARNING = False  # Ensemble learning
+    USE_TEST_TIME_AUGMENTATION = False  # Test-time augmentation
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3  # Number of progressive unfreezing phases
+    
+    # Ensemble learning parameters
+    ENSEMBLE_STRATEGY = 'soft_voting'  # 'soft_voting', 'hard_voting', 'weighted_voting', 'stacking'
+    ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"]
+    META_LEARNER_EPOCHS = 50
+    
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 5  # Number of augmentations per test image
+    
+    # Advanced training parameters
+    USE_MIXUP = False  # Mixup data augmentation
+    USE_CUTMIX = False  # CutMix data augmentation
+    USE_LABEL_SMOOTHING = False  # Label smoothing
+    LABEL_SMOOTHING_FACTOR = 0.1
+    
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = False  # Cosine annealing scheduler
+    USE_WARM_RESTARTS = False  # Warm restarts
+    
+    # Regularization
+    USE_DROPOUT_SCHEDULING = False  # Adaptive dropout scheduling
+    USE_WEIGHT_DECAY = False  # L2 weight decay
+    WEIGHT_DECAY_FACTOR = 1e-4
+
+    # Paths
+    DATA_DIR = "data"
+    MODEL_SAVE_DIR = "saved_models"
+    RESULTS_DIR = "results"
diff --git a/results/model_comparison_20250718_001955.csv b/results/model_comparison_20250718_001955.csv
new file mode 100644
index 0000000..840ad19
--- /dev/null
+++ b/results/model_comparison_20250718_001955.csv
@@ -0,0 +1,2 @@
+,Accuracy (%),Loss,Size (MB),Parameters
+MobileNetV2,49.59,1.6897,9.3222,2287447
diff --git a/results/training_comparison_20250718_001955.png b/results/training_comparison_20250718_001955.png
new file mode 100644
index 0000000..6636470
Binary files /dev/null and b/results/training_comparison_20250718_001955.png differ
diff --git a/results/training_history_20250718_001955.csv b/results/training_history_20250718_001955.csv
new file mode 100644
index 0000000..e126c47
--- /dev/null
+++ b/results/training_history_20250718_001955.csv
@@ -0,0 +1,31 @@
+model,epoch,train_accuracy,val_accuracy,train_loss,val_loss
+MobileNetV2_feature_extraction,1,0.10285714268684387,0.1804979294538498,3.2679028511047363,2.7532925605773926
+MobileNetV2_feature_extraction,2,0.22207792103290558,0.28215768933296204,2.6492979526519775,2.5107262134552
+MobileNetV2_feature_extraction,3,0.32181817293167114,0.3381742835044861,2.3156282901763916,2.323220729827881
+MobileNetV2_feature_extraction,4,0.37584415078163147,0.3838174343109131,2.1061758995056152,2.228482484817505
+MobileNetV2_feature_extraction,5,0.4171428680419922,0.3921161890029907,1.9495772123336792,2.1282544136047363
+MobileNetV2_feature_extraction,6,0.4527272582054138,0.4066390097141266,1.840901255607605,2.0412991046905518
+MobileNetV2_feature_extraction,7,0.47220778465270996,0.4273858964443207,1.7568695545196533,1.970870852470398
+MobileNetV2_feature_extraction,8,0.5070129632949829,0.4585062265396118,1.6532152891159058,1.9253556728363037
+MobileNetV2_feature_extraction,9,0.5210389494895935,0.46265560388565063,1.5946297645568848,1.8834987878799438
+MobileNetV2_feature_extraction,10,0.5425974130630493,0.47095435857772827,1.5383957624435425,1.8493201732635498
+MobileNetV2_feature_extraction,11,0.5488311648368835,0.46265560388565063,1.49072265625,1.7981990575790405
+MobileNetV2_feature_extraction,12,0.5703896284103394,0.4605809152126312,1.4195979833602905,1.7736471891403198
+MobileNetV2_feature_extraction,13,0.5807791948318481,0.4564315378665924,1.4124072790145874,1.7660542726516724
+MobileNetV2_feature_extraction,14,0.5774025917053223,0.46887966990470886,1.3677922487258911,1.7547590732574463
+MobileNetV2_feature_extraction,15,0.6015584468841553,0.46680498123168945,1.3222650289535522,1.734666347503662
+MobileNetV2_feature_extraction,16,0.6051948070526123,0.46473029255867004,1.3051408529281616,1.7301479578018188
+MobileNetV2_feature_extraction,17,0.6038960814476013,0.4730290472507477,1.2859197854995728,1.7186366319656372
+MobileNetV2_feature_extraction,18,0.6283116936683655,0.4792531132698059,1.2456966638565063,1.712315559387207
+MobileNetV2_feature_extraction,19,0.6168830990791321,0.4792531132698059,1.2730422019958496,1.7302953004837036
+MobileNetV2_feature_extraction,20,0.621558427810669,0.4792531132698059,1.2226208448410034,1.705318808555603
+MobileNetV2_fine_tuned,1,0.635064959526062,0.48547717928886414,1.1813465356826782,1.7019144296646118
+MobileNetV2_fine_tuned,2,0.6475324630737305,0.48755186796188354,1.172446846961975,1.6987230777740479
+MobileNetV2_fine_tuned,3,0.6496104001998901,0.4834024906158447,1.1667516231536865,1.7070080041885376
+MobileNetV2_fine_tuned,4,0.6407791972160339,0.4958506226539612,1.1737589836120605,1.6940451860427856
+MobileNetV2_fine_tuned,5,0.6376623511314392,0.48962655663490295,1.1516849994659424,1.7008932828903198
+MobileNetV2_fine_tuned,6,0.6342856884002686,0.4937759339809418,1.1716210842132568,1.6919862031936646
+MobileNetV2_fine_tuned,7,0.6249350905418396,0.4958506226539612,1.1754311323165894,1.6897497177124023
+MobileNetV2_fine_tuned,8,0.649350643157959,0.4958506226539612,1.1520062685012817,1.701900601387024
+MobileNetV2_fine_tuned,9,0.6594805121421814,0.49170124530792236,1.1290615797042847,1.6990609169006348
+MobileNetV2_fine_tuned,10,0.6503896117210388,0.4813278019428253,1.1408472061157227,1.6991528272628784
diff --git a/src/config.py b/src/config.py
index 4ee4302..1ab2ee5 100644
--- a/src/config.py
+++ b/src/config.py
@@ -19,6 +19,39 @@ class Config:
     DROPOUT_RATE = 0.5
     NUM_FROZEN_LAYERS = 2  # Number of layers to freeze in feature extraction
 
+    # Enhanced Architecture Features (set to True to enable)
+    USE_ENHANCED_ARCHITECTURE = False  # Multi-branch architecture with attention
+    USE_PROGRESSIVE_UNFREEZING = False  # Progressive unfreezing strategy
+    USE_ADVANCED_AUGMENTATION = False  # Advanced data augmentation
+    USE_ENSEMBLE_LEARNING = False  # Ensemble learning
+    USE_TEST_TIME_AUGMENTATION = False  # Test-time augmentation
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3  # Number of progressive unfreezing phases
+    
+    # Ensemble learning parameters
+    ENSEMBLE_STRATEGY = 'soft_voting'  # 'soft_voting', 'hard_voting', 'weighted_voting', 'stacking'
+    ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"]
+    META_LEARNER_EPOCHS = 50
+    
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 5  # Number of augmentations per test image
+    
+    # Advanced training parameters
+    USE_MIXUP = False  # Mixup data augmentation
+    USE_CUTMIX = False  # CutMix data augmentation
+    USE_LABEL_SMOOTHING = False  # Label smoothing
+    LABEL_SMOOTHING_FACTOR = 0.1
+    
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = False  # Cosine annealing scheduler
+    USE_WARM_RESTARTS = False  # Warm restarts
+    
+    # Regularization
+    USE_DROPOUT_SCHEDULING = False  # Adaptive dropout scheduling
+    USE_WEIGHT_DECAY = False  # L2 weight decay
+    WEIGHT_DECAY_FACTOR = 1e-4
+
     # Paths
     DATA_DIR = "data"
     MODEL_SAVE_DIR = "saved_models"
diff --git a/src/data_processor.py b/src/data_processor.py
index c068a9e..845e453 100644
--- a/src/data_processor.py
+++ b/src/data_processor.py
@@ -12,12 +12,11 @@ def create_data_generators(self, train_dir, validation_dir=None):
         If validation_dir is not provided, split train_dir using validation_split.
         """
 
-        # Data augmentation pipeline
-        data_augmentation = tf.keras.Sequential([
-            layers.RandomFlip("horizontal"),
-            layers.RandomRotation(0.1),
-            layers.RandomZoom(0.1),
-        ])
+        # Choose augmentation strategy based on config
+        if hasattr(self.config, 'USE_ADVANCED_AUGMENTATION') and self.config.USE_ADVANCED_AUGMENTATION:
+            data_augmentation = self._create_advanced_augmentation()
+        else:
+            data_augmentation = self._create_basic_augmentation()
 
         if validation_dir:
             train_ds = tf.keras.utils.image_dataset_from_directory(
@@ -75,6 +74,62 @@ def create_data_generators(self, train_dir, validation_dir=None):
 
         return train_ds, val_ds
 
+    def _create_basic_augmentation(self):
+        """Return the default augmentation stack: flip, rotate and zoom."""
+        # Layers run in the order they are listed in the Sequential below.
+        mirror = layers.RandomFlip("horizontal")
+        rotate = layers.RandomRotation(0.1)
+        zoom = layers.RandomZoom(0.1)
+        return tf.keras.Sequential([mirror, rotate, zoom])
+
+    def _create_advanced_augmentation(self):
+        """Return the heavier augmentation stack: geometric, contrast/brightness and colour jitter."""
+        return tf.keras.Sequential([
+            layers.RandomFlip("horizontal"),
+            layers.RandomRotation(0.2),
+            layers.RandomZoom(0.15),
+            layers.RandomContrast(0.2),
+            layers.RandomBrightness(0.2),
+            layers.RandomTranslation(0.1, 0.1),
+            # Colour/compression jitter for leaf/tree imagery. NOTE(review): confirm these tf.image ops accept the batch shape and value range fed here.
+            layers.Lambda(lambda x: tf.image.random_hue(x, 0.1)),
+            layers.Lambda(lambda x: tf.image.random_saturation(x, 0.8, 1.2)),
+            layers.Lambda(lambda x: tf.image.random_jpeg_quality(x, 85, 100)),
+        ])
+
+    def create_test_time_augmentation_dataset(self, test_ds, n_augmentations=5):
+        """
+        Expand every batch of ``test_ds`` into ``n_augmentations`` randomly
+        augmented copies for test-time augmentation (TTA).
+
+        Copies of one image are adjacent in the output, so predictions can be
+        reshaped to (-1, n_augmentations, num_classes) and averaged.
+
+        Returns:
+            (tta_ds, n_augmentations): the mapped dataset and the copy count.
+        """
+        augmentation = tf.keras.Sequential([
+            layers.RandomFlip("horizontal"),
+            layers.RandomRotation(0.05),
+            layers.RandomZoom(0.05),
+            layers.RandomBrightness(0.1),
+        ])
+
+        def augment_batch(x, y):
+            # training=True is required: Keras random preprocessing layers only
+            # randomise in training mode, otherwise every copy would be equal.
+            copies = [augmentation(x, training=True) for _ in range(n_augmentations)]
+
+            # (batch, n_aug, H, W, C) -> (batch * n_aug, H, W, C)
+            stacked = tf.stack(copies, axis=1)
+            reshaped = tf.reshape(stacked, (-1, *x.shape[1:]))
+
+            # Repeat each label n_aug times so it lines up with its copies.
+            return reshaped, tf.repeat(y, n_augmentations, axis=0)
+
+        tta_ds = test_ds.map(augment_batch)
+        return tta_ds, n_augmentations
+
     def prepare_dataset_from_directory(self, data_dir):
         """
         Prepare dataset from a directory structure with subdirectories for each class.
diff --git a/src/enhanced_config.py b/src/enhanced_config.py
new file mode 100644
index 0000000..1f0d7cf
--- /dev/null
+++ b/src/enhanced_config.py
@@ -0,0 +1,128 @@
+class EnhancedConfig:
+    """
+    Configuration that switches on every optional feature flag.
+    Uses a longer schedule than Config (25 initial + 15 fine-tune epochs).
+    """
+
+    # Models to train
+    MODELS_TO_TRAIN = ["MobileNetV2"]
+
+    # Data parameters
+    IMG_SIZE = (224, 224)
+    BATCH_SIZE = 32
+    TRAIN_SPLIT = 0.80
+    VALIDATION_SPLIT = 0.25
+
+    # Training parameters
+    INITIAL_EPOCHS = 25
+    FINE_TUNE_EPOCHS = 15
+    INITIAL_LEARNING_RATE = 0.0005
+    FINE_TUNE_LEARNING_RATE = 0.0001
+
+    # Model parameters
+    NUM_CLASSES = 23
+    DROPOUT_RATE = 0.5
+    NUM_FROZEN_LAYERS = 2  # Number of layers to freeze in feature extraction
+
+    # Enhanced Architecture Features - ALL ENABLED
+    USE_ENHANCED_ARCHITECTURE = True      # Multi-branch architecture with attention
+    USE_PROGRESSIVE_UNFREEZING = True     # Progressive unfreezing strategy
+    USE_ADVANCED_AUGMENTATION = True      # Advanced data augmentation
+    USE_ENSEMBLE_LEARNING = True          # Ensemble learning
+    USE_TEST_TIME_AUGMENTATION = True     # Test-time augmentation
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3  # Number of progressive unfreezing phases
+
+    # Ensemble learning parameters
+    ENSEMBLE_STRATEGY = 'stacking'  # NOTE: predict_with_ensemble() has no 'stacking' branch; use create_stacked_ensemble()
+    ENSEMBLE_MODELS = ["MobileNetV2", "MobileNetV3Large", "MobileNetV3Small"]
+    META_LEARNER_EPOCHS = 50
+
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 7  # Augmented copies per test image (Config uses 5)
+
+    # Advanced training parameters
+    USE_MIXUP = True  # Mixup data augmentation
+    USE_CUTMIX = True  # CutMix data augmentation
+    USE_LABEL_SMOOTHING = True  # Label smoothing
+    LABEL_SMOOTHING_FACTOR = 0.1
+
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = True  # Cosine annealing scheduler
+    USE_WARM_RESTARTS = True  # Warm restarts
+
+    # Regularization
+    USE_DROPOUT_SCHEDULING = True  # Adaptive dropout scheduling
+    USE_WEIGHT_DECAY = True  # L2 weight decay
+    WEIGHT_DECAY_FACTOR = 1e-4
+
+    # Paths
+    DATA_DIR = "data"
+    MODEL_SAVE_DIR = "saved_models"
+    RESULTS_DIR = "results"
+
+
+class ConservativeConfig:
+    """
+    Configuration that enables only a subset of the optional features.
+    Keeps Config's architecture and schedule; ensemble, mixup/cutmix and LR scheduling stay off.
+    """
+
+    # Models to train
+    MODELS_TO_TRAIN = ["MobileNetV2"]
+
+    # Data parameters
+    IMG_SIZE = (224, 224)
+    BATCH_SIZE = 32
+    TRAIN_SPLIT = 0.80
+    VALIDATION_SPLIT = 0.25
+
+    # Training parameters
+    INITIAL_EPOCHS = 20
+    FINE_TUNE_EPOCHS = 10
+    INITIAL_LEARNING_RATE = 0.0005
+    FINE_TUNE_LEARNING_RATE = 0.0001
+
+    # Model parameters
+    NUM_CLASSES = 23
+    DROPOUT_RATE = 0.5
+    NUM_FROZEN_LAYERS = 2  # Number of layers to freeze in feature extraction
+
+    # Enhanced Architecture Features - CONSERVATIVE SELECTION
+    USE_ENHANCED_ARCHITECTURE = False     # Keep original sequential architecture
+    USE_PROGRESSIVE_UNFREEZING = True     # Progressive unfreezing strategy
+    USE_ADVANCED_AUGMENTATION = True      # Advanced data augmentation
+    USE_ENSEMBLE_LEARNING = False         # Single model only
+    USE_TEST_TIME_AUGMENTATION = True     # Test-time augmentation
+
+    # Progressive unfreezing parameters
+    PROGRESSIVE_PHASES = 3  # Number of progressive unfreezing phases
+
+    # Ensemble learning parameters (not used: USE_ENSEMBLE_LEARNING is False)
+    ENSEMBLE_STRATEGY = 'soft_voting'
+    ENSEMBLE_MODELS = ["MobileNetV2"]
+    META_LEARNER_EPOCHS = 50
+
+    # Test-time augmentation parameters
+    TTA_AUGMENTATIONS = 5  # Number of augmentations per test image
+
+    # Advanced training parameters
+    USE_MIXUP = False  # Mixup data augmentation
+    USE_CUTMIX = False  # CutMix data augmentation
+    USE_LABEL_SMOOTHING = True            # Label smoothing
+    LABEL_SMOOTHING_FACTOR = 0.1
+
+    # Learning rate scheduling
+    USE_COSINE_ANNEALING = False  # Cosine annealing scheduler
+    USE_WARM_RESTARTS = False  # Warm restarts
+
+    # Regularization
+    USE_DROPOUT_SCHEDULING = False  # Adaptive dropout scheduling
+    USE_WEIGHT_DECAY = True               # L2 weight decay
+    WEIGHT_DECAY_FACTOR = 1e-4
+
+    # Paths
+    DATA_DIR = "data"
+    MODEL_SAVE_DIR = "saved_models"
+    RESULTS_DIR = "results"
\ No newline at end of file
diff --git a/src/ensemble_learner.py b/src/ensemble_learner.py
new file mode 100644
index 0000000..2cfe51f
--- /dev/null
+++ b/src/ensemble_learner.py
@@ -0,0 +1,207 @@
+import tensorflow as tf
+from tensorflow.keras import layers, models
+import numpy as np
+from typing import List, Dict, Tuple
+
+class EnsembleLearner:
+    """
+    Ensemble learning implementation for combining multiple models.
+    Supports various ensemble strategies including voting, averaging, and stacking.
+    """
+    
+    def __init__(self, config):
+        """Store ``config`` and start with an empty model/weight registry."""
+        self.ensemble_strategy = getattr(config, 'ENSEMBLE_STRATEGY', 'soft_voting')
+        self.models, self.model_weights = [], []
+        self.config = config
+        
+    def add_model(self, model, weight=1.0):
+        """Register a trained model; ``weight`` is its share under 'weighted_voting'."""
+        self.models.append(model)
+        self.model_weights.append(weight)
+        
+    def create_stacked_ensemble(self, models_dict: Dict, train_data, val_data):
+        """
+        Train a meta-learner on the base models' outputs and wrap everything
+        in a single end-to-end model.
+
+        Args:
+            models_dict: name -> trained base model.
+            train_data / val_data: datasets yielding (images, labels) batches.
+
+        Returns:
+            (ensemble_model, history) where history comes from fitting the
+            meta-learner.
+        """
+        print("Creating stacked ensemble...")
+
+        # Base-model predictions become the meta-learner's inputs.
+        train_x, train_y = self._generate_meta_features(models_dict, train_data)
+        val_x, val_y = self._generate_meta_features(models_dict, val_data)
+
+        meta_learner = self._create_meta_learner(len(models_dict))
+        # NOTE(review): categorical_crossentropy presumes one-hot labels — confirm.
+        meta_learner.compile(
+            optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']
+        )
+
+        n_epochs = getattr(self.config, 'META_LEARNER_EPOCHS', 50)
+        history = meta_learner.fit(
+            train_x,
+            train_y,
+            validation_data=(val_x, val_y),
+            epochs=n_epochs,
+            batch_size=32,
+            verbose=1,
+        )
+
+        stacked = self._create_ensemble_model(models_dict, meta_learner)
+        return stacked, history
+    
+    def _generate_meta_features(self, models_dict: Dict, dataset):
+        """Return (meta_features, meta_labels) built from base-model predictions.
+
+        The dataset is iterated exactly once and every model predicts on the
+        same batch, so rows stay aligned with labels even when the dataset
+        reshuffles on each iteration.
+        """
+        per_model_preds = [[] for _ in models_dict]
+        labels = []
+
+        for batch_images, batch_labels in dataset:
+            labels.append(batch_labels.numpy())
+            for preds, model in zip(per_model_preds, models_dict.values()):
+                preds.append(model.predict_on_batch(batch_images))
+
+        # One column block of class probabilities per base model
+        meta_features = np.hstack([np.concatenate(p, axis=0) for p in per_model_preds])
+        meta_labels = np.concatenate(labels, axis=0)
+        return meta_features, meta_labels
+    
+    def _create_meta_learner(self, num_base_models):
+        """Build the MLP that maps concatenated base-model outputs to class probabilities."""
+        n_classes = self.config.NUM_CLASSES
+        # Input width assumes every base model emits NUM_CLASSES values.
+        feature_width = num_base_models * n_classes
+
+        return models.Sequential([
+            layers.Dense(256, activation='relu', input_shape=(feature_width,)),
+            layers.Dropout(0.3),
+            layers.Dense(128, activation='relu'),
+            layers.Dropout(0.3),
+            layers.Dense(64, activation='relu'),
+            layers.Dropout(0.2),
+            layers.Dense(n_classes, activation='softmax'),
+        ])
+    
+    def _create_ensemble_model(self, base_models: Dict, meta_learner):
+        """
+        Wire the frozen base models and the trained meta-learner into one
+        functional model that maps an image batch to final class probabilities.
+
+        Side effect: every model in ``base_models`` is set non-trainable.
+        """
+        # Shared input fed to every base model.
+        image_in = layers.Input(shape=(*self.config.IMG_SIZE, 3))
+
+        def frozen_output(base):
+            # Base models act as fixed feature producers inside the ensemble.
+            base.trainable = False
+            return base(image_in)
+
+        # Dict order fixes the order of the concatenated outputs.
+        per_model_outputs = [frozen_output(m) for m in base_models.values()]
+
+        # Same column order as the meta-features used to train the meta-learner.
+        merged = layers.Concatenate()(per_model_outputs)
+        final_prediction = meta_learner(merged)
+
+        ensemble_model = models.Model(inputs=image_in, outputs=final_prediction)
+        return ensemble_model
+    
+    def predict_with_ensemble(self, models_dict: Dict, test_data, strategy='soft_voting'):
+        """
+        Combine the base models' predictions on ``test_data``.
+
+        Args:
+            models_dict: name -> trained model; for weighted voting its order
+                must match the order of the add_model() calls.
+            test_data: Input accepted by ``model.predict``.
+            strategy: 'soft_voting', 'hard_voting' or 'weighted_voting'.
+
+        Returns:
+            Class probabilities (n_samples, n_classes) for soft/weighted
+            voting; predicted class indices (n_samples,) for hard voting.
+
+        Raises:
+            ValueError: unknown strategy, or unusable ensemble weights.
+        """
+        print(f"Making ensemble predictions with strategy: {strategy}")
+
+        all_predictions = [m.predict(test_data, verbose=0) for m in models_dict.values()]
+
+        if strategy == 'soft_voting':
+            return np.mean(all_predictions, axis=0)
+        if strategy == 'hard_voting':
+            # Majority vote per sample; ties go to the lowest class index.
+            hard_preds = [np.argmax(p, axis=1) for p in all_predictions]
+            return np.array([np.bincount(votes).argmax() for votes in zip(*hard_preds)])
+        if strategy == 'weighted_voting':
+            weights = np.asarray(self.model_weights, dtype=float)
+            if weights.shape != (len(all_predictions),):
+                raise ValueError("add_model() weights do not match models_dict")
+            if not weights.sum() > 0:
+                # Guards against NaN output from dividing by a zero sum.
+                raise ValueError("Ensemble weights must sum to a positive value")
+            return np.average(all_predictions, axis=0, weights=weights / weights.sum())
+        raise ValueError(f"Unknown ensemble strategy: {strategy}")
+
+    
+    def evaluate_ensemble_diversity(self, models_dict: Dict, test_data):
+        """
+        Measure how often each pair of models predicts a different class.
+
+        Returns (disagreements, avg_disagreement): pairwise "<a>_vs_<b>" fractions
+        and their mean (0.0 when fewer than two models are given).
+        """
+        print("Evaluating ensemble diversity...")
+
+        # NOTE(review): assumes test_data replays samples in the same order on every pass.
+        predictions = {
+            name: np.argmax(model.predict(test_data, verbose=0), axis=1)
+            for name, model in models_dict.items()
+        }
+
+        model_names = list(predictions)
+        disagreements = {}
+        for i, first in enumerate(model_names):
+            for second in model_names[i + 1:]:
+                differs = predictions[first] != predictions[second]
+                disagreements[f"{first}_vs_{second}"] = np.mean(differs)
+
+        # np.mean([]) is NaN (with a warning), so handle the no-pair case.
+        avg_disagreement = np.mean(list(disagreements.values())) if disagreements else 0.0
+
+        print(f"Average pairwise disagreement: {avg_disagreement:.4f}")
+        print("Pairwise disagreements:")
+        for pair, disagreement in disagreements.items():
+            print(f"  {pair}: {disagreement:.4f}")
+        return disagreements, avg_disagreement
+    
+    def create_model_ensemble_config(self, base_models: List[str]):
+        """
+        Return one training-config dict per entry of ``base_models``.
+
+        The i-th model gets seed 42 + i, dropout raised by 0.1 * i,
+        augmentation strength 1.0 + 0.2 * i and the initial LR scaled by 0.8 ** i.
+        """
+        return [
+            {
+                'model_name': name,
+                'seed': 42 + position,
+                'dropout_rate': self.config.DROPOUT_RATE + (position * 0.1),
+                'data_augmentation_strength': 1.0 + (position * 0.2),
+                'initial_lr': self.config.INITIAL_LEARNING_RATE * (0.8 ** position),
+            }
+            for position, name in enumerate(base_models)
+        ]
\ No newline at end of file
diff --git a/src/mobilenet_transfer_learning.py b/src/mobilenet_transfer_learning.py
index c5947c8..aa7bea1 100644
--- a/src/mobilenet_transfer_learning.py
+++ b/src/mobilenet_transfer_learning.py
@@ -44,9 +44,7 @@ def create_base_model(self, model_name):
     def build_model(self, model_name):
         """
         Build the complete model with the base MobileNet and additional layers.
-        Uses a sequential model which means the input is a single tensor.
-        Global average pooling is applied to reduce the spatial dimensions,
-        followed by a dropout layer and a dense output layer with softmax activation.
+        Uses the multi-branch attention head when USE_ENHANCED_ARCHITECTURE is enabled; otherwise the original sequential head (pooling, dropout, softmax).
         """
 
         base_model = self.create_base_model(model_name)
@@ -58,15 +56,78 @@ def build_model(self, model_name):
         for layer in base_model.layers[num_frozen:]:
             layer.trainable = True
 
-        model = models.Sequential([
-            base_model,
-            layers.GlobalAveragePooling2D(),
-            layers.Dropout(self.config.DROPOUT_RATE),
-            layers.Dense(self.config.NUM_CLASSES, activation='softmax')
-        ])
+        # Check if enhanced architecture is enabled
+        if hasattr(self.config, 'USE_ENHANCED_ARCHITECTURE') and self.config.USE_ENHANCED_ARCHITECTURE:
+            return self._build_enhanced_model(base_model, model_name)
+        else:
+            # Original sequential model
+            model = models.Sequential([
+                base_model,
+                layers.GlobalAveragePooling2D(),
+                layers.Dropout(self.config.DROPOUT_RATE),
+                layers.Dense(self.config.NUM_CLASSES, activation='softmax')
+            ])
+            return model
 
+    def _build_enhanced_model(self, base_model, model_name):
+        """
+        Build the three-branch head (avg-pool, max-pool, attention) on base_model; returns a functional Model named '<model_name>_enhanced'.
+        """
+        inputs = base_model.input
+        base_features = base_model(inputs)
+
+        # Branch 1: global average pooling -> dropout -> 256-unit projection
+        branch1 = layers.GlobalAveragePooling2D(name='global_avg_pool')(base_features)
+        branch1 = layers.Dropout(self.config.DROPOUT_RATE)(branch1)
+        branch1 = layers.Dense(256, activation='relu', name='branch1_dense')(branch1)
+
+        # Branch 2: global max pooling -> dropout -> 256-unit projection
+        branch2 = layers.GlobalMaxPooling2D(name='global_max_pool')(base_features)
+        branch2 = layers.Dropout(self.config.DROPOUT_RATE)(branch2)
+        branch2 = layers.Dense(256, activation='relu', name='branch2_dense')(branch2)
+
+        # Branch 3: spatial + channel attention (see _create_attention_branch)
+        attention_features = self._create_attention_branch(base_features)
+
+        # Concatenate the three branch outputs along the feature axis
+        combined = layers.Concatenate(name='combine_branches')([branch1, branch2, attention_features])
+
+        # Fusion MLP: 512 -> 256 units, with the second dropout at half the configured rate
+        x = layers.Dense(512, activation='relu', name='combined_dense1')(combined)
+        x = layers.Dropout(self.config.DROPOUT_RATE)(x)
+        x = layers.Dense(256, activation='relu', name='combined_dense2')(x)
+        x = layers.Dropout(self.config.DROPOUT_RATE / 2)(x)
+
+        # Softmax classifier over NUM_CLASSES
+        outputs = layers.Dense(self.config.NUM_CLASSES, activation='softmax', name='predictions')(x)
+
+        model = models.Model(inputs=inputs, outputs=outputs, name=f'{model_name}_enhanced')
         return model
 
+    def _create_attention_branch(self, base_features):
+        """
+        Apply spatial then channel attention to ``base_features`` and return a
+        pooled 256-unit feature vector (after dropout).
+        """
+        n_channels = base_features.shape[-1]
+
+        # Spatial gate: a single-channel sigmoid map multiplied into the features.
+        spatial_gate = layers.Conv2D(1, kernel_size=1, activation='sigmoid', name='spatial_attention')(base_features)
+        gated = layers.Multiply(name='apply_spatial_attention')([base_features, spatial_gate])
+
+        # Channel gate: pool -> bottleneck of n_channels // 8 -> per-channel sigmoid.
+        squeezed = layers.GlobalAveragePooling2D(name='attention_gap')(gated)
+        channel_gate = layers.Dense(n_channels // 8, activation='relu', name='channel_attention_1')(squeezed)
+        channel_gate = layers.Dense(n_channels, activation='sigmoid', name='channel_attention_2')(channel_gate)
+        channel_gate = layers.Reshape((1, 1, n_channels), name='reshape_channel_attention')(channel_gate)
+        gated = layers.Multiply(name='apply_channel_attention')([gated, channel_gate])
+
+        # Collapse the spatial dimensions and project the result.
+        pooled = layers.GlobalAveragePooling2D(name='attention_pool')(gated)
+        projected = layers.Dense(256, activation='relu', name='attention_dense')(pooled)
+
+        return layers.Dropout(self.config.DROPOUT_RATE)(projected)
+
     def compile_model(self, model, model_name, learning_rate=None):
         """
         Compile the model to prepare it for training.
@@ -142,8 +203,16 @@ def fine_tune_model(self, model, model_name, train_data, val_data):
         print(f"Fine-tuning {model_name}")
         print(f"{'='*50}")
 
+        # Check if progressive unfreezing is enabled
+        if hasattr(self.config, 'USE_PROGRESSIVE_UNFREEZING') and self.config.USE_PROGRESSIVE_UNFREEZING:
+            return self._progressive_fine_tune(model, model_name, train_data, val_data)
+        else:
+            return self._standard_fine_tune(model, model_name, train_data, val_data)
+
+    def _standard_fine_tune(self, model, model_name, train_data, val_data):
+        """Standard fine-tuning approach (original method)."""
         # Unfreeze the base model and set the first half of its layers to be trainable
-        base_model = model.layers[0]
+        base_model = model.layers[0] if hasattr(model, 'layers') else model.get_layer(index=0)
         base_model.trainable = True
         fine_tune_at = len(base_model.layers) // 2
         for layer in base_model.layers[:fine_tune_at]:
@@ -179,3 +248,94 @@ def fine_tune_model(self, model, model_name, train_data, val_data):
         self.histories[f"{model_name}_fine_tuned"] = history
 
         return model, history, fine_tune_time
+
+    def _progressive_fine_tune(self, model, model_name, train_data, val_data):
+        """Fine-tune in phases, unfreezing a larger tail of the base model each phase.
+
+        Falls back to _standard_fine_tune when no nested base model is found.
+        FINE_TUNE_EPOCHS is split across the phases (earlier phases absorb any
+        remainder) and the learning rate is halved after every phase.
+
+        Returns:
+            (model, combined_history, total_time) with total_time in seconds.
+        """
+        print("Using progressive unfreezing strategy...")
+
+        # Heuristic: the first nested layer with more than 20 sub-layers is the base model.
+        base_model = next(
+            (layer for layer in model.layers
+             if hasattr(layer, 'layers') and len(layer.layers) > 20),
+            None,
+        )
+        if base_model is None:
+            print("Warning: Could not find base model for progressive unfreezing. Using standard method.")
+            return self._standard_fine_tune(model, model_name, train_data, val_data)
+
+        # If the base model was frozen as a whole, its per-layer flags are ignored, so
+        # re-enable it (as _standard_fine_tune does) before toggling individual layers.
+        base_model.trainable = True
+        total_layers = len(base_model.layers)
+        total_epochs = self.config.FINE_TUNE_EPOCHS
+        # Clamp so there is at least one phase and no phase is left with zero epochs.
+        phases = max(1, min(int(getattr(self.config, 'PROGRESSIVE_PHASES', 3)), total_epochs))
+        base_epochs, extra_epochs = divmod(total_epochs, phases)
+        all_histories = []
+        total_time = 0
+
+        for phase in range(phases):
+            print(f"\n--- Progressive Fine-tuning Phase {phase + 1}/{phases} ---")
+            # Unfreeze the last (phase + 1)/phases share of layers; the final phase unfreezes all.
+            unfreeze_from = total_layers - ((phase + 1) * total_layers // phases)
+            for i, layer in enumerate(base_model.layers):
+                layer.trainable = i >= unfreeze_from
+
+            # Recompile with the halved learning rate after changing the trainable flags.
+            phase_lr = self.config.FINE_TUNE_LEARNING_RATE * (0.5 ** phase)
+            model = self.compile_model(model, model_name, learning_rate=phase_lr)
+            print(f"Unfreezing layers from index {unfreeze_from} onwards ({total_layers - unfreeze_from} layers)")
+            print(f"Learning rate: {phase_lr:.6f}")
+
+            callbacks = [
+                EarlyStopping(patience=5, restore_best_weights=True),
+                ReduceLROnPlateau(factor=0.5, patience=3),
+                ModelCheckpoint(
+                    f"{self.config.MODEL_SAVE_DIR}/{model_name}_progressive_phase_{phase+1}.h5",
+                    save_best_only=True
+                )
+            ]
+            start_time = time.time()
+            history = model.fit(
+                train_data,
+                epochs=base_epochs + (1 if phase < extra_epochs else 0),
+                validation_data=val_data,
+                callbacks=callbacks,
+                verbose=1
+            )
+            phase_time = time.time() - start_time
+            total_time += phase_time
+            all_histories.append(history)
+            print(f"Phase {phase + 1} time: {phase_time:.2f} seconds")
+
+        combined_history = self._combine_histories(all_histories)
+        model.save(f"{self.config.MODEL_SAVE_DIR}/{model_name}_fine_tuned.h5")
+        print(f"Total fine-tuning time: {total_time:.2f} seconds")
+        self.models[f"{model_name}_fine_tuned"] = model
+        self.histories[f"{model_name}_fine_tuned"] = combined_history
+        return model, combined_history, total_time
+
+    def _combine_histories(self, histories):
+        """Concatenate several training histories into one history-like object.
+
+        Returns None for an empty list, otherwise an object whose ``history`` dict
+        maps every metric seen in any input to its values in input order.
+        """
+        if not histories:
+            return None
+        combined = {}
+        for history in histories:  # tolerate inputs that are empty or lack a metric
+            for key, values in history.history.items():
+                combined.setdefault(key, []).extend(values)
+        class CombinedHistory:  # minimal stand-in exposing only ``.history``
+            def __init__(self, history_dict):
+                self.history = history_dict
+        return CombinedHistory(combined)
diff --git a/src/model_evaluator.py b/src/model_evaluator.py
index 943e33c..8d2d69d 100644
--- a/src/model_evaluator.py
+++ b/src/model_evaluator.py
@@ -23,7 +23,12 @@ def evaluate_model(self, model, test_data, model_name):
         """
 
         print(f"\nEvaluating {model_name}...")
-        predictions = model.predict(test_data)
+        
+        # Check if test-time augmentation is enabled
+        if hasattr(self.config, 'USE_TEST_TIME_AUGMENTATION') and self.config.USE_TEST_TIME_AUGMENTATION:
+            predictions = self._evaluate_with_tta(model, test_data, model_name)
+        else:
+            predictions = model.predict(test_data)
 
         import numpy as np
         predicted_classes = np.argmax(predictions, axis=1)
@@ -47,6 +52,36 @@ def evaluate_model(self, model, test_data, model_name):
 
         return predicted_classes, true_classes
 
+    def _evaluate_with_tta(self, model, test_data, model_name):
+        """
+        Predict on test-time-augmented copies of the test data and average them.
+
+        Returns one averaged prediction row per original sample. Raises ValueError
+        if the predictions cannot be split evenly into groups of n_augmentations.
+        """
+        # Imported locally so this method does not rely on evaluate_model's
+        # function-scoped numpy import.
+        import numpy as np
+        from .data_processor import DataProcessor
+
+        print(f"Using test-time augmentation for {model_name}...")
+        data_processor = DataProcessor(self.config)
+        tta_dataset, n_augmentations = data_processor.create_test_time_augmentation_dataset(
+            test_data, self.config.TTA_AUGMENTATIONS
+        )
+        tta_predictions = model.predict(tta_dataset, verbose=0)
+        if n_augmentations < 1 or len(tta_predictions) % n_augmentations:
+            raise ValueError(
+                f"Got {len(tta_predictions)} TTA predictions, which cannot be split "
+                f"into groups of {n_augmentations} augmentations"
+            )
+        # NOTE(review): assumes each image's augmented copies are consecutive rows.
+        n_original_samples = len(tta_predictions) // n_augmentations
+        reshaped_predictions = tta_predictions.reshape(n_original_samples, n_augmentations, -1)
+        final_predictions = np.mean(reshaped_predictions, axis=1)
+        print(f"TTA completed with {n_augmentations} augmentations per image")
+        return final_predictions
+
     def get_model_size(self, model):
         """
         Calculate the size of the model in MB.