67 changes: 64 additions & 3 deletions Ex01_testPCA.m
@@ -15,8 +15,7 @@
clc
clear
close all

example = 1;
example = 2; % changed: use the second sample dataset
switch example
case 1 % Load a sample EEG signal
load EEGdata textdata data % A sample EEG from the OSET package
@@ -32,7 +31,69 @@
otherwise
error('unknown example');
end

N = size(x, 1); % The number of channels
T = size(x, 2); % The number of samples per channel
% Plot the channels
PlotECG(x, 4, 'b', fs, 'Raw data channels');
% Remove the channel means
x_demeaned = x - mean(x, 2) * ones(1, size(x, 2));
% Plot the zero-mean channels
% PlotECG(x_demeaned, 4, 'r', fs, 'Zero-mean data channels');
% Covariance matrix of the input
Cx = cov(x_demeaned')
% Eigenvalue decomposition
[V, D] = eig(Cx, 'vector');
figure
subplot(121)
plot(D(end:-1:1));
grid
xlabel('Index');
ylabel('Eigenvalue');
title('Eigenvalues in linear scale');
subplot(122)
plot(10*log10(D(end:-1:1)/D(end)));
grid
xlabel('Index');
ylabel('Eigenvalue ratios in dB');
title('Normalized eigenvalues in log scale');
% Check signal energy
x_var = var(x_demeaned, [], 2) % Formula 1
x_var2 = diag(Cx) % formula 2
% Decorrelate the channels
y = V' * x_demeaned;
Cy = cov(y')
y_var = diag(Cy)
% PlotECG(y, 4, 'r', fs, 'Decorrelated data channels');
% Check total energy match
x_total_energy = sum(x_var)
Cx_trace = trace(Cx)
eigenvalue_sum = sum(D)
Cy_trace = trace(Cy)
% partial energy in eigenvalues
x_partial_energy = 100.0 * cumsum(D(end : -1 : 1))./x_total_energy
% set a cut off threshold for the eigenvalues
th = 90; % change threshold from 99 to 90
N_eigs_to_keep = find(x_partial_energy <= th, 1, 'last')

% find a compressed version of x
x_compressed = V(:, N - N_eigs_to_keep + 1 : N) * y(N - N_eigs_to_keep + 1 : N, :);
% eig(cov(x_compressed'))

t = (0 : T - 1)/fs;
for ch = 1 : N
figure
hold on
plot(t, x(ch, :));
plot(t, x_compressed(ch, :));
legend(['channel ' num2str(ch)], 'compressed');
grid
end

% Here we switch to the second sample dataset. Our primary change was to
% lower the retained-energy threshold from 99% to 90%. It is clearly
% evident that in some channels a lot of the variability is no longer
% captured, yet most of the trends are kept, as one would expect from PCA.



26 changes: 26 additions & 0 deletions Ex02_testEigenAnalysisPowerMethod.m
@@ -24,3 +24,29 @@
% Cx = x * x';
Cx = cov(x');

% Apply eigenvalue decomposition
% Read 'eig' help and compare with 'eigs'
[V,D] = eig(Cx)

Itr = 20; % The number of power method iterations

v0 = rand(N, 1);
v1 = EigenAnalysisPowerMethod(Cx, v0, Itr);
scale1 = (Cx*v1)./v1;
lambda1 = mean(scale1)

C = Cx - lambda1 * (v1 * v1');
v2 = EigenAnalysisPowerMethod(C, v0, Itr);
scale2 = (Cx*v2)./v2;
lambda2 = mean(scale2)

C = C - mean(lambda2) * (v2 * v2');
v3 = EigenAnalysisPowerMethod(C, v0, Itr);
scale3 = (Cx*v3)./v3;
lambda3 = mean(scale3)

% For this section we reduced the iteration count to see how small it
% could get while still reliably estimating the eigenvalues. Around 20
% iterations we see stable performance. This is a simple signal, however,
% and the result may not generalize to larger matrices.

40 changes: 40 additions & 0 deletions Ex03_testICAmethods.m
@@ -16,6 +16,7 @@
clear
close all

rng(123) % fix the random seed so the ICA runs are reproducible
example = 3;
switch example
case 1 % A sample EEG from the OSET package
@@ -46,3 +47,42 @@
N = size(x, 1); % The number of channels
T = size(x, 2); % The number of samples per channel

% Plot the channels
PlotECG(x, 4, 'b', fs, 'Raw data channels');

% Run fastica
approach = 'symm'; % 'symm' or 'defl'
g = 'skew'; % 'pow3', 'tanh', 'gauss', 'skew'
lastEigfastica = N; % PCA stage
numOfIC = N; % ICA stage
interactivePCA = 'off';
[s_fastica, A_fastica, W_fastica] = fastica(x, 'approach', approach, 'g', g, 'lastEig', lastEigfastica, 'numOfIC', numOfIC, 'interactivePCA', interactivePCA, 'verbose', 'off', 'displayMode', 'off');

% Check the covariance matrix
Cs = cov(s_fastica');

% Run JADE
lastEigJADE = N; % PCA stage
W_JADE = jadeR(x, lastEigJADE);
s_jade = W_JADE * x;

% Run SOBI
lastEigSOBI = N; % PCA stage
num_cov_matrices = 100;
[W_SOBI, s_sobi] = sobi(x, lastEigSOBI, num_cov_matrices);

% Plot the sources
PlotECG(s_fastica, 4, 'r', fs, 'Sources extracted by fastica');

% Plot the sources
PlotECG(s_jade, 4, 'k', fs, 'Sources extracted by JADE');

% Plot the sources
PlotECG(s_sobi, 4, 'm', fs, 'Sources extracted by SOBI');

% For this section we compared the changes caused by different contrast
% (approximation) functions. We ran tanh, pow3, and skew on a small subset
% of the data. We notice that tanh and pow3 have the greatest agreement,
% while skew produces slightly different results.


81 changes: 81 additions & 0 deletions Ex04_testEOGArtifactRemoval.m
@@ -32,3 +32,84 @@
T = size(x, 2); % The number of samples per channel
t = (0 : T - 1)/fs;

% Plot the channels
%PlotECG(x, 4, 'b', fs, 'Raw data channels');

% Run JADE
lastEigJADE = N; % PCA stage
W_JADE = jadeR(x, lastEigJADE);
s_jade = W_JADE * x;
A_jade = pinv(W_JADE); % The mixing matrix

% Plot the sources
% PlotECG(s_jade, 4, 'k', fs, 'Sources extracted by JADE');

% Channel denoising by JADE
eog_channel = [2 3]; % check from the plots to visually detect the EOG
s_jade_denoised = s_jade;
s_jade_denoised(eog_channel, :) = 0;
x_denoised_jade = A_jade * s_jade_denoised;

% Channel denoising by NSCA
eog_channel = 24;
eog_ref = x(eog_channel, :);
energy_envelope_len = round(0.25*fs);
eog_envelope = sqrt(filtfilt(ones(1, energy_envelope_len), energy_envelope_len, eog_ref.^2));
med = median(eog_envelope);
mx = max(eog_envelope);
eog_detection_threshold = 0.95 * med + 0.05 * mx;
%eog_detection_threshold = 0.90 * med + 0.1 * mx;

J = eog_envelope >= eog_detection_threshold;
I = 1 : T;

[s_nsca, W_nsca, A_nsca] = NSCA(x,J, I);
% Channel denoising by NSCA
eog_channel = [1 2]; % check from the plots to visually detect the EOG
s_nsca_denoised = s_nsca;
s_nsca_denoised(eog_channel, :) = 0;
x_denoised_nsca = A_nsca * s_nsca_denoised;
% PlotECG(s_nsca, 4, 'r', fs, 'Sources extracted by NSCA');

%figure
%hold on
%plot(t, eog_ref);
%plot(t, eog_envelope);
%plot(t, eog_detection_threshold(ones(1, T)));
%grid
%legend('EOG reference channel', 'EOG envelope');

% Plot the denoised signals
for ch = 1 : 3
figure
hold on
plot(t, x(ch, :));
plot(t, x_denoised_jade(ch, :));
plot(t, x_denoised_nsca(ch, :));
legend(['channel ' num2str(ch)], 'denoised by JADE', 'denoised by NSCA');
disp('Signal change')
disp(std(x(ch,:)))
disp( std(x_denoised_jade(ch, :)));
disp(std( x_denoised_nsca(ch, :)));
grid
end


% Run the following script from the OSET package for a more advanced method
%testEOGRemoval


% Noise removal utilizing NSCA seems to be more aggressive than using
% JADE. This is reflected in the summary statistics, where the standard
% deviation of the denoised signal is smallest with NSCA. At a high level
% the denoised signals appear rather similar. On closer inspection we can
% see differences when comparing channels 1 and 3: in the region with a
% rapid change around the 31-second mark, JADE trails behind the NSCA
% method, suggesting that JADE is more sensitive to rapid changes in the
% signal. Meanwhile, for regions with a consistent (i.e. flat) signal,
% both methods are in agreement.





17 changes: 13 additions & 4 deletions Ex05_testFetalECGExtraction.m
@@ -13,9 +13,18 @@
%

% Uncomment the following lines to run advanced demos
testPCAICAPiCAfECGExtraction % Call this demo from the OSET package
% testECGICAPCAPiCAPlot1
%testPCAICAPiCAfECGExtraction % Call this demo from the OSET package
%testECGICAPCAPiCAPlot1

% testECGICAPCAPiCAPlot1
% testAveragefECGbyDeflationAndKF
testAveragefECGbyDeflationAndKF
% testICAPiCAAfterMECGCancellation % (using the deflation algorithm + Kalman filter)
% testPCAICAPiCAfECGDenoising % (denoising using BSS and semi-BSS methods)
% testPCAICAPiCAfECGDenoising % (denoising using BSS and semi-BSS methods)


% This script appears to use several model estimation techniques to fit a
% polynomial curve onto the ECG signal. It allows for an interactive
% interpretation of the data, which I find quite useful. Interestingly,
% we can get an acceptable fit with just 2 fitted parameters, producing
% an error of 1.82%.
% meanwhile for the
76 changes: 74 additions & 2 deletions README.md
@@ -1,2 +1,74 @@
# BSSPackage
Simplified illustration of blind-source separation algorithms
# question 1
A)

![alt text](./hand_work_eigen.png)


B)

We obtain eigenvalues similar to those returned by MATLAB's `eig`. For the eigenvectors we initially saw some differences in the final values:
we obtained similar direction vectors, but their individual entries differed. We were able to rectify this by normalizing the vectors to unit norm. The script hw.m also includes the code for deriving the eigenvectors by "hand" using symbolic math solvers.



# question 2

![alt text](./eig_val_time.png )

![alt text](./eig_ve_component.png)



# question 3: exercises

## Ex 1


Here we switch to the second sample dataset. Our primary change was to
lower the retained-energy threshold from 99% to 90%. It is clearly
evident that in some channels a lot of the variability is no longer
captured, yet most of the trends are kept, as one would expect from PCA.
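The effect of lowering the threshold can be isolated in a minimal sketch. Note this uses synthetic channels with decaying variance, not the sample dataset, so the exact counts are only illustrative:

```matlab
% Count how many eigenvalues survive a 99% vs a 90% retained-energy cutoff.
% Synthetic stand-in for the demeaned data matrix used in Ex01_testPCA.m.
rng(1);
x = randn(8, 1000) .* (8:-1:1)';         % 8 channels with decaying variance
x = x - mean(x, 2);                      % remove channel means
D = eig(cov(x'));                        % eigenvalues in ascending order
partial_energy = 100 * cumsum(D(end:-1:1)) / sum(D);
for th = [99 90]
    n_keep = find(partial_energy <= th, 1, 'last');
    fprintf('threshold %d%% -> keep %d of %d eigenvalues\n', th, n_keep, numel(D));
end
```

With a spread-out spectrum like this, the 90% cutoff drops noticeably more components than the 99% one, matching what we observed on the real channels.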

## Ex 2

For this section we modified the iteration count to see how small it
could get while still reliably estimating the eigenvalues. Around 20
iterations we see stable levels of performance. This is a simpler signal,
however, and the result may not generalize to larger matrices.
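The convergence behavior is easy to probe on a toy matrix. This is a sketch on a hypothetical 3x3 symmetric matrix, not the exercise's covariance matrix, with the same deflation step the exercise applies after EigenAnalysisPowerMethod:

```matlab
% Power method with one deflation step on a small symmetric matrix.
A = [4 1 0; 1 3 1; 0 1 2];
rng(0);
v = rand(3, 1);
for k = 1:20                        % ~20 iterations sufficed in our tests
    v = A * v;
    v = v / norm(v);                % renormalize to keep the iterate bounded
end
lambda1 = v' * A * v;               % Rayleigh-quotient estimate of the top eigenvalue
A_defl = A - lambda1 * (v * v');    % deflate to expose the next eigenvalue
```

Running the same loop on `A_defl` would then recover the second-largest eigenvalue, which is exactly the deflation pattern used in Ex02.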

## Ex 3
For this section we compared the changes caused by different contrast
(approximation) functions. We ran tanh, pow3, and skew on a small subset
of the data. We notice that tanh and pow3 have the greatest agreement,
while skew produces slightly different results. Below we have the skew and tanh example outputs of fastica.

![alt text](./fastica_skew_out.png)
![alt text](./fastica_tanh_out.png)
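Beyond eyeballing the plots, agreement between two contrast functions can be quantified by matching sources via absolute correlation (ICA only recovers sources up to permutation and sign). A minimal sketch with synthetic stand-ins for the two fastica outputs:

```matlab
% Match each source in s1 to its best-correlated counterpart in s2.
rng(2);
s1 = randn(4, 500);                          % stand-in for, e.g., the tanh sources
s2 = s1([2 1 4 3], :) .* [1; -1; 1; -1];     % permuted, sign-flipped copy (pow3 stand-in)
z1 = (s1 - mean(s1, 2)) ./ std(s1, 0, 2);    % z-score each source
z2 = (s2 - mean(s2, 2)) ./ std(s2, 0, 2);
C = abs(z1 * z2') / (size(s1, 2) - 1);       % |correlation| between all source pairs
best_match = max(C, [], 2);                  % values near 1 indicate agreement
```

Applied to the real `s_fastica` outputs from two runs, `best_match` would give a per-source agreement score in place of a visual comparison.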


## Ex 4
Noise removal utilizing NSCA seems to be more aggressive than using JADE.
This is reflected in the summary statistics, where the standard deviation
of the denoised signal is smallest with NSCA. At a high level the
denoised signals appear rather similar. On closer inspection we can see
differences when comparing channels 1 and 3: in the region with a rapid
change around the 31-second mark, JADE trails behind the NSCA method,
suggesting that JADE is more sensitive to rapid changes in the signal.
Meanwhile, for regions with a consistent (i.e. flat) signal, both methods
are in agreement.
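The "aggressiveness" comparison reduces to the ratio of denoised to raw standard deviation per channel; a sketch with hypothetical scale factors standing in for the JADE and NSCA outputs:

```matlab
% Fraction of signal spread retained after denoising (synthetic stand-ins).
rng(3);
x_raw  = randn(1, 1000);
x_jade = 0.8 * x_raw;                % hypothetical milder denoising
x_nsca = 0.6 * x_raw;                % hypothetical more aggressive denoising
ratio_jade = std(x_jade) / std(x_raw);
ratio_nsca = std(x_nsca) / std(x_raw);
fprintf('JADE retains %.0f%%, NSCA retains %.0f%% of the raw std\n', ...
        100 * ratio_jade, 100 * ratio_nsca);
```

On the real data this ratio, computed per channel from `x_denoised_jade` and `x_denoised_nsca`, is the number the `disp(std(...))` lines in Ex04 print implicitly.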

## Ex 5


This script appears to use several model estimation techniques to fit a
polynomial curve onto the ECG signal. It allows for an interactive
interpretation of the data, which I find quite useful.
Interestingly, we can get an acceptable fit with just 2 fitted parameters, producing an error of 1.82%.
meanwhile for the

# Reading report

A comparison of PCA, KPCA, and ICA for dimensionality reduction in support vector machine

SVM was adopted "due to its remarkable characteristics such as good generalization performance, the absence of local minima and the sparse representation of solution". SVMs are liked since they tend to minimize the upper bound of the generalization error. Traditionally, most applications would have used the raw feature space for classification, but this comes with a few limitations: model performance is impacted by the correlated nature of the raw features. The scientific community therefore adopted several feature selection and/or extraction techniques to minimize the number of unnecessary features.

One family of feature extraction techniques that gained prominence is dimensionality reduction, as seen in PCA and KPCA. PCA, using singular value decomposition, produces a series of uncorrelated features. Kernel PCA, meanwhile, projects the data onto a new space and applies linear PCA to it, which allows KPCA to generalize better to nonlinear features. ICA, on the other hand, identifies features that are statistically independent of each other.

These different feature projection schemes result in varying feature sets, so to validate their usefulness the researchers looked into 3 different datasets, training an SVM regression model on several openly available datasets. All 3 datasets had variables that provided a summary value of multiple events, making their behavior nonlinear in most cases. It is for this reason that the best performing model was KPCA, followed by ICA. Although the data appeared to originate from different smaller sources, which could have benefited from ICA's source separation abilities, the top performer was still KPCA. It is important to note that although KPCA performed best, it was also the slowest to run; PCA was the fastest, followed by ICA. This suggests there are trade-off considerations when deciding which method to use.
ICA may not perform as well as KPCA, but its quicker runtime would provide substantial benefit to biomedical applications.
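The PCA-then-SVM pipeline the paper evaluates can be sketched as follows. This uses synthetic two-class data and the classification variant for brevity (the paper uses SVM regression), and assumes the Statistics and Machine Learning Toolbox for `pca`/`fitcsvm`:

```matlab
% Reduce features with PCA, then train an SVM on the retained scores.
rng(4);
X = [randn(100, 10) + 1; randn(100, 10) - 1];   % two hypothetical classes
y = [ones(100, 1); -ones(100, 1)];
[~, score, latent] = pca(X);                     % decorrelated features
explained = 100 * cumsum(latent) / sum(latent);
k = find(explained >= 90, 1, 'first');           % keep 90% of the variance
mdl = fitcsvm(score(:, 1:k), y);                 % SVM on the reduced feature set
train_err = resubLoss(mdl);                      % in-sample error for a sanity check
```

Swapping the `pca` stage for a kernel PCA or ICA implementation is what produces the KPCA and ICA variants the paper compares.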
Binary file added eig_val_time.png
Binary file added eig_ve_component.png
Binary file added fastica_pow3_out.png
Binary file added fastica_skew_out.png
Binary file added fastica_tanh_out.png
Binary file added hand_work_eigen.png