diff --git a/BMI500_lab11_q1.JPG b/BMI500_lab11_q1.JPG new file mode 100644 index 0000000..36f1806 Binary files /dev/null and b/BMI500_lab11_q1.JPG differ diff --git a/BMI_lab11_q2b.jpg b/BMI_lab11_q2b.jpg new file mode 100644 index 0000000..d4c0ca1 Binary files /dev/null and b/BMI_lab11_q2b.jpg differ diff --git a/EigenAnalysisPowerMethod.m b/EigenAnalysisPowerMethod.m new file mode 100644 index 0000000..82ac558 --- /dev/null +++ b/EigenAnalysisPowerMethod.m @@ -0,0 +1,8 @@ +function v = EigenAnalysisPowerMethod(A, v0, Itr) + +v = v0(:); +for k = 1 : Itr + v = A * v; + v = v / sqrt(v' * v); +end + diff --git a/Ex01_testPCA.m b/Ex01_testPCA.m index 1181070..94248f3 100644 --- a/Ex01_testPCA.m +++ b/Ex01_testPCA.m @@ -1,5 +1,5 @@ % Principal component analysis -% +% Jiwoong Jason Jeong % BMI500 Course % Lecture: An Introduction to Blind Source Separation and Independent Component Analysis % By: R. Sameni @@ -16,7 +16,7 @@ clear close all -example = 1; +example = 2; % just changed the test example to see the second one. switch example case 1 % Load a sample EEG signal load EEGdata textdata data % A sample EEG from the OSET package @@ -28,7 +28,8 @@ load SampleECG2 data % A sample ECG from the OSET package fs = 1000; x = data(:, 2:end)'; % make the data in (channels x samples) format - x = x - LPFilter(x, 1.0/fs); % remove the lowpass baseline + %x = x - LPFilter(x, 1.0/fs); % remove the lowpass baseline + %(removed lowpass filter) otherwise error('unknown example'); end @@ -36,3 +37,63 @@ N = size(x, 1); % The number of channels T = size(x, 2); % The number of samples per channel +% plot raw data +PlotECG(x,4,'b',fs,'Raw data channels'); + +% remove the mean to make mean = 0 +x_demeaned = x - mean(x,2)*ones(1,size(x,2)); + +% getting the covariants of the zero mean data +Cx = cov(x_demeaned'); + +% getting the eigenvectors of the covariants of x +[V,D] = eig(Cx,'vector'); + +% plotting the eigenvalues +figure +subplot(1,2,1) +plot(D(end:-1:1)) +grid +xlabel('Index') +ylabel('Eigenvalues'); +title('Eighenvalues in linear scale') +subplot(122) +plot(10*log10(D(end:-1:1)/D(end)) ) +grid +xlabel('Index'); +ylabel('Eigenvalue ratios in dB') +title('Normalzied eighenvalues in log scale') + + +x_var = var(x_demeaned,[] ,2 ) %formula 1 +x_var2 = diag(Cx); % formula 2 +% decorrelate the channels +y = V' * x_demeaned; +Cy = cov(y'); +y_var = diag(Cy) + +%check total energy match +x_total_energy = sum(x_var) +Cx_trace = trace(Cx) +eigenvale_sum = sum(D) +Cy_trace = trace(Cy); + +%these values should all be the same as the rotaion doesn't change relative +%distances we are still looking at the same distances. +x_partial_energy = 100.0 * cumsum(D(end:-1:1))./x_total_energy; + +th = 99.9; +N_eighs_to_keep = find(x_partial_energy <= th,1,'last') + +x_compressed = V(:,N - N_eighs_to_keep + 1:N)*y(N-N_eighs_to_keep + 1:N,:); + + +t = (0: T-1)/fs; + +for ch = 1:N + figure + hold on + plot(t,x(ch,:)); + plot(t,x_compressed(ch,:)); + legend(['channel ',num2str(ch),'compressed']) +end \ No newline at end of file diff --git a/Ex02_testEigenAnalysisPowerMethod.m b/Ex02_testEigenAnalysisPowerMethod.m index f1d95be..7324788 100644 --- a/Ex02_testEigenAnalysisPowerMethod.m +++ b/Ex02_testEigenAnalysisPowerMethod.m @@ -1,5 +1,5 @@ % The power method for eigenvalue decomposition -% +% Jiwoong Jason Jeong % BMI500 Course % Lecture: An Introduction to Blind Source Separation and Independent Component Analysis % By: R. Sameni @@ -17,10 +17,30 @@ clc; % Build a random signal -N = 3; +N = 2; % changed to 2 channels T = 1000; a = randn(1, N); x = diag(a) * randn(N, T); % Cx = x * x'; Cx = cov(x'); +% Apply eigenvalue decomposition +% Read 'eig' help and compare with 'eigs' +[V,D] = eig(Cx) + +Itr = 1000; % The number of power method iterations (changed to 1000) + +v0 = rand(N, 1); +v1 = EigenAnalysisPowerMethod(Cx, v0, Itr); +scale1 = (Cx*v1)./v1; +lambda1 = mean(scale1) + +C = Cx - lambda1 * (v1 * v1'); +v2 = EigenAnalysisPowerMethod(C, v0, Itr); +scale2 = (Cx*v2)./v2; +lambda2 = mean(scale2) + +C = C - mean(lambda2) * (v2 * v2'); +v3 = EigenAnalysisPowerMethod(C, v0, Itr); +scale3 = (Cx*v3)./v3; +lambda3 = mean(scale3) diff --git a/Ex03_testICAmethods.m b/Ex03_testICAmethods.m index af123be..9e24394 100644 --- a/Ex03_testICAmethods.m +++ b/Ex03_testICAmethods.m @@ -1,5 +1,5 @@ % Independent component analysis using classical methods -% +% Jiwoong Jason Jeong % BMI500 Course % Lecture: An Introduction to Blind Source Separation and Independent Component Analysis % By: R. Sameni @@ -30,7 +30,7 @@ x = data(:, 2:end)'; % make the data in (channels x samples) format x = x - LPFilter(x, 1.0/fs); % remove the lowpass baseline case 3 % A synthetic signal - fs = 500; + fs = 1000; % changed fs to 1000 len = round(3.0*fs); s1 = sin(2*pi*7.0/fs * (1 : len)); s2 = 2*sin(2*pi*1.3/fs * (1 : len) + pi/7); @@ -46,3 +46,35 @@ N = size(x, 1); % The number of channels T = size(x, 2); % The number of samples per channel +% Plot the channels +PlotECG(x, 4, 'b', fs, 'Raw data channels'); + +% Run fastica +approach = 'symm'; % 'symm' or 'defl' +g = 'skew'; % 'pow3', 'tanh', 'gauss', 'skew' % changed to different ones +lastEigfastica = N; % PCA stage +numOfIC = N; % ICA stage +interactivePCA = 'off'; +[s_fastica, A_fatsica, W_fatsica] = fastica (x, 'approach', approach, 'g', g, 'lastEig', lastEigfastica, 'numOfIC', numOfIC, 'interactivePCA', interactivePCA, 'verbose', 'off', 'displayMode', 'off'); + +% Check the covariance matrix +Cs = cov(s_fastica'); + +% Run JADE +lastEigJADE = N; % PCA stage +W_JADE = jadeR(x, lastEigJADE); +s_jade = W_JADE * x; + +% Run SOBI +lastEigSOBI = N; % PCA stage +num_cov_matrices = 100; +[W_SOBI, s_sobi] = sobi(x, lastEigSOBI, num_cov_matrices); + +% Plot the sources +PlotECG(s_fastica, 4, 'r', fs, 'Sources extracted by fatsica'); + +% Plot the sources +PlotECG(s_jade, 4, 'k', fs, 'Sources extracted by JADE'); + +% Plot the sources +PlotECG(s_sobi, 4, 'm', fs, 'Sources extracted by SOBI'); diff --git a/Ex04_testEOGArtifactRemoval.m b/Ex04_testEOGArtifactRemoval.m index b6c4122..2475a45 100644 --- a/Ex04_testEOGArtifactRemoval.m +++ b/Ex04_testEOGArtifactRemoval.m @@ -1,5 +1,5 @@ % Removing EOG artifacts from EEG signals -% +% Jiwoong Jason Jeong % BMI500 Course % Lecture: An Introduction to Blind Source Separation and Independent Component Analysis % By: R. Sameni @@ -20,7 +20,7 @@ switch example case 1 % A sample EEG from the OSET package load EEGdata textdata data % Load a sample EEG signal - fs = 250; + fs = 125; % halved the fs x = data'; % make the data in (channels x samples) format % Check the channel names disp(textdata) @@ -32,3 +32,63 @@ T = size(x, 2); % The number of samples per channel t = (0 : T - 1)/fs; +% Plot the channels +PlotECG(x, 4, 'b', fs, 'Raw data channels'); + +% Run JADE +lastEigJADE = N; % PCA stage +W_JADE = jadeR(x, lastEigJADE); +s_jade = W_JADE * x; +A_jade = pinv(W_JADE); % The mixing matrix + +% Plot the sources +% PlotECG(s_jade, 4, 'k', fs, 'Sources extracted by JADE'); + +% Channel denoising by JADE +eog_channel = [2 3]; % check from the plots to visually detect the EOG +s_jade_denoised = s_jade; +s_jade_denoised(eog_channel, :) = 0; +x_denoised_jade = A_jade * s_jade_denoised; + +% Channel denoising by NSCA +eog_channel = 24; +eog_ref = x(eog_channel, :); +energy_envelope_len = round(0.25*fs); +eog_envelope = sqrt(filtfilt(ones(1, energy_envelope_len), energy_envelope_len, eog_ref.^2)); +med = median(eog_envelope); +mx = max(eog_envelope); +eog_detection_threshold = 0.95 * med + 0.05 * mx; + +J = eog_envelope >= eog_detection_threshold; +I = 1 : T; + +[s_nsca, W_nsca, A_nsca] = NSCA(x,J, I); +% Channel denoising by JADE +eog_channel = [1 2]; % check from the plots to visually detect the EOG +s_nsca_denoised = s_nsca; +s_nsca_denoised(eog_channel, :) = 0; +x_denoised_nsca = A_nsca * s_nsca_denoised; +% PlotECG(s_nsca, 4, 'r', fs, 'Sources extracted by NSCA'); + +figure +hold on +plot(t, eog_ref); +plot(t, eog_envelope); +plot(t, eog_detection_threshold(ones(1, T))); +grid +legend('EOG reference channel', 'EOG envelope'); + +% Plot the denoised signals +for ch = 1 : N + figure + hold on + plot(t, x(ch, :)); + plot(t, x_denoised_jade(ch, :)); + plot(t, x_denoised_nsca(ch, :)); + legend(['channel ' num2str(ch)], 'denoised by JADE', 'denoised by NSCA'); + grid +end + +% Run the following script from the OSET package for a more advanced method +%testEOGRemoval + diff --git a/Ex05_testFetalECGExtraction.m b/Ex05_testFetalECGExtraction.m index 3ac2f31..32d958d 100644 --- a/Ex05_testFetalECGExtraction.m +++ b/Ex05_testFetalECGExtraction.m @@ -1,5 +1,5 @@ % Extracting fetal ECG signals using various ICA algorithms -% +% Jiwoong Jason Jeong % BMI500 Course % Lecture: An Introduction to Blind Source Separation and Independent Component Analysis % By: R. Sameni @@ -13,9 +13,9 @@ % % Uncomment the following lines to run advanced demos -testPCAICAPiCAfECGExtraction % Call this demo from the OSET package +%testPCAICAPiCAfECGExtraction % Call this demo from the OSET package % testECGICAPCAPiCAPlot1 % testECGICAPCAPiCAPlot1 % testAveragefECGbyDeflationAndKF % testICAPiCAAfterMECGCancellation % (using the deflation algorithm + Kalman filter) -% testPCAICAPiCAfECGDenoising % (denoising using BSS and semi-BSS methods) \ No newline at end of file +testPCAICAPiCAfECGDenoising % (denoising using BSS and semi-BSS methods) \ No newline at end of file diff --git a/README.md b/README.md index a14ae45..4faaa5c 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,84 @@ # BSSPackage Simplified illustration of blind-source separation algorithms + +### Student Name: Jiwoong Jason Jeong +### Student Email: jjjeon3@emory.edu +*** +### Question 1: +##### Part A and B +See attached picture 'BMI500_lab11_q1.JPG' +*** +### Question 2: +##### Part A +I created a simple power method calculation of the eigenvalues as +'powerMethod.m'. Using that code, when you run the following: +>> powerMethod([1;1],C,1/100) +you get the eigenvalues to be 3.9069 and 2.4614. + +I didn't see that there was a power analysis code provided but it does +the same thing. +##### Part B +See attached figure 'BMI_lab11_q2b.jpg' +*** +### Question 3: +##### Part A +###### Ex01 +I didn't change the code that much at all other than changing to a +different example (example 2: SampleECG2) and test turning on or off the +lowpass filter. Example 2 is different from the first example but the +biggest change was from the lowpass filter. When the lowpass filter was +removed, the time variant aspect of the signal was not filtered out and the +final plots were not on a uniform/horizontal axis. When the lowpass filter +is on, all plots are normalized/had the time variance removed and was much +easier to see the harmonic signals. +###### Ex02 +It was a simple power method of analyzing the eigenvalues so I just changed +the number of channels to 2. As expected, changing the number of channels +changes it to a 2x2 matrix and 2x1 eigenvector. Additionally, the number of +iterations was changed to 1000. The accuracy of the power method increased +as the number of iterations increased, which was expected. +###### Ex03 +I only changed the sampling frequency (fs) from 500 to 1000 and the results +were changed as we expected. The signal length was doubled from 1,500 to +3,000. Other than that, there really wasn't any unexpected changes. I also +changed the g ('pow3', 'tanh', 'gauss', 'skew') and didn't see much +differences between them except for the 'skew' where the signals were very +different. Which seems to indicate that choosing the nonlinearity is fairly +important. +###### Ex04 +Ran the code by halving the sampling frequency (fs) from 250 to 125 but did +not see much of a difference if at all which may suggest that the code is +robust to sampling frequency. I did run a second dataset (EEGdata2) but it +was too much for my computer to handle so I stopped it early. However, the +second dataset seemed to look more interesting as there were time variances +within the dataset. JADE seems to be a BSS method or implementation. +###### Ex05 +I ran the last example for my ex05. As I understand, denoises the mixed +maternal and fetal signals and then analyzes each based on the different +components of the signal using different algorithms like JADE. Beyond this, +I did not understand the code very well. +*** +### Question 4: +##### Part A +Chosen paper: +A comparison of PCA, KPCA and ICA for dimensionality +reduction in support vector machine + +Summary: The paper discussed and showed the effectiveness of applying +different component analyses namely, principal component analysis (PCA), +kernel principal component analysis (KPCA) and independent component +analysis (ICA) on a failry simple machine learning model (support vector +machine (SVM)). While machine learning methods extract features, the +importance of those features may be questionable and sometimes may be +confounding noise. These component analysis methods allow the extraction +of the most relevant features for the model to analyze and learn for their +tasks. They have shown in their paper that KPCA performed the best which +makes sense as in deep learning models, convolutional layers with kernels +are used to extract features like edges. As I understand, the KPCA allows +the extraction of specific principle components which can not be linearly +separated. This increases the accuracy of SVM. + +As for the pseudo code of the KPCA algorithm, it's pretty much the same as +linear PCA (eigen analysis) but with the addition of the kernel that +converts the data into a "feature space" to perform the linear PCA on. + diff --git a/plotPower.m b/plotPower.m new file mode 100644 index 0000000..53acc4d --- /dev/null +++ b/plotPower.m @@ -0,0 +1,14 @@ +function plotPower() +% +%Power method for computing eigenvalues +% +C = [4 1;1 3] +z = [1;1] +for x = 0:20 + plot(x,z(1),'b*') + hold on; + plot(x,z(2),'r*') + y=C*z; + n=norm(z); + z=y/n +end \ No newline at end of file diff --git a/powerMethod.m b/powerMethod.m new file mode 100644 index 0000000..8a1af8b --- /dev/null +++ b/powerMethod.m @@ -0,0 +1,20 @@ +function [vec,value]=power(start,A,toler) +% +%Power method for computing eigenvalues +% +dd=1; +x=start; +n=10; +i = 0 +iter = 20 +while dd> toler + y=A*x; + dd=abs(norm(x)-n); + n=norm(x); + x=y/n; + while i < iter + plot(x, i) + end +end +vec=x; +value=n; \ No newline at end of file