% Analyze HMM Segment output from QuB for SiMREPS % % Before running this script, please paste the HMM 'segments' matrix from % each movie into its own sheet in an excel book, without a header row. % The name of the sheet will be used as the title of the corresponding % plots. %PL % % Filters molecules as follows: % 1. Intensity difference (|column 4 - column 3|) exceeds threshold value % 2. Signal-to-noise (|column 4 - column 3|/column 5) exceeds threshold % value % % Converts "number of events" in columns 19 and 20 into number of % transitions % % Creates histogram of transitions. % % OPTIONAL OUTPUT: *_acceptedMols.mat - list of accepeted molecules; excel sheet titles % *_rejectedMols.mat - list of rejected molecules; excel sheet titles % *_valuesOut.xlsx - summary excel file with values of % the filtering criteria for each molecule. Values are shown in the % order in which the filtering cirteria are applied in the order shown % (columns from left to right). I.e., filtering by intensity difference between states and S/N % occurs before filtering by median tau values, occurs before filtering by transitions (N(B+D)) % %%%%%%%%% close all clear all %% Parameters to set %%%% %Filtering Criteria Ithresh = 500; % Intensity threshold SNthresh = 3; % Signal-to-noise threshold; 3 for T790M % SNthresh = 2.5; % min_transitions = 20; % Minimum number of intensity transitions for inclusion min_transitions = 15; % 15 for L858R min_tau_on_median = 5;% Smallest median dwell time in the bound state to be counted as a positive molecule % min_tau_off_median = 3; % Smallest median dwell time in the unbound state % to be counted as a positive molecule; 3 for T790M min_tau_off_median = 15; %15 for L858R % min_tau_on_median = 1;% Smallest median dwell time in the bound state to be counted as a positive molecule % min_tau_off_median = 1; % Smallest median dwell time in the unbound state to be counted as a positive molecule % If not using max values for filtering, set to Inf (infinity) max_tau_on_median = 20; % Largest median dwell time in the bound state to be counted as a positive molecule % max_tau_off_median = 30; % Largest median dwell time in the unbound state to be counted as a positive molecule max_tau_off_median = 50; %50 for L858R % max_tau_on_median = 5; % Largest median dwell time in the bound state to be counted as a positive molecule % max_tau_off_median = 100; % Largest median dwell time in the unbound state to be counted as a positive molecule use_auto_transition_treshold = 0; % If set to 1, the min_transitions value will be replaced by mean + std_factor*std std_factor = 2.5; %Optional outputs, metrics acceptedMolsList = true; %set to true to save list of positive molecules rejectedMolsList = true; %set to true to save list of rejected molecules valuesOut= true; %set to true to save an excel sheet with the values for each molecule showAcceptedMols = false; %set to true to show traces for all positive molecules showRejectedMols = false; %set to true to show traces for all rejected molecules % Plot options binsize = 2; % Bin size in transition histogram lwidth = 2; fontsize = 16; xl = [1 1000]; yl = [1 1000]; % Specify x and y axis limits for tau log-log plot if desired % %%% End of Parameters to set %% %% [TestFileName, TestPathName] = uigetfile('*.*','Please select your Excel File','Multiselect','off'); FileName = strcat(TestPathName,TestFileName); [status, sheets] = xlsfinfo(FileName) %Get names of all sheets in workbook %Create output filenames if valuesOut fname3 = strcat(deblank(strjoin(strsplit(FileName,'.xlsx'))),'_valuesOut.xlsx'); % generate excel summary file name warning('off','MATLAB:xlswrite:AddSheet') end %if if acceptedMolsList accMols = cell(numel(sheets),1); fname4 = strcat(deblank(strjoin(strsplit(FileName,'.xlsx'))),'_acceptedMols.mat'); end if rejectedMolsList rejMols = cell(numel(sheets),1); fname5 = strcat(deblank(strjoin(strsplit(FileName,'.xlsx'))),'_rejectedMols.mat'); end for j =1:numel(sheets) m = xlsread(FileName,sheets{j}); disp(strcat('Done reading excel for ',sheets{j})); if size(m,2) == 25 nmols = size(m,1); transitions = nan(nmols,1); tau_off = zeros(nmols,1); tau_on = tau_off; tau_off_median = tau_off; tau_on_median = tau_on; disp('Molecules detected:'); disp(num2str(nmols)); % Convert dwell times to seconds from miliseconds m(:,14:15) = m(:,14:15)./1000; m(:,17:18) = m(:,17:18)./1000; Idiff = abs(m(:,4)-m(:,3)); SN1 = Idiff./m(:,5); SN2 = Idiff./m(:,6); for n = 1:nmols if SN1(n) == Inf SN1(n) = 0; end if SN2(n) == Inf SN2(n) = 0; end if SN1(n) > SNthresh && SN2(n) > SNthresh && Idiff(n) > Ithresh if m(n,4) > m(n,3) tau_off(n) = m(n,14); tau_on(n) = m(n,15); tau_off_median(n) = m(n,17); tau_on_median(n) = m(n,18); else tau_off(n) = m(n,15); tau_on(n) = m(n,14); tau_off_median(n) = m(n,18); tau_on_median(n) = m(n,17); end % Filter by tau values if tau_on_median(n) >= min_tau_on_median && tau_off_median(n) >= min_tau_off_median && tau_on_median(n) <= max_tau_on_median && tau_off_median(n) <= max_tau_off_median transitions(n) = sum(m(n,19:20))-1; % sum nevents in state 1 and state 2 to get number of transitions end end end %% Not sure what this block of code is used for, none of the variables get used later %PL % mean_transitions = nanmean(transitions); %Not sure why this is % here, seems to be overwritten below done = 0; % transitions_trim = sort(removeNaNrows(transitions)); %rewrite to % work w/o removeNaNrows.m accessory function transitions2 = transitions(~isnan(transitions(:))); transitions_trim = sort(transitions2); while done == 0 if isempty(transitions_trim) done = 1; else % std_transitions = nanstd(transitions_trim); std_transitions = std(transitions_trim); % mean_transitions = nanmean(transitions_trim); mean_transitions = mean(transitions_trim); threshold = mean_transitions+std_factor*std_transitions; if isempty(transitions_trim) %check if transitions_trim has any elements elseif transitions_trim(length(transitions_trim)) > threshold transitions_trim = transitions_trim(1:length(transitions_trim)-1); else done = 1; end end end %% if use_auto_transition_treshold == 1 min_transitions = threshold; threshold end % hist(transitions,binsize/2:binsize:max(transitions)+1); accept = 0; reject = 0; acceptedmols = zeros(0,1); rejectedmols = zeros(0,1); transitions_filt = zeros(0,1); tau_on_filt = zeros(0,1); tau_off_filt = zeros(0,1); tau_on_median_filt = tau_on_filt; tau_off_median_filt = tau_off_filt; for i = 1:nmols if transitions(i) >= min_transitions % && corrcoefs(i) > min_corr && on_sigtonoise_all(i) > threshold accept = accept+1; acceptedmols = cat(1,acceptedmols,i); transitions_filt = cat(1,transitions_filt,transitions(i)); tau_on_filt = cat(1,tau_on_filt, tau_on(i)); tau_off_filt = cat(1,tau_off_filt, tau_off(i)); tau_on_median_filt = cat(1,tau_on_median_filt, tau_on_median(i)); tau_off_median_filt = cat(1,tau_off_median_filt, tau_off_median(i)); else reject = reject+1; rejectedmols = cat(1,rejectedmols,i); end end [yhist xhist] = hist(transitions,binsize/2:binsize:200); figure(1) bar(xhist,yhist,'FaceColor',[0.7 0.7 0.7],'BarWidth',1); set(gca,'FontSize',fontsize,'LineWidth',lwidth,'XTick', 0:20:200); yhist = yhist'; xhist = xhist'; xlabel('Number of Transitions'); ylabel('Number of Candidates'); title(strcat(sheets{j},': accept =',num2str(accept))); fname1 = strcat(TestPathName,sheets{j},'_hist.jpg'); print(figure(1),'-r150','-djpeg',fname1); figure(2); % plot(tau_on,tau_off,'k+'); hold on; plot(tau_on_filt,tau_off_filt,'ro', 'LineWidth', 2); hold off; xlabel('\tau_o_n (s)'); ylabel('\tau_o_f_f (s)'); loglog(tau_on,tau_off,'k+'); hold on; plot(tau_on_filt,tau_off_filt,'ro', 'LineWidth', 2); hold off; xlabel('\tau_o_n (s)'); ylabel('\tau_o_f_f (s)'); set(gca,'FontSize',fontsize,'LineWidth',lwidth) if exist('xl','var')==1 xlim(gca,xl); end if exist('yl','var')==1 ylim(gca,yl); end axis square; title(strcat(sheets{j},': accept =',num2str(accept))); fname2 = strcat(TestPathName,sheets{j},'_tauPlot.jpg'); print(figure(2),'-r150','-djpeg',fname2); else disp('This program currently only works with segment data from 2-state analysis in QuB.'); disp('The matrix "data" contains a number of columns other than 25. Analysis aborted.'); end if valuesOut % Create cell object with values and write to excel %Re-calculate transitions, median taus for all molecules vals_tau_off_median = nan(nmols,1); vals_tau_on_median = nan(nmols,1); NBD = nan(nmols,1); accRej = cell(nmols,1); for n=1:nmols if m(n,4) > m(n,3) %Determine whether state1 or state2 is the bound state % tau_off(n) = m(n,14); % tau_on(n) = m(n,15); vals_tau_off_median(n) = m(n,17); vals_tau_on_median(n) = m(n,18); else % tau_off(n) = m(n,15); % tau_on(n) = m(n,14); vals_tau_off_median(n) = m(n,18); vals_tau_on_median(n) = m(n,17); end NBD(n) = sum(m(n,19:20))-1; % sum nevents in state 1 and state 2 to get number of transitions end %for %Populate Accept/Reject column for k=1:size(acceptedmols) accRej{acceptedmols(k)}='Accept'; end for k=1:size(rejectedmols) accRej{rejectedmols(k)}='Reject'; end %Combine into siingle cell array and write to excel header = [{'Segment'} {'Accept/Reject'} {'Intensity Diff'} {'SN1'} ... {'SN2'} {'median tau on'} {'median tau off'} {'N(B+D)'}]; vals = [num2cell(m(:,1)), accRej, num2cell(Idiff), num2cell(SN1), ... num2cell(SN2), num2cell(vals_tau_on_median), num2cell(vals_tau_off_median), num2cell(NBD)]; vals = vertcat(header,vals); xlswrite(fname3,vals,sheets{j},'A1'); end %if %Store accepted and rejected molecule lists if acceptedMolsList accMols{j,1} = acceptedmols; end if rejectedMolsList rejMols{j,1} = rejectedmols; end if showAcceptedMols || showRejectedMols % Prompt user to select corresponding traces file rightFile = 0; while rightFile ==0 [TracesFileName, TracesPathName] = uigetfile('*traces.dat','Please select corresponding traces file','Multiselect','off'); FileNameTraces = strcat(TracesPathName,TracesFileName); disp(TracesFileName); load(FileNameTraces,'-mat'); if size(m,1) == size(traces,1) rightFile = 1; %dimensions mismatch end end end %Display Accepted Candidates if showAcceptedMols for i = 1:size(acceptedmols,1) figure(1) plot(traces(acceptedmols(i),:)); disp(num2str(acceptedmols(i))); pause; end %for end %if %Display Rejected Candidates if showRejectedMols for i = 1:size(rejectedmols,1) figure(1) plot(traces(rejectedmols(i),:)); disp(num2str(rejectedmols(i))); pause; end %for end %if end %for sheets %% Save molecule list variables for use later if acceptedMolsList save(fname4,'accMols','sheets'); end if rejectedMolsList save(fname5,'rejMols','sheets'); end