% PURPOSE : Demonstrate PF on a simple mixture problem. We estimate the means and number of components of a mixture
%           of two Gaussians, but this can be easily extended. 

% AUTHOR   : Nando de Freitas      (jfgf@cs.berkeley.edu)
% DATE     : Feb 2001

clear all; clc; echo off;


% INITIALISATION AND PARAMETERS:
% ==============================

k = 2;                      % Number of mixture components (fixed in this demo).
T = 100;                    % Number of time steps.
N = 100;                    % Number of particles.
resamplingScheme = 1;       % The possible choices are
                            % systematic sampling (2),
                            % residual (1)
                            % and multinomial (3). 
                            % They're all O(N) algorithms. 
delta = .001;               % Drift parameter in the transition prior for mu.
kMax = 20;                  % Maximum number of components (arbitrary).
probBirth = .01;            % Probability of birth (death) move.
probUpdate = .98;           % Probability of update move.

  

% GENERATE THE DATA FROM A MIXTURE OF TWO GAUSSIAN COMPONENTS:
% ===========================================================
sigma  = 0.005;           % Fixed variances. 
mu     = [0 1];          % Means. These can be made time-varying.
lambda = .5;             % Fixed mixture weights. (Must be the same here.)
y = zeros(T,1);          % Observations.
muforplot = zeros(T,1);
for t=2:T,
  if rand(1,1) > lambda,
    y(t) = mu(2) + sqrtm(sigma).*randn(1,1);
    muforplot(t) = mu(2);
  else
    y(t) = mu(1) + sqrtm(sigma).*randn(1,1);  
    muforplot(t) = mu(1);
  end;
end;  


% PLOT THE GENERATED DATA:
% ========================
figure(1)
clf;
plot(1:T,muforplot,'k',1:T,y,'r','linewidth',2);
ylabel('Data y_{1:t}','fontsize',15);
xlabel('Time','fontsize',15);
legend('Generating mean of y','Realisation of y');

%  We will now use a particle filter to try to learn the mixture parameters 
%  mu and k given the data (y). lambda and sigma are fixed, but the program can be
%  easily extended to estimate these quantities. Watch out for identifiability 
%  issues.


%%%%%%%%%%%%%%%  PERFORM SEQUENTIAL MONTE CARLO  %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%  ==============================  %%%%%%%%%%%%%%%%%%%%%

% INITIALISATION:
% ==============
muPF = cell(N,1);            % These are the particles for the estimate
                             % of mu. Note that there's no need to store
                             % them for all t. We're only doing this to
                             % show you all the nice plots at the end.
muPredPF = cell(N,1);        % One-step-ahead predicted values of mu.
muPlot = cell(T,1);

kPF = zeros(T,N);            % These are the particles for the estimate of k.
for i=1:N,
  kPF(1,i) = discreternd(7);      % Initialise k at random in {1,2,3,4,5,6,7}.
  kPF(1,i) = min(kPF(1,i),kMax);
  kPF(1,i) = max(kPF(1,i),0);
  muPF{i} = rand(kPF(1,i),1);
end;
kPredPF = kPF;
muPredPF = muPF;
w = ones(T,N);                     % Importance weights.

disp(' ');
for t=2:T,    
  fprintf('t = %i / %i  \r',t,T);  fprintf('\n')
  
  % PREDICTION STEP:
  % ================ 
  % Record keeping for plots.
  muPlot{t} = muPF;

  % We use a Gaussian random walk transition prior as proposal for mu.
  for i=1:N,
    u = rand(1,1);
    if u < probBirth;                      % Add a component.
      modelProb = 1;
    elseif u < probBirth + probUpdate;     % Update components.
      modelProb = 0;
    else
      modelProb = -1;                      % Delete a component.
    end;    
    kPredPF(t,i) = kPF(t-1,i) + modelProb;
    kPredPF(t,i) = min(kPredPF(t,i),kMax);
    kPredPF(t,i) = max(kPredPF(t,i),0);
    muPredPF{i} = muPF{i} + sqrtm(delta)*randn(size(muPF{i}));   
    if ((kPredPF(t,i) > kPF(t-1,i)) & (kPredPF(t,i) <= kMax))
      % Add a new mean randomly:   
      muPredPF{i} = [muPredPF{i}; rand(1,1)];
    elseif ((kPredPF(t,i) < kPF(t-1,i)) & (kPredPF(t,i) >= 0)) 
      % Delete one basis function at random:
      pos = discreternd(kPF(t-1,i));
      if pos == 1,
        muPredPF{i} = muPredPF{i}(2:kPF(t-1,i));
      elseif pos == kPF(t-1,i),       
        muPredPF{i} = muPredPF{i}(1:kPF(t-1,i)-1);       
      else
        muPredPF{i} = [muPredPF{i}(1:pos-1,1); muPredPF{i}(pos+1:kPF(t-1,i),1)];
      end;
    else
      % we're fine.
    end;
  end;

  % EVALUATE IMPORTANCE WEIGHTS:
  % ============================
  % For our choice of proposal, the importance weights are give by:  
  for i=1:N,
    lik = 0;
    for j=1:kPredPF(t,i);    % Remember that here the coefficients are the same.
      lik = lik + exp(-0.5*inv(sigma)*((y(t)-muPredPF{i}(j))^(2)));
    end;   
    if kPredPF(t,i)>0;
      w(t,i) = lik/kPredPF(t,i);  % Normalise the mixture.
    else;  
      w(t,i) = lik + 1e-99;    % The small number deals with numerical problems (hack).
    end;
  end;  
  w(t,:) = w(t,:)./sum(w(t,:));                % Normalise the weights.
  
  % SELECTION STEP:
  % ===============
  % Note that resampling only depends on indices and weights (EASY!)
  if resamplingScheme == 1
    outIndex = residualR(1:N,w(t,:)');        % Residual resampling.
  elseif resamplingScheme == 2
    outIndex = systematicR(1:N,w(t,:)');      % Systematic resampling.
  else  
    outIndex = multinomialR(1:N,w(t,:)');     % Multinomial resampling.  
  end;
  kPF(t,:) = kPredPF(t,outIndex);    
  muPF=muPredPF;
  for i=1:N,
    muPF{i} = muPredPF{outIndex(i)};
  end;
                                         
end;   % End of t loop.


%%%%%%%%%%%%%%%%%%%%%  PLOT THE RESULTS  %%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%  ================  %%%%%%%%%%%%%%%%%%%%%

% Compute the Maximum a Posteriori (MAP) model order
num = zeros(kMax,1);
kMAP = zeros(T,1);
for t=2:T,
  for j=1:kMax,
    [a,b] = find(kPF(t,:) == j);
    num(j) = sum(a);
  end;
  tmp = find(num==max(num));
  kMAP(t) = tmp(1);
end;

figure(2)
clf;
kTrue = 2*ones(T,1);
plot(1:T,kMAP,'b',1:T,kTrue,'k--','linewidth',2);
ylabel('Model order k_{MAP}','fontsize',15);
xlabel('Time','fontsize',15);
legend('MAP estimate','True model order');
axis([0 100 0 6]);

% Compute plots of p(k_t|y_{1:t})
figure(3)
clf;
domain = zeros(T,1);
range = zeros(T,1);
thex=[0:.1:5];
hold on
ylabel('Time (t)','fontsize',15)
xlabel('Model order (k)','fontsize',15)
zlabel('p(k_t|y_{1:t})','fontsize',15)
for t=6:5:T,
  [range,domain]=hist(kPF(t,:),thex);
  waterfall(domain,t,range/sum(range));
end;
view(-30,80);
rotate3d on;
a=get(gca);
set(gca,'ygrid','off');

% Compute means of the first and second component when k>=2;
estimatemu1 = zeros(T,N);
estimatemu2 = zeros(T,N);
for t=2:T,
  for i=1:N,
    if kPF(t-1,i) >= 2
      estimatemu1(t,i)= muPlot{t}{i}(1,1);
      estimatemu2(t,i)= muPlot{t}{i}(2,1);
    end;
  end;
end;
figure(4)
clf;
plot(1:T,mean(estimatemu1'),'b',1:T,mean(estimatemu2'),'r','linewidth',2);
xlabel('Time','fontsize',15);
legend('Estimated \mu_1','Estimated \mu_2');
hold on;
mu1 = mu(1)*ones(T,1);
mu2 = mu(2)*ones(T,1);
plot(1:T,mu1,'k--',1:T,mu2,'k--','linewidth',2);
axis([0 100 -.1 1.1]);