import math
import numpy
import pylab

# Generate a data set around t = sin(4*pi*x) with gaussian noise with precision beta
# N    ... the number of data points to generate
# beta ... precision of gaussian noise
# returns two numpy arrays of equal length as a tuple (data-points, target-values)
def CreateData(N, beta):

    if not beta>0:
        raise TypeError, "beta must be a positive number"

    # initialise x from a uniform distn on (0,1)
    x = numpy.random.random(N).T
    # utilise element-wise numpy operations to generate target values
    t = numpy.sin(4*math.pi*x)*x + numpy.random.normal(0,1/math.sqrt(beta),N).T
    return x, t

# calculate the gaussian function
# x        ... input variable
# mu       ... 'mean'
# sigma_sq ... scaling factor for input x
def UnitGaussian(x, mu=0, sigma_sq=1):
    return numpy.exp( (x-mu)**2/sigma_sq )

# calculate the sigmoid function
# x     ... input variable
# mu    ... 'mean'
# sigma ... scaling factor for input x
def UnitSigmoid(x, mu=0, sigma=1):
    return 1/(1+numpy.exp( (x-mu)*sigma ))

# calculate the matrix Phi = [ basis_fn_i(data_value_j) ]
# x            ... a numpy array of data values
# t            ... a numpy array of target values (same lenght as x)
# which_basis  ... can have 3 values: "polynomial", "gaussian", "sigmoid"
# basis_params ... a list of parameters for the basis functions:
#   "polynomial" - a tuple containing one element n. Basis fn's are: 1, x, x**2, ... ,x**(n-1)
#   "gaussian"   - a tuple (n, means, sigmas). means and sigmas are lists - phi_i(x) = N(means[i], sigmas[i]**2)
#   "sigmoid"    - a tuple (n, means, sigmas). means and sigmas are lists - phi_i(x) = sigmoid( sigmas[i]*(x-means[i]) )
def CalculatePhi(x, t, which_basis, basis_params):
    
    Phi = numpy.mat(numpy.zeros((x.shape[0], basis_params[0])))
    for i in range(x.shape[0]):
        for j in range(basis_params[0]):
            if j==0:
                Phi[i,j] = 1 # the first basis fn is allways constant
            else:
                if which_basis == "polynomial":
                    Phi[i,j] = x[i]**j
                elif which_basis == "gaussian":
                    Phi[i,j] = UnitGaussian(x[i], basis_params[1][j], basis_params[2][j])
                elif which_basis == "sigmoid":
                    Phi[i,j] = UnitSigmoid(x[i], basis_params[1][j], basis_params[2][j])
    return Phi

# perform linear regression with given basis functions and training data
# for parameters, see above
# dummy ... a dummy parameter that is not used (so parameters coincide with RegressionWithRegulariser)
# returns the maximum likelihood solution as a numpy array of weights corresponding to phi_fns
def RegressionWithoutRegulariser(x, t, which_basis, basis_params, dummy):
    
    Phi = CalculatePhi(x, t, which_basis, basis_params)
    Phi_dagger = numpy.linalg.inv(Phi.T*Phi)*Phi.T
    t = numpy.mat(t).T
    return Phi_dagger*t

# perform linear regression with given basis functions and training data
# for parameters, see above
# lamb ... a number - the regularisation parameter
# returns the regularised maximum likelihood solution as a numpy array of weights corresponding to phi_fns
def RegressionWithRegulariser(x, t, which_basis, basis_params, lamb):
    
    Phi = CalculatePhi(x, t, which_basis, basis_params)
    Phi_dagger = numpy.linalg.inv(lamb*numpy.eye(basis_params[0])+Phi.T*Phi)*Phi.T
    t = numpy.mat(t).T
    return Phi_dagger*t

# evaluate the model on a set of data
# x            ... a numpy array of data values
# w            ... a numpy array of model weights
# which_basis  ... can have 3 values: "polynomial", "gaussian", "sigmoid"
# basis_params ... a list of parameters for the basis functions:
#   "polynomial" - a list of n ignored values. Basis fn's are: 1, x, x**2, ... ,x**(n-1)
#   "gaussian"   - a list of n (mean, sigma) tuples - phi_n(x) = N(mean, sigma**2)
#   "sigmoid"    - a list of n (mean, lambda) tuples - phi_n(x) = sigmoid( lambda*(x-mean) )
# returns a numpy array of model values
def EvaluateModel(x, w, which_basis, basis_params):
    
    result = numpy.zeros((len(x)))
    for i in range(len(x)):
        for j in range(basis_params[0]):
            if j==0:
                result[i] = w[j,0] # the first basis fn is allways constant
            else:
                if which_basis == "polynomial":
                    result[i] += w[j,0]*x[i]**j
                elif which_basis == "gaussian":
                    result[i] += w[j,0]*UnitGaussian(x[i], basis_params[1][j], basis_params[2][j])
                elif which_basis == "sigmoid":
                    result[i] += w[j,0]*UnitSigmoid(x[i], basis_params[1][j], basis_params[2][j])
    return result

# evaluate the sum of squared differences between elements of two arrays
# t1,t2 ... numpy arrays with the same dimentions
def SumSquares(t1, t2):
    return ((t1-t2)**2).sum()


# ************** Run the simulations and tests ********************

# generate points for plotting original fn and model
x_plot = numpy.arange(1000)*1.0/1000
t_plot = numpy.sin(4*math.pi*x_plot)*x_plot

# create training and test data sets
x,t = CreateData(30,4)
x_test, t_test = CreateData(30,4)

# setup model parameters
M = 15 # number of basis functions
basis_params = (M, (numpy.arange(M)+1)*1.0/(M+1) , numpy.ones(M)*3.0/M) # means and variances of basis fns
lamb = .001  # regularisation parameter

# run regression with and without regulariser, plot modelled fn and print sum sq errors on test data
for RegrFn in (RegressionWithoutRegulariser, RegressionWithRegulariser):

    # plot the generated data points and modelled function
    pylab.plot(x_plot,t_plot,"r-")
    pylab.plot(x,t,"b+")
    
    for (which_basis,colour) in (("polynomial","g-"), ("gaussian","c-"), ("sigmoid","y-")):
        #which_basis = "polynomial"
        w = RegrFn(x, t, which_basis, basis_params, lamb)
        t_eval = EvaluateModel(x_plot, w, which_basis, basis_params)
        pylab.plot(x_plot,t_eval,colour)
        # now evaluate predictions for test data set and calculate sum sq err
        t_eval = EvaluateModel(x_test, w, which_basis, basis_params)
        print "Sum of squares error for ",which_basis," ",SumSquares(t_test, t_eval)
    
    #generate test set, plot it and calculate the sum of squares error
    pylab.plot(x_test,t_test,"yo")
    
    pylab.show()

