We aim to explore various surrogate models in this assignment. To keep things simple, we will only consider a one-dimensional example. Let's take the classic example of

$$y = (6x-2)^2 \sin{(12x-4)}, \quad x \in [0,1]$$
The function looks like this:
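The figure is reproduced by the following minimal snippet, adapted from the (commented-out) plotting code at the end of this page:

```python
import numpy as np
import matplotlib.pyplot as plt

# Evaluate the true function on a dense grid and plot it
x = np.linspace(0, 1, 200)
y = (6*x - 2)**2 * np.sin(12*x - 4)

plt.plot(x, y)
plt.xlabel('x')
plt.ylabel('y')
plt.title('True function')
plt.show()
```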
Let's try to construct various surrogate models for this function. We will consider

- Linear regression with various basis functions
- Non-linear regression
- Piecewise cubic spline interpolation

and compare their performance.
## Sampling
Even though the functional form of the true function is known, we will assume that it is unknown from this point onwards. Therefore we need to sample the function.
### Task 1
Implement the following sampling techniques:
1. Random sampling (uniform) with 5, 10 and 15 samples
1. Equi-spaced sampling with 5, 10 and 15 samples
1. Latin hypercube sampling with 5, 10 and 15 samples
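Random and equi-spaced sampling are one-liners (`np.random.rand(n)` and `np.linspace(0, 1, n)`). Latin hypercube sampling is the least familiar of the three, so here is a minimal sketch using `scipy.stats.qmc` (the `seed` argument and the loop over sample sizes are illustrative choices, not part of the assignment):

```python
import numpy as np
from scipy.stats import qmc

def latin_hypercube_sampling(n, seed=None):
    # One stratum per sample on [0, 1]; scrambling randomizes the
    # position of each point within its stratum.
    sampler = qmc.LatinHypercube(d=1, scramble=True, seed=seed)
    return sampler.random(n)[:, 0]

for n in (5, 10, 15):
    print(n, np.sort(latin_hypercube_sampling(n, seed=0)))
```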
{{< include pythonScripts/Task_1/script_sampling.qmd >}}

## Linear Regression

### Task 2

Implement linear regression with the following basis functions:

1. $(1, x, x^2)$
1. $(1, x, x^2, x^3)$
1. $(1, x, x^2, x^3, x^4)$

{{< include pythonScripts/Task_2/script_polynomialRegression.qmd >}}

### Task 3

Implement linear regression with Chebyshev basis functions:

1. $(1, T_1(x), T_2(x))$
1. $(1, T_1(x), T_2(x), T_3(x), T_4(x))$

{{< include pythonScripts/Task_3/script_chebyshevRegression.qmd >}}

### Task 4

Implement linear regression with sine basis functions:

1. $(1, \sin(\pi x), \sin(2\pi x))$
1. $(1, \sin(\pi x), \sin(2\pi x), \sin(3\pi x))$
1. $(1, \sin(\pi x), \sin(2\pi x), \sin(3\pi x), \sin(4\pi x))$

{{< include pythonScripts/Task_4/script_sineBasisLR.qmd >}}
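Tasks 2–4 differ only in how the design matrix is built; the least-squares solve is the same each time. A minimal sketch under that observation (function names are illustrative; the Chebyshev basis assumes $x \in [0,1]$ is first mapped to $[-1,1]$, where the $T_k$ are defined):

```python
import numpy as np

def fit_least_squares(Phi, y):
    # Solve the normal equations Phi^T Phi beta = Phi^T y
    return np.linalg.solve(Phi.T @ Phi, Phi.T @ y)

def poly_basis(x, degree):
    # Columns: 1, x, x^2, ..., x^degree
    return np.vander(x, degree + 1, increasing=True)

def chebyshev_basis(x, degree):
    # Columns: T_0, T_1, ..., T_degree evaluated at 2x - 1
    return np.polynomial.chebyshev.chebvander(2.0 * x - 1.0, degree)

def sine_basis(x, k):
    # Columns: 1, sin(pi x), sin(2 pi x), ..., sin(k pi x)
    cols = [np.ones_like(x)] + [np.sin(j * np.pi * x) for j in range(1, k + 1)]
    return np.column_stack(cols)

# Example: cubic polynomial fit to 10 equi-spaced samples
x = np.linspace(0, 1, 10)
y = (6*x - 2)**2 * np.sin(12*x - 4)
beta = fit_least_squares(poly_basis(x, 3), y)
y_hat = poly_basis(x, 3) @ beta
```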
## Nonlinear Regression
### Task 5
Implement nonlinear regression for
$\hat{y} = \beta_1 + \beta_2 x^2 \sin(\beta_3 x + \beta_4)$
Then try
$\hat{y} = (\beta_1 x + \beta_2)^2 \sin(\beta_3 x + \beta_4)$
You will notice that the second model gives better results. This is expected, since the second model matches the functional form of the true function exactly. But remember that in the general case we need deep intuition to pick the correct functional form, and this is usually not possible. You will also notice that finding the $\beta$'s is much more difficult in the nonlinear case than in the earlier linear case.
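A minimal sketch of fitting the second model with `scipy.optimize.curve_fit` (the sample size and the initial guess `p0` are illustrative; nonlinear least squares can converge to a poor local minimum if the guess is far off, which is the difficulty noted above):

```python
import numpy as np
from scipy.optimize import curve_fit

def model(x, b1, b2, b3, b4):
    # Second candidate form: (b1 x + b2)^2 sin(b3 x + b4)
    return (b1 * x + b2)**2 * np.sin(b3 * x + b4)

x = np.linspace(0, 1, 15)
y = (6*x - 2)**2 * np.sin(12*x - 4)

p0 = [1.0, -1.0, 10.0, -3.0]   # illustrative initial guess
beta, _ = curve_fit(model, x, y, p0=p0, maxfev=10000)
print('Fitted parameters:', beta)
```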
{{< include pythonScripts/Task_5/script_nonlinearR.qmd >}}
## Piecewise Cubic Spline Interpolation
### Task 6
This example is a bit more involved. You will need to learn about cubic spline interpolation, then use a suitable library to compute the interpolant.
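One suitable library routine is `scipy.interpolate.CubicSpline`; a minimal sketch (the node count and the dense evaluation grid are illustrative):

```python
import numpy as np
from scipy.interpolate import CubicSpline

def true_function(x):
    return (6*x - 2)**2 * np.sin(12*x - 4)

x = np.linspace(0, 1, 10)        # interpolation nodes (equi-spaced here)
spline = CubicSpline(x, true_function(x))

xx = np.linspace(0, 1, 200)      # dense grid for evaluating the fit
mse = np.mean((true_function(xx) - spline(xx))**2)
print('Mean squared error:', mse)
```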
{{< include pythonScripts/Task_6/script_spline.qmd >}}
## Deliverables
You should deliver a `pdf` report and a zip file containing all the code.
In each case, you should report the mean squared error of the model with respect to the true function, and plot the true function together with the model.
Your general conclusions about the advantages and disadvantages of the various sampling techniques and regression models should be included in the report.
Fit each model with all the sampling techniques and compare the results.
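One way to organize the comparison is a small helper that scores any fitted surrogate against the true function on a dense grid (a sketch; the grid size and function names are illustrative):

```python
import numpy as np

def true_function(x):
    return (6*x - 2)**2 * np.sin(12*x - 4)

def mse_vs_true(predict, n_grid=500):
    # Evaluate the surrogate and the true function on a dense grid
    # and return the mean squared difference.
    xx = np.linspace(0, 1, n_grid)
    return np.mean((predict(xx) - true_function(xx))**2)
```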
## Submission
The submission deadline is 23:59 on 31st March. Email all the assignments to `ramkumars.24@res.iist.ac.in`.
The code for the sampling and linear regression examples above:

```{python}
import numpy as np
import matplotlib.pyplot as plt

# True function on [0, 1] (plotting left commented out, as in the original)
x = np.linspace(0, 1, 100)
y = (6*x - 2)**2 * np.sin(12*x - 4)
# plt.plot(x, y)
# plt.xlabel('x'); plt.ylabel('y')
# plt.title('True function'); plt.show()

# Task 1 - Sampling
def true_function(x):
    return (6*x - 2)**2 * np.sin(12*x - 4)

def random_sampling(n):
    return np.random.rand(n)

def equi_spaced_sampling(n):
    return np.linspace(0, 1, n)

def latin_hypercube_sampling(n):
    # Latin hypercube sampling via scipy.stats.qmc
    from scipy.stats import qmc
    sampler = qmc.LatinHypercube(d=1, scramble=True)
    return sampler.random(n)[:, 0]

# Plotting the samples for n=5 with all three methods (commented out)
# points = 5
# plt.plot(random_sampling(points), 'or', label='Random')
# plt.plot(equi_spaced_sampling(points), '+g', label='Equi-spaced')
# plt.plot(latin_hypercube_sampling(points), 'xb', label='Latin Hypercube')
# plt.xlabel('Data Points'); plt.ylabel('x')
# plt.grid(); plt.legend(); plt.title('Sampling'); plt.show()

# Task 2 - Linear Regression
def linear_regression(x, y, degree):
    # Least-squares polynomial fit via the normal equations
    X = np.vander(x, degree + 1, increasing=True)
    beta = np.linalg.solve(X.T @ X, X.T @ y)
    return beta

def predict(x, beta):
    X = np.vander(x, len(beta), increasing=True)
    return X @ beta

# Linear regression example with 5 samples and degree 2
x = random_sampling(5)
y = true_function(x)
beta = linear_regression(x, y, 2)
y_hat = predict(x, beta)

# Mean squared error at the sample points
mse = np.mean((y - y_hat)**2)
print('Mean squared error:', mse)

xx = np.linspace(0, 1, 100)
plt.plot(xx, true_function(xx), 'r', label='True')
plt.plot(xx, predict(xx, beta), 'b', label='Predicted')
plt.plot(x, y_hat, 'og', label='Samples')
plt.xlabel('x'); plt.ylabel('y')
plt.legend(); plt.grid()
plt.title('Linear regression with degree 2')
plt.show()
```
Mean squared error: 1.0231030299631143