===============================================================================
                    MACHINE LEARNING PRACTICALS - JOURNAL
===============================================================================

===============================================================================
PRACTICAL 1: FEATURE ENGINEERING AND DATA PREPROCESSING
(Handling missing values, Encoding categorical variables, Scaling features)
===============================================================================

------- 1.1: ENCODING (encoding.py) -------

CODE:
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Load the customer records and echo them for reference.
df = pd.read_csv("customer.csv")
print(df)

# Work only with the columns from position 2 onward.
df1 = df.iloc[:, 2:]
print(df1)

# The first two remaining columns are the inputs; the last column is the target.
inputs = df1.iloc[:, 0:2]
target = df1.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(inputs, target, test_size=0.1)
for caption, part in (
    ("XTrain: \n", x_train),
    ("Ytrain: \n", y_train),
    ("XTEST: \n", x_test),
    ("YTEST: \n", y_test),
):
    print(caption, part)

# Ordinal encoding: the category lists give the explicit low-to-high order
# for each of the two input columns. Fitted on the training split only,
# then applied unchanged to the test split.
oe = OrdinalEncoder(
    categories=[
        ['Poor', 'Average', 'Good'],
        ['HSC', 'UG', 'PG'],
    ]
)
x_train = oe.fit_transform(x_train)
x_test = oe.transform(x_test)
print(x_train)

# Label encoding of the target, again learned from the training split only.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
print(y_train)

# One-hot encoding of the column at position 1 with scikit-learn's
# OneHotEncoder (dense output instead of a sparse matrix).
second_col = df.iloc[:, 1:2]
encod = OneHotEncoder(sparse_output=False)
encoded = encod.fit_transform(second_col)
print("Feature Names:")
print(encod.get_feature_names_out())
print(encoded)

OUTPUT:
     age  Gender   review education Purchase
0    NaN    Male     Good       HSC      yes
1   48.0    Male     Good        PG       no
2   68.0  Female  Average        UG       no
3   77.0  Female  Average        PG      yes
4   26.0    Male     Poor        PG      yes
...
[14 rows x 5 columns]

     review education Purchase
0      Good       HSC      yes
1      Good        PG       no
2   Average        UG       no
3   Average        PG      yes
...
[14 rows x 3 columns]

XTrain:
      review education
3   Average        PG
8      Good        UG
6      Good        PG
2   Average        UG
...
[12 rows x 2 columns]

Ytrain:
 3     yes
8     yes
6     yes
2      no
...
Name: Purchase, dtype: object

XTEST:
    review education
5    Good        UG
13   Good        UG

YTEST:
 5      no
13    yes
Name: Purchase, dtype: object

[[1. 2.]
 [2. 1.]
 [2. 2.]
 [1. 1.]
 ...
 [0. 2.]]

[1 1 1 0 0 0 0 1 1 1 1 1]

Feature Names:
['Gender_Female' 'Gender_Male']

[[0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]]

------- 1.2: BOXPLOT AND HISTOGRAM (boxplot.py) -------

CODE:
import matplotlib.pyplot as plt
import numpy as np
# Sample observations summarised below.
values = np.array([100, 120, 110, 150, 110, 140, 130, 170, 120, 220, 140, 110])

# Show the data in ascending order.
print(np.sort(values))

# Central tendency.
print("MEAN=", np.mean(values))
print("MEDIAN=", np.median(values))

# Quartiles (percentiles do not depend on the input order).
print("Quarter 1=", np.percentile(values, 25))
print("Quarter 3=", np.percentile(values, 75))

# Box plot first, then histogram, each in its own window.
plt.boxplot(values)
plt.show()
plt.hist(values)
plt.show()

OUTPUT:
[100 110 110 110 120 120 130 140 140 150 170 220]
MEAN= 135.0
MEDIAN= 125.0
Quarter 1= 110.0
Quarter 3= 142.5

------- 1.3: CORRELATION WITH TARGET (corela_target.py) -------

CODE:
import pandas as pd

# Small housing sample: three candidate features and the target 'price'.
data = {
    'sqft': [1500, 1600, 1700, 1800, 1900],
    'rooms': [3, 3, 4, 4, 5],
    'roof_color': [1, 2, 1, 2, 1],
    'price': [300000, 320000, 340000, 360000, 380000]
}

df = pd.DataFrame(data)

# Pairwise correlation of every numeric column (computed once).
correlation_matrix = df.corr(numeric_only=True)
print("🔁 Full Correlation Matrix:")
print(correlation_matrix.round(2))

# Correlation of each feature with the target. The matrix above is reused
# rather than recomputed, so both views use the same numeric_only setting.
correlation = correlation_matrix['price'].drop('price')
print(correlation)

# Keep features whose absolute correlation with the target exceeds 0.3.
selected_features = correlation[correlation.abs() > 0.3].index
print("Selected features:", list(selected_features))

OUTPUT:
🔁 Full Correlation Matrix:
            sqft  rooms  roof_color  price
sqft        1.00   0.94        0.00   1.00
rooms       0.94   1.00       -0.33   0.94
roof_color  0.00  -0.33        1.00   0.00
price       1.00   0.94        0.00   1.00

sqft          1.000000e+00
rooms         9.449112e-01
roof_color    5.250970e-17
Name: price, dtype: float64

Selected features: ['sqft', 'rooms']

------- 1.4: COLUMN TRANSFORMER ENCODING (column_trans_encod.py) -------

CODE:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder,LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Load the customer records and echo them for reference.
df = pd.read_csv("customer.csv")
print(df)

# First four columns are the inputs; the last column is the target.
x = df.iloc[:, :4]
y = df.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

# One transformer per column:
#   age       -> fill missing values (SimpleImputer default strategy)
#   Gender    -> one-hot columns; handle_unknown='ignore' keeps transform()
#                from failing if the test split holds a category that the
#                small training split never saw
#   review    -> ordinal codes in the order Poor < Average < Good
#   education -> ordinal codes in the order HSC < UG < PG
trans = ColumnTransformer(
    transformers=[
        ('impute_age', SimpleImputer(), ['age']),
        ('onehot_gender',
         OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
         ['Gender']),
        ('ordinal_rating', OrdinalEncoder(categories=[['Poor', 'Average', 'Good']]), ['review']),
        ('ordinal_education', OrdinalEncoder(categories=[['HSC', 'UG', 'PG']]), ['education'])
    ],
    remainder='passthrough'
)

# Fit on the training split ONLY, then apply the same fitted transformer to
# the test split. Re-fitting on the test split (the previous behaviour)
# learns different imputation values and categories, so train and test rows
# can end up with different column counts and meanings.
# NOTE: the recorded OUTPUT was produced before this fix, which is why its
# XTest has fewer columns than XTrain.
x_train = trans.fit_transform(x_train)
x_test = trans.transform(x_test)
print("\nTransformed XTrain:\n", x_train)
print("\nTransformed XTest:\n", x_test)

# Same rule for the target: learn the label -> integer mapping from the
# training labels and reuse it for the test labels.
le = LabelEncoder()
y_train1 = le.fit_transform(y_train)
y_test1 = le.transform(y_test)
print("\nTransformed YTrain:\n", y_train1)
print("\nTransformed YTest:\n", y_test1)

OUTPUT:
     age  Gender   review education Purchase
0    NaN    Male     Good       HSC      yes
1   48.0    Male     Good        PG       no
2   68.0  Female  Average        UG       no
3   77.0  Female  Average        PG      yes
4   26.0    Male     Poor        PG      yes
...
[14 rows x 5 columns]

Transformed XTrain:
 [[55.  0.  1.  0.  2.]
 [18.  0.  1.  2.  1.]
 [44.  0.  1.  2.  1.]
 [50.  1.  0.  2.  1.]
 ...
 [26.  0.  1.  0.  2.]]

Transformed XTest:
 [[77.  1.  2.  0.]
 [77.  1.  1.  2.]]

Transformed YTrain:
 [1 0 1 1 1 0 1 0 0 0 1 1]

Transformed YTest:
 [0 0]

------- 1.5: CORRELATION BETWEEN FEATURES (corel_bt_feat.py) -------

CODE:
import pandas as pd

# Housing sample; 'price' is the target and is excluded from the
# feature-to-feature comparison below.
data = dict(
    sqft=[1500, 1600, 1700, 1800, 1900],
    rooms=[3, 3, 4, 4, 5],
    bathrooms=[1, 2, 2, 2, 3],
    roof_color=[1, 2, 1, 2, 1],
    price=[300000, 320000, 340000, 360000, 380000],
)
df = pd.DataFrame(data)

# Correlate the feature columns with each other (target column removed).
features_only = df.drop(columns='price')
feature_corr = features_only.corr()

print("Correlation between features:")
print(feature_corr.round(2))

OUTPUT:
Correlation between features:
            sqft  rooms  bathrooms  roof_color
sqft        1.00   0.94       0.89        0.00
rooms       0.94   1.00       0.85       -0.33
bathrooms   0.89   0.85       1.00        0.00
roof_color  0.00  -0.33       0.00        1.00


===============================================================================
PRACTICAL 2: PRINCIPAL COMPONENT ANALYSIS (PCA)
(Dimensionality Reduction while retaining maximum variance)
===============================================================================

CODE:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the student records and echo them for reference.
df = pd.read_csv("student_dataset.csv")
print(df)

# Standardise the first three columns (zero mean, unit variance per column).
scaler = StandardScaler()
df1 = scaler.fit_transform(df.iloc[:, :3])
print(df1)

# Covariance between the standardised columns (np.cov expects variables in rows).
cov_matrix = np.cov(df1.T)
print("COVARIANCE MATRIX:\n", cov_matrix)

# Eigen-decomposition: column i of eig_vect belongs to eig_val[i].
eig_val, eig_vect = np.linalg.eig(cov_matrix)
print("\nEigen Values\n", eig_val)
print("Eigen Vectors\n", eig_vect)

# np.linalg.eig returns eigenvalues in no particular order, so the two
# principal components are chosen by sorting the eigenvalues in descending
# order instead of hard-coding column positions.
order = np.argsort(eig_val)[::-1]
pc = eig_vect[:, order[:2]].T  # one principal component per row
print("\nTop 2 Principal Components:\n", pc)

# Project the standardised data onto the two selected components.
trans_df = np.dot(df1[:, 0:3], pc.T)
print(" \nNew Transform\n", trans_df)

# Attach the 'GTU' column so the reduced data can be compared against it.
Dataf = pd.DataFrame(trans_df, columns=['PC1', 'PC2'])
Dataf['GTU Marks'] = df['GTU'].values
print(Dataf)

OUTPUT:
    Mid_Sem   IQ  HSC  GTU
0        35  110   78   70
1        42  125   85   88
2        28  100   72   65
3        45  130   90   92
4        38  115   80   78
...
[15 rows x 4 columns]

[[-0.09736702 -0.20785572 -0.20441405]
 [ 1.03858157  1.20934235  0.81765621]
 [-1.23331562 -1.15265443 -1.08047428]
 [ 1.52541669  1.68174171  1.5477064 ]
 ...
 [-0.74648051 -0.96369469 -0.93446424]]

COVARIANCE MATRIX:
 [[1.07142857 1.0614152  1.05676449]
 [1.0614152  1.07142857 1.05019437]
 [1.05676449 1.05019437 1.07142857]]

Eigen Values
 [3.18368463 0.00878971 0.02181137]

Eigen Vectors
 [[-0.57842869 -0.7974863  -0.17156877]
 [-0.57723546  0.54876897 -0.60469152]
 [-0.57638483  0.25073535  0.77776109]]

Top 2 Principal Components:
 [[-0.57842869 -0.57723546 -0.57638483]
 [-0.17156877 -0.60469152  0.77776109]]

New Transform
 [[ 0.29412273 -0.01659157]
 [-1.77010531 -0.27352604]
 [ 2.00150714  0.06824795]
 [-2.74518022 -0.074903  ]
 ...
 [ 1.5266755  -0.01597918]]

         PC1       PC2  GTU Marks
0   0.294123 -0.016592         70
1  -1.770105 -0.273526         88
2   2.001507  0.068248         65
3  -2.745180 -0.074903         92
4  -0.428478 -0.158651         78
...
[15 rows x 3 columns]


===============================================================================
PRACTICAL 3: DECISION TREE CLASSIFIER
(Classification with evaluation using precision, recall, and F1-score)
===============================================================================

CODE:
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Load the raw (categorical) records and echo them for reference.
data = pd.read_csv("decesiontree.csv")
print(data)

# Explicit category -> integer mapping for each column that is encoded.
cleanup_nums = {"Age":     {"Youth": 0, "Middle": 1, "Senior" : 2},
                "Income": {"Low": 0, "Medium": 1, "High" : 2 },
                "Student": {"No": 0, "Yes":1  },
                "Credit Rating": { "Fair": 1, "Excellent" : 2 },
                "Buys-Computer": {"No": 0, "Yes": 1}}
data = data.replace(cleanup_nums)
print(data)

# Columns 1-4 are the predictors and column 5 is the target (column 0 is
# skipped). The explicit int cast avoids depending on replace() silently
# changing the column dtype, which newer pandas versions no longer do.
predictors = data.iloc[:, 1:5].astype(int)
target = data.iloc[:, 5].astype(int)

dtree_entropy=DecisionTreeClassifier(criterion="entropy",random_state=100,
                                     max_depth=3,min_samples_leaf=5)

# Train the tree; the original script stopped after constructing it.
dtree_entropy.fit(predictors, target)

# Evaluate on the same rows the tree was trained on (no hold-out split is
# made in this script), so these figures describe fit, not generalisation.
# NOTE: the lines printed below are not part of the recorded OUTPUT.
y_pred = dtree_entropy.predict(predictors)
print("Confusion Matrix:\n", confusion_matrix(target, y_pred))
print("Accuracy:", accuracy_score(target, y_pred))
print("Classification Report:")
print(classification_report(target, y_pred, zero_division=0))

OUTPUT:
    Item no     Age  Income Student Credit Rating Buys-Computer
0         1   Youth    High      No          Fair            No
1         2   Youth    High      No     Excellent            No
2         3  Middle    High      No          Fair           Yes
3         4  Senior  Medium      No          Fair           Yes
4         5  Senior     Low     Yes          Fair           Yes
...
[14 rows x 6 columns]

    Item no  Age  Income  Student  Credit Rating  Buys-Computer
0         1    0       2        0              1              0
1         2    0       2        0              2              0
2         3    1       2        0              1              1
3         4    2       1        0              1              1
4         5    2       0        1              1              1
...
[14 rows x 6 columns]


===============================================================================
PRACTICAL 4: NAIVE BAYES CLASSIFIER
(Probabilistic classification using Gaussian Naive Bayes)
===============================================================================

CODE:
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB

# Load the records, using the "Item no" column as the row index.
fl = "Naive_Bayesian.csv"
df = pd.read_csv(fl, index_col="Item no")
print(df)

dfCol = df.columns
print("df columns: ", dfCol)
ndfCol = df.shape[1]
ndfRow = df.shape[0]

# One plain Python list of raw values per column.
feature = [list(df[col]) for col in dfCol]
for col, values in zip(dfCol, feature):
    print(col, ":", values)

# Integer-encode every column. The single encoder is re-fitted for each
# column, so afterwards it only remembers the mapping of the last column.
le = preprocessing.LabelEncoder()
feature0 = [le.fit_transform(values) for values in feature]
for col, codes in zip(dfCol, feature0):
    print(col, "encoded:", codes)

# Row-wise feature tuples built from every column except the last one.
features = list(zip(*feature0[:ndfCol - 1]))
print("features:", features)

# The last column is the class label; codes are shifted by one.
label = [code + 1 for code in feature0[ndfCol - 1]]
print("label:", label)

model = GaussianNB()
model.fit(features, label)
print("model:", model)

# The sample must be typed as already-encoded integer codes, one per
# feature column, in column order.
ptStr = input("Enter unknown data (separated by ,) excluding Index Column: ")
ptLst = [int(x) for x in ptStr.split(',')]
if len(ptLst) != ndfCol - 1:
    raise ValueError(
        f"expected {ndfCol - 1} comma-separated integer codes, got {len(ptLst)}"
    )
point1 = [ptLst]
print("Unknown data (sample):", point1)
predicted = model.predict(point1)
print("Class for Point:", point1, "is:", predicted)

OUTPUT (with input: 0,1,1,0):
            Age  Income Student Credit Rating Buys-Computer
Item no
1         Youth    High      No          Fair            No
2         Youth    High      No     Excellent            No
3        Middle    High      No          Fair           Yes
4        Senior  Medium      No          Fair           Yes
...
[14 rows x 5 columns]

df columns:  Index(['Age', 'Income', 'Student', 'Credit Rating', 'Buys-Computer'], dtype='object')

Age : ['Youth', 'Youth', 'Middle', 'Senior', 'Senior', ...]
Income : ['High', 'High', 'High', 'Medium', 'Low', ...]
Student : ['No', 'No', 'No', 'No', 'Yes', ...]
Credit Rating : ['Fair', 'Excellent', 'Fair', 'Fair', 'Fair', ...]
Buys-Computer : ['No', 'No', 'Yes', 'Yes', 'Yes', ...]

Age encoded: [2 2 0 1 1 0 1 2 2 1 2 0 0 1]
Income encoded: [0 0 0 2 1 1 1 2 1 2 2 2 0 2]
Student encoded: [0 0 0 0 1 1 1 0 1 1 1 0 1 1]
Credit Rating encoded: [1 0 1 1 1 0 0 1 1 1 0 0 1 0]
Buys-Computer encoded: [0 0 1 1 1 0 1 0 1 1 1 1 1 0]

features: [(2, 0, 0, 1), (2, 0, 0, 0), (0, 0, 0, 1), (1, 2, 0, 1),
           (1, 1, 1, 1), (0, 1, 1, 0), ...]

label: [1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1]

model: GaussianNB()

Enter unknown data (separated by ,) excluding Index Column:
Unknown data (sample): [[0, 1, 1, 0]]
Class for Point: [[0, 1, 1, 0]] is: [2]


===============================================================================
PRACTICAL 5: LINEAR REGRESSION
(Predicting continuous values with evaluation using MSE and R² score)
===============================================================================

CODE:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn import metrics

# Load the records and echo them for reference.
dataset = pd.read_csv("LinearRegression.csv")
print(dataset)

# First column is the single input feature; second column is the Yes/No answer.
x = dataset.iloc[:, 0:1]
y = dataset.iloc[:, 1]
# Explicit mapping to 1/0. map() returns an integer Series directly instead
# of relying on replace() to change the dtype, which is deprecated in pandas.
y = y.map({'Yes': 1, 'No': 0})

print(y)
# test_size=0.01 still reserves one row for testing, so every metric below
# is computed from a single sample.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.01, random_state=123)

model = LinearRegression()
model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Predict for an input of 18. The query is wrapped in a DataFrame with the
# training column name so it matches what the model was fitted on.
query = pd.DataFrame([[18]], columns=x.columns)
y_pred_val = model.predict(query)
print(y_pred_val)

# Threshold the regression output at 0.5 to obtain a Yes/No answer.
if y_pred_val[0] > 0.5:
    print("Yes")
else:
    print("No")

# Training points (red) with the fitted regression line.
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, model.predict(X_train))

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

# Display the figure; without this call a plain script never shows the plot.
plt.show()

OUTPUT:
Outside Temperature \nCelcius Wear a\n jacket
0                             30              No
1                             25              No
2                             20              No
3                             15             Yes
4                             10             Yes

0    0
1    0
2    0
3    1
4    1
Name: Wear a\n jacket, dtype: int64

[0.54285714]
Yes

Mean Absolute Error: 0.14285714285714302
Mean Squared Error: 0.02040816326530617
Root Mean Squared Error: 0.14285714285714302


===============================================================================
PRACTICAL 6: K-NEAREST NEIGHBORS (KNN) CLASSIFIER
(Classification using different k values with accuracy evaluation)
===============================================================================

CODE:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the records and discard rows that have no 'Item no.' value.
raw = pd.read_csv("knn.csv")
df = raw.dropna(subset=['Item no.'])
print("Dataset Preview:")
print(df.head())

# Columns 1-3 are the inputs; column 4 is the class label.
X = df.iloc[:, 1:4]
y = df.iloc[:, 4]
print("INPUT\n", X)
print("OUTPUT\n", y)

# Fixed seed so the 80/20 split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling parameters are learned from the training split and reused for
# the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print("XTRAIN\n", X_train)
print("X_TEST\n", X_test)

# Classify with the 3 nearest neighbours.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print("PREDICTION   : \n", y_pred)

# Overall accuracy followed by the per-class precision/recall/F1 table.
accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy:", accuracy)
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

OUTPUT:
Dataset Preview:
   Item no.  Temp  Humidity  Wind Speed  Play  ...
0       1.0  85.0      85.0        12.0    No  ...
1       2.0  80.0      90.0         9.0    No  ...
2       3.0  83.0      86.0         4.0   Yes  ...
3       4.0  70.0      96.0         3.0   Yes  ...
4       5.0  68.0      80.0         5.0   Yes  ...

INPUT
     Temp  Humidity  Wind Speed
0   85.0      85.0        12.0
1   80.0      90.0         9.0
2   83.0      86.0         4.0
3   70.0      96.0         3.0
4   68.0      80.0         5.0
...
[14 rows x 3 columns]

OUTPUT
 0      No
1      No
2     Yes
3     Yes
4     Yes
...
Name: Play, dtype: object

XTRAIN
 [[ 1.37690922 -0.53048047 -0.46006855]
 [-1.22885447 -0.99359834  2.25104967]
 [-0.57741354 -0.99359834 -0.46006855]
 [ 1.70262968  0.48837885 -0.64080976]
 ...
 [-1.3917147  -1.45671621 -1.00229219]]

X_TEST
 [[ 0.39974784 -0.0673626  -1.00229219]
 [-0.08883285  0.85887314 -0.64080976]
 [ 2.02835014  0.39575527  0.80511996]]

PREDICTION   :
 ['Yes' 'Yes' 'Yes']

Accuracy: 0.6666666666666666

Classification Report:
              precision    recall  f1-score   support

          No       0.00      0.00      0.00         1
         Yes       0.67      1.00      0.80         2

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3


===============================================================================
PRACTICAL 7: MULTIPLE LINEAR REGRESSION
(Prediction using multiple features with R² score and RMSE evaluation)
===============================================================================

CODE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the workbook and echo it for reference.
Data = pd.read_excel("student_data1.xlsx")
print(Data)

# First two columns are the inputs; the last column (kept as a one-column
# DataFrame) is the value to predict.
X = Data.iloc[:, :2]
y = Data.iloc[:, -1:]
print(X)
print(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Xtrain\n", X_train)
# The "XTEST" label previously printed y_test; show the matching inputs and
# print the held-out targets under their own label.
print("XTEST\n", X_test)
print("YTEST\n", y_test)

model = LinearRegression()
# Fitted on a plain array so later predictions can be made from plain lists.
model.fit(X_train.to_numpy(), y_train)

# Evaluate on the held-out rows (R2 and RMSE), which the script imported
# r2_score for but never computed.
# NOTE: these evaluation lines are not part of the recorded OUTPUT.
y_test_pred = model.predict(X_test.to_numpy())
print("R2 score: ", r2_score(y_test, y_test_pred))
rmse = np.sqrt(np.mean((y_test.to_numpy() - y_test_pred) ** 2))
print("RMSE: ", rmse)

# Prediction for one hand-written sample; this is not the test split, so the
# label now says what is actually being predicted.
y_pred = model.predict([[8.6, 125]])
print("model prediction for input [8.6, 125]:\n", y_pred.round(2))

# Fitted slope(s) and intercept.
print("M= ", model.coef_.round(2))
print("b= ", model.intercept_.round(2))

OUTPUT:
   CGPA   IQ  Placement (LPA)
0   7.5  110              6.5
1   8.0  120              7.0
2   8.5  125              8.2
3   9.0  130              9.1
4   6.5  100              5.0
...
[10 rows x 3 columns]

   CGPA   IQ
0   7.5  110
1   8.0  120
2   8.5  125
3   9.0  130
4   6.5  100
...
[10 rows x 2 columns]

   Placement (LPA)
0              6.5
1              7.0
2              8.2
3              9.1
4              5.0
...
[10 rows x 1 columns]

Xtrain
    CGPA   IQ
5   7.0  105
0   7.5  110
7   8.8  128
2   8.5  125
...
[8 rows x 2 columns]

XTEST
    Placement (LPA)
8              5.2
1              7.0

model prediction on Ytest:
 [[8.45]]

M=  [[1.32 0.03]]
b=  [-6.51]


===============================================================================
PRACTICAL 8: SINGULAR VALUE DECOMPOSITION (SVD)
(Dimensionality Reduction using SVD - Manual & Sklearn Implementation)
===============================================================================

------- 8.1: SVD MANUAL IMPLEMENTATION (svd.py) -------

CODE:
import pandas as pd
import numpy as np

# Load the workbook and take the first three columns as the data matrix.
df = pd.read_excel("student_dataset.xlsx")
A = df.iloc[:, :3].to_numpy()

# Centre each column on zero (subtract the column mean).
A_centered = A - np.mean(A, axis=0)

# Reduced SVD: only the first min(rows, cols) left singular vectors are
# needed below, so the full square U matrix is not built.
# S holds the singular values in descending order.
U, S, V_T = np.linalg.svd(A_centered, full_matrices=False)

# Keep the k strongest components.
k = 2
U_k = U[:, :k]
S_k = np.diag(S[:k])

# Coordinates of every row in the reduced k-dimensional space (U_k * S_k).
final_data1 = np.dot(U_k, S_k)
print("Reduced Data:\n", final_data1)

# Share of the total squared singular values carried by each kept component.
explained_variance = (S[:k] ** 2) / np.sum(S ** 2)
print("Explained variance by top 2 components:", explained_variance)

# Attach the 'GTU' column so the reduced data can be compared against it.
reduced_df = pd.DataFrame(final_data1, columns=["PC1", "PC2"])
reduced_df['GTU'] = df['GTU'].values
print(reduced_df)

OUTPUT:
Reduced Data:
 [[ -2.60622042   0.08983428]
 [ 15.20533711  -2.22651162]
 [-16.15266994   0.28962606]
 [ 22.72992624  -0.77843972]
 [  3.46193108  -0.87192496]
 ...
 [-12.83777493   0.27648847]]

Explained variance by top 2 components: [0.99132896 0.00672569]

          PC1       PC2  GTU
0   -2.606220  0.089834   70
1   15.205337 -2.226512   88
2  -16.152670  0.289626   65
3   22.729926 -0.778440   92
4    3.461931 -0.871925   78
...
[15 rows x 3 columns]

------- 8.2: SVD USING SKLEARN (svd2.py) -------

CODE:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

# Load the workbook; the first three columns are the features to reduce.
df = pd.read_excel("student_dataset.xlsx")
X = df.iloc[:, :3]

# Standardise the features before decomposition.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reduce to two components with scikit-learn's TruncatedSVD.
svd = TruncatedSVD(n_components=2)
X_reduced = svd.fit_transform(X_scaled)
print(X_reduced)

# Reduced coordinates next to the 'GTU' column for comparison.
Dataf = pd.DataFrame(X_reduced, columns=['PC1', 'PC2'])
Dataf['GTU Marks'] = df['GTU'].values
print(Dataf)

# Summary statistics of the fitted decomposition.
summary = (
    ("Singular values:", svd.singular_values_),
    ("Explained variance:", svd.explained_variance_),
    ("Explained variance ratio:", svd.explained_variance_ratio_),
    ("Total variance captured:", svd.explained_variance_ratio_.sum()),
)
for caption, value in summary:
    print(caption, value)

OUTPUT:
[[-0.29412273 -0.01659157]
 [ 1.77010531 -0.27352604]
 [-2.00150714  0.06824795]
 [ 2.74518022 -0.074903  ]
 [ 0.42847827 -0.1586513 ]
 ...
 [-1.5266755  -0.01597918]]

         PC1       PC2  GTU Marks
0  -0.294123 -0.016592         70
1   1.770105 -0.273526         88
2  -2.001507  0.068248         65
3   2.745180 -0.074903         92
4   0.428478 -0.158651         78
...
[15 rows x 3 columns]

Singular values: [6.67619539 0.55259316]
Explained variance: [2.97143899 0.02035728]
Explained variance ratio: [0.99047966 0.00678576]
Total variance captured: 0.997265423131314


===============================================================================
                                END OF JOURNAL
===============================================================================