===============================================================================
MACHINE LEARNING PRACTICALS - JOURNAL
===============================================================================

===============================================================================
PRACTICAL 1: FEATURE ENGINEERING AND DATA PREPROCESSING
(Handling missing values, Encoding categorical variables, Scaling features)
===============================================================================

------- 1.1: ENCODING (encoding.py) -------

CODE:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, OneHotEncoder

df = pd.read_csv("customer.csv")
print(df)

df1 = df.iloc[:, 2:]
print(df1)

x_train, x_test, y_train, y_test = train_test_split(df1.iloc[:, 0:2], df1.iloc[:, -1], test_size=0.1)
print("XTrain: \n", x_train)
print("Ytrain: \n", y_train)
print("XTEST: \n", x_test)
print("YTEST: \n", y_test)

# Ordinal encoding
oe = OrdinalEncoder(categories=[['Poor', 'Average', 'Good'], ['HSC', 'UG', 'PG']])
oe.fit(x_train)
x_train = oe.transform(x_train)
x_test = oe.transform(x_test)
print(x_train)

# Label encoding
le = LabelEncoder()
le.fit(y_train)
y_train = le.transform(y_train)
y_test = le.transform(y_test)
print(y_train)

# One-hot encoding using sklearn's OneHotEncoder
df2 = df.iloc[:, 1:2]
encod = OneHotEncoder(sparse_output=False)
encoded = encod.fit_transform(df2)
print("Feature Names:")
print(encod.get_feature_names_out())
print(encoded)

OUTPUT:
     age  Gender   review education Purchase
0    NaN    Male     Good       HSC      yes
1   48.0    Male     Good        PG       no
2   68.0  Female  Average        UG       no
3   77.0  Female  Average        PG      yes
4   26.0    Male     Poor        PG      yes
...
[14 rows x 5 columns]

     review education Purchase
0      Good       HSC      yes
1      Good        PG       no
2   Average        UG       no
3   Average        PG      yes
...
[14 rows x 3 columns]

XTrain:
     review education
3   Average        PG
8      Good        UG
6      Good        PG
2   Average        UG
...
[12 rows x 2 columns]

Ytrain:
3    yes
8    yes
6    yes
2     no
...
Name: Purchase, dtype: object

XTEST:
    review education
5     Good        UG
13    Good        UG

YTEST:
5      no
13    yes
Name: Purchase, dtype: object

[[1. 2.]
 [2. 1.]
 [2. 2.]
 [1. 1.]
 ...
 [0. 2.]]

[1 1 1 0 0 0 0 1 1 1 1 1]

Feature Names:
['Gender_Female' 'Gender_Male']

[[0. 1.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 ...
 [1. 0.]]

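NOTE: the one-hot step above uses sklearn's OneHotEncoder; a minimal sketch of the equivalent pandas route on the same Gender column (the dtype=float argument is just to match the 0./1. output above):

import pandas as pd

df = pd.read_csv("customer.csv")
# get_dummies expands Gender into Gender_Female / Gender_Male indicator columns
dummies = pd.get_dummies(df['Gender'], prefix='Gender', dtype=float)
print(dummies.head())
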
------- 1.2: BOXPLOT AND HISTOGRAM (boxplot.py) -------

CODE:
import matplotlib.pyplot as plt
import numpy as np

arr = np.array([100, 120, 110, 150, 110, 140, 130, 170, 120, 220, 140, 110])
arr1 = np.sort(arr)
print(arr1)
mean = np.mean(arr)
print("MEAN=", mean)
median = np.median(arr)
print("MEDIAN=", median)
q1 = np.percentile(arr, 25)
print("Quarter 1=", q1)
q3 = np.percentile(arr1, 75)
print("Quarter 3=", q3)
plt.boxplot(arr)
plt.show()
plt.hist(arr)
plt.show()

OUTPUT:
[100 110 110 110 120 120 130 140 140 150 170 220]
MEAN= 135.0
MEDIAN= 125.0
Quarter 1= 110.0
Quarter 3= 142.5

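NOTE: the box in the plot spans Q1 to Q3, and points beyond 1.5*IQR from the box are drawn as outliers; a minimal sketch of that fence computation on the same array:

import numpy as np

arr = np.array([100, 120, 110, 150, 110, 140, 130, 170, 120, 220, 140, 110])
q1, q3 = np.percentile(arr, [25, 75])
iqr = q3 - q1                                  # interquartile range
lower, upper = q1 - 1.5 * iqr, q3 + 1.5 * iqr  # boxplot's default whisker fences
print("Fences:", lower, upper)
print("Outliers:", arr[(arr < lower) | (arr > upper)])  # flags 220 here
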
------- 1.3: CORRELATION WITH TARGET (corela_target.py) -------

CODE:
import pandas as pd

data = {
    'sqft': [1500, 1600, 1700, 1800, 1900],
    'rooms': [3, 3, 4, 4, 5],
    'roof_color': [1, 2, 1, 2, 1],
    'price': [300000, 320000, 340000, 360000, 380000]
}

df = pd.DataFrame(data)
correlation_matrix = df.corr(numeric_only=True)
print("🔁 Full Correlation Matrix:")
print(correlation_matrix.round(2))

correlation = df.corr()['price'].drop('price')
print(correlation)

selected_features = correlation[correlation.abs() > 0.3].index
print("Selected features:", list(selected_features))

OUTPUT:
🔁 Full Correlation Matrix:
            sqft  rooms  roof_color  price
sqft        1.00   0.94        0.00   1.00
rooms       0.94   1.00       -0.33   0.94
roof_color  0.00  -0.33        1.00   0.00
price       1.00   0.94        0.00   1.00

sqft          1.000000e+00
rooms         9.449112e-01
roof_color    5.250970e-17
Name: price, dtype: float64

Selected features: ['sqft', 'rooms']

------- 1.4: COLUMN TRANSFORMER ENCODING (column_trans_encod.py) -------

CODE:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

df = pd.read_csv("customer.csv")
print(df)

x = df.iloc[:, :4]
y = df.iloc[:, -1]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

trans = ColumnTransformer(
    transformers=[
        ('impute_age', SimpleImputer(), ['age']),
        ('onehot_gender', OneHotEncoder(sparse_output=False), ['Gender']),
        ('ordinal_rating', OrdinalEncoder(categories=[['Poor', 'Average', 'Good']]), ['review']),
        ('ordinal_education', OrdinalEncoder(categories=[['HSC', 'UG', 'PG']]), ['education'])
    ],
    remainder='passthrough'
)

x_train = trans.fit_transform(x_train)
x_test = trans.transform(x_test)  # transform only: reuse the encoders fitted on the training split
print("\nTransformed XTrain:\n", x_train)
print("\nTransformed XTest:\n", x_test)

le = LabelEncoder()
y_train1 = le.fit_transform(y_train)
y_test1 = le.transform(y_test)  # transform only, with the mapping learned on y_train
print("\nTransformed YTrain:\n", y_train1)
print("\nTransformed YTest:\n", y_test1)

OUTPUT:
     age  Gender   review education Purchase
0    NaN    Male     Good       HSC      yes
1   48.0    Male     Good        PG       no
2   68.0  Female  Average        UG       no
3   77.0  Female  Average        PG      yes
4   26.0    Male     Poor        PG      yes
...
[14 rows x 5 columns]

Transformed XTrain:
[[55.  0.  1.  0.  2.]
 [18.  0.  1.  2.  1.]
 [44.  0.  1.  2.  1.]
 [50.  1.  0.  2.  1.]
 ...
 [26.  0.  1.  0.  2.]]

Transformed XTest:
[[77.  1.  2.  0.]
 [77.  1.  1.  2.]]

Transformed YTrain:
[1 0 1 1 1 0 1 0 0 0 1 1]

Transformed YTest:
[0 0]

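NOTE: a minimal sketch of chaining the same ColumnTransformer with a classifier in a Pipeline, so the fit-on-train / transform-on-test bookkeeping is automatic; it assumes the raw x_train/x_test splits from before the manual transform calls above, and the DecisionTreeClassifier choice is an assumption:

from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

pipe = Pipeline([('prep', trans), ('clf', DecisionTreeClassifier())])
pipe.fit(x_train, y_train)   # encoders are fitted on the training split only
print("Test accuracy:", pipe.score(x_test, y_test))
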
------- 1.5: CORRELATION BETWEEN FEATURES (corel_bt_feat.py) -------

CODE:
import pandas as pd

data = {
    'sqft': [1500, 1600, 1700, 1800, 1900],
    'rooms': [3, 3, 4, 4, 5],
    'bathrooms': [1, 2, 2, 2, 3],
    'roof_color': [1, 2, 1, 2, 1],
    'price': [300000, 320000, 340000, 360000, 380000]
}
df = pd.DataFrame(data)
feature_corr = df.drop(columns='price').corr()
print("Correlation between features:")
print(feature_corr.round(2))

OUTPUT:
Correlation between features:
            sqft  rooms  bathrooms  roof_color
sqft        1.00   0.94       0.89        0.00
rooms       0.94   1.00       0.85       -0.33
bathrooms   0.89   0.85       1.00        0.00
roof_color  0.00  -0.33       0.00        1.00


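NOTE: once the pairwise matrix is known, a common follow-up is flagging one feature from each highly correlated pair; a minimal sketch (the 0.85 threshold is an assumption):

import numpy as np

corr = feature_corr.abs()
# keep only the upper triangle so each pair is inspected once
upper = corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))
to_drop = [col for col in upper.columns if (upper[col] > 0.85).any()]
print("Candidates to drop:", to_drop)
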
===============================================================================
PRACTICAL 2: PRINCIPAL COMPONENT ANALYSIS (PCA)
(Dimensionality Reduction while retaining maximum variance)
===============================================================================

CODE:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("student_dataset.csv")
print(df)

scaler = StandardScaler()
df1 = scaler.fit_transform(df.iloc[:, :3])
print(df1)

cov_matrix = np.cov(df1.T)
print("COVARIANCE MATRIX:\n", cov_matrix)

eig_val, eig_vect = np.linalg.eig(cov_matrix)
print("\nEigen Values\n", eig_val)
print("Eigen Vectors\n", eig_vect)

# eig() does not sort eigenvalues; columns 0 and 2 hold the two largest values here
pc = eig_vect[:, [0, 2]]
pc = pc.T
print("\nTop 2 Principal Components:\n", pc)

trans_df = np.dot(df1[:, 0:3], pc.T)
print("\nNew Transform\n", trans_df)

Dataf = pd.DataFrame(trans_df, columns=['PC1', 'PC2'])
Dataf['GTU Marks'] = df['GTU'].values
print(Dataf)

OUTPUT:
    Mid_Sem   IQ  HSC  GTU
0        35  110   78   70
1        42  125   85   88
2        28  100   72   65
3        45  130   90   92
4        38  115   80   78
...
[15 rows x 4 columns]

[[-0.09736702 -0.20785572 -0.20441405]
 [ 1.03858157  1.20934235  0.81765621]
 [-1.23331562 -1.15265443 -1.08047428]
 [ 1.52541669  1.68174171  1.5477064 ]
 ...
 [-0.74648051 -0.96369469 -0.93446424]]

COVARIANCE MATRIX:
 [[1.07142857 1.0614152  1.05676449]
 [1.0614152  1.07142857 1.05019437]
 [1.05676449 1.05019437 1.07142857]]

Eigen Values
 [3.18368463 0.00878971 0.02181137]

Eigen Vectors
 [[-0.57842869 -0.7974863  -0.17156877]
 [-0.57723546  0.54876897 -0.60469152]
 [-0.57638483  0.25073535  0.77776109]]

Top 2 Principal Components:
 [[-0.57842869 -0.57723546 -0.57638483]
 [-0.17156877 -0.60469152  0.77776109]]

New Transform
 [[ 0.29412273 -0.01659157]
 [-1.77010531 -0.27352604]
 [ 2.00150714  0.06824795]
 [-2.74518022 -0.074903  ]
 ...
 [ 1.5266755  -0.01597918]]

        PC1       PC2  GTU Marks
0  0.294123 -0.016592         70
1 -1.770105 -0.273526         88
2  2.001507  0.068248         65
3 -2.745180 -0.074903         92
4 -0.428478 -0.158651         78
...
[15 rows x 3 columns]

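NOTE: a minimal sketch cross-checking the manual result against sklearn's PCA (sign flips per component are expected, since an eigenvector's sign is arbitrary):

from sklearn.decomposition import PCA

pca = PCA(n_components=2)
scores = pca.fit_transform(df1)  # df1 is the standardized data from above
print(scores[:3])                # should match trans_df up to a sign per column
print("Explained variance ratio:", pca.explained_variance_ratio_)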

===============================================================================
PRACTICAL 3: DECISION TREE CLASSIFIER
(Classification with evaluation using precision, recall, and F1-score)
===============================================================================

CODE:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

data = pd.read_csv("decesiontree.csv")
print(data)

cleanup_nums = {"Age": {"Youth": 0, "Middle": 1, "Senior": 2},
                "Income": {"Low": 0, "Medium": 1, "High": 2},
                "Student": {"No": 0, "Yes": 1},
                "Credit Rating": {"Fair": 1, "Excellent": 2},
                "Buys-Computer": {"No": 0, "Yes": 1}}
data.replace(cleanup_nums, inplace=True)
print(data)

predictors = data.iloc[:, 1:5]
target = data.iloc[:, 5]

dtree_entropy = DecisionTreeClassifier(criterion="entropy", random_state=100,
                                       max_depth=3, min_samples_leaf=5)

OUTPUT:
    Item no     Age  Income Student Credit Rating Buys-Computer
0         1   Youth    High      No          Fair            No
1         2   Youth    High      No     Excellent            No
2         3  Middle    High      No          Fair           Yes
3         4  Senior  Medium      No          Fair           Yes
4         5  Senior     Low     Yes          Fair           Yes
...
[14 rows x 6 columns]

    Item no  Age  Income  Student  Credit Rating  Buys-Computer
0         1    0       2        0              1              0
1         2    0       2        0              2              0
2         3    1       2        0              1              1
3         4    2       1        0              1              1
4         5    2       0        1              1              1
...
[14 rows x 6 columns]

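NOTE: the entry stops before fitting; a minimal sketch of the missing train-and-evaluate step (the 70/30 split is an assumption, since the metrics are imported above but never used):

from sklearn.model_selection import train_test_split

x_tr, x_te, y_tr, y_te = train_test_split(predictors, target, test_size=0.3, random_state=100)
dtree_entropy.fit(x_tr, y_tr)
y_pred = dtree_entropy.predict(x_te)

print("Confusion matrix:\n", confusion_matrix(y_te, y_pred))
print("Accuracy:", accuracy_score(y_te, y_pred))
print(classification_report(y_te, y_pred))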

===============================================================================
PRACTICAL 4: NAIVE BAYES CLASSIFIER
(Probabilistic classification using Gaussian Naive Bayes)
===============================================================================

CODE:
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB

fl = "Naive_Bayesian.csv"
df = pd.read_csv(fl, index_col="Item no")
print(df)

dfCol = df.columns
print("df columns: ", dfCol)
ndfCol = df.shape[1]
ndfRow = df.shape[0]

# collect each column's raw values
feature = [[] for x in range(ndfCol)]
for i in range(ndfCol):
    feature[i] = list(df[dfCol[i]])
    print(dfCol[i], ":", feature[i])

le = preprocessing.LabelEncoder()

# label-encode every column independently
feature0 = [[] for x in range(ndfCol)]
for i in range(ndfCol):
    feature0[i] = le.fit_transform(feature[i])
    print(dfCol[i], "encoded:", feature0[i])

# rebuild row tuples from the encoded columns (all columns except the target)
features = []
for i in range(ndfRow):
    xlst = []
    for j in range(ndfCol - 1):
        xlst.append(feature0[j][i])
    xtup = tuple(xlst)
    features.append(xtup)

print("features:", features)

# the last encoded column is the target; shift the classes from 0/1 to 1/2
label = feature0[ndfCol - 1]
label = [label[i] + 1 for i in range(ndfRow)]
print("label:", label)

model = GaussianNB()
model.fit(features, label)
print("model:", model)

ptStr = input("Enter unknown data (separated by ,) excluding Index Column: ")
ptLst = [int(x) for x in ptStr.split(',')]
point1 = [ptLst]
print("Unknown data (sample):", point1)
predicted = model.predict(point1)
print("Class for Point:", point1, "is:", predicted)

OUTPUT (with input: 0,1,1,0):
            Age  Income Student Credit Rating Buys-Computer
Item no
1         Youth    High      No          Fair            No
2         Youth    High      No     Excellent            No
3        Middle    High      No          Fair           Yes
4        Senior  Medium      No          Fair           Yes
...
[14 rows x 5 columns]

df columns:  Index(['Age', 'Income', 'Student', 'Credit Rating', 'Buys-Computer'], dtype='object')

Age : ['Youth', 'Youth', 'Middle', 'Senior', 'Senior', ...]
Income : ['High', 'High', 'High', 'Medium', 'Low', ...]
Student : ['No', 'No', 'No', 'No', 'Yes', ...]
Credit Rating : ['Fair', 'Excellent', 'Fair', 'Fair', 'Fair', ...]
Buys-Computer : ['No', 'No', 'Yes', 'Yes', 'Yes', ...]

Age encoded: [2 2 0 1 1 0 1 2 2 1 2 0 0 1]
Income encoded: [0 0 0 2 1 1 1 2 1 2 2 2 0 2]
Student encoded: [0 0 0 0 1 1 1 0 1 1 1 0 1 1]
Credit Rating encoded: [1 0 1 1 1 0 0 1 1 1 0 0 1 0]
Buys-Computer encoded: [0 0 1 1 1 0 1 0 1 1 1 1 1 0]

features: [(2, 0, 0, 1), (2, 0, 0, 0), (0, 0, 0, 1), (1, 2, 0, 1),
           (1, 1, 1, 1), (0, 1, 1, 0), ...]

label: [1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1]

model: GaussianNB()

Enter unknown data (separated by ,) excluding Index Column:
Unknown data (sample): [[0, 1, 1, 0]]
Class for Point: [[0, 1, 1, 0]] is: [2]

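NOTE: the column-by-column loop above can be written more compactly; a minimal equivalent sketch using DataFrame.apply (same per-column LabelEncoder mapping, same +1 class shift):

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB

df = pd.read_csv("Naive_Bayesian.csv", index_col="Item no")
encoded = df.apply(LabelEncoder().fit_transform)  # encode every column at once
X = encoded.iloc[:, :-1]
y = encoded.iloc[:, -1] + 1

model = GaussianNB().fit(X, y)
point = pd.DataFrame([[0, 1, 1, 0]], columns=X.columns)
print("Class for point:", model.predict(point))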

===============================================================================
PRACTICAL 5: LINEAR REGRESSION
(Predicting continuous values with evaluation using MSE and R² score)
===============================================================================

CODE:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

dataset = pd.read_csv("LinearRegression.csv")
print(dataset)

x = dataset.iloc[:, 0:1]
y = dataset.iloc[:, 1]
y = y.replace(['Yes', 'No'], [1, 0])  # map the Yes/No target to 1/0

print(y)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.01, random_state=123)

model = LinearRegression()
model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_val = model.predict([[18]])
print(y_pred_val)

# threshold the regression output at 0.5 to read it as a Yes/No decision
if y_pred_val > 0.5:
    print("Yes")
else:
    print("No")

plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, model.predict(X_train))
plt.show()

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

OUTPUT:
   Outside Temperature \nCelcius Wear a\n jacket
0                             30              No
1                             25              No
2                             20              No
3                             15             Yes
4                             10             Yes

0    0
1    0
2    0
3    1
4    1
Name: Wear a\n jacket, dtype: int64

[0.54285714]
Yes

Mean Absolute Error: 0.14285714285714302
Mean Squared Error: 0.02040816326530617
Root Mean Squared Error: 0.14285714285714302

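NOTE: the header lists the R² score, but the code stops at MAE/MSE/RMSE; a minimal sketch of adding it (scored on the training fit, since R² is undefined on this one-row test split):

from sklearn.metrics import r2_score

# R² compares the model's error against a predict-the-mean baseline
print('R2 on training data:', r2_score(y_train, model.predict(X_train)))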

===============================================================================
PRACTICAL 6: K-NEAREST NEIGHBORS (KNN) CLASSIFIER
(Classification using different k values with accuracy evaluation)
===============================================================================

CODE:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv("knn.csv")
df = df[df['Item no.'].notna()]  # keep only rows that have an item number
print("Dataset Preview:")
print(df.head())

X = df.iloc[:, 1:4]
y = df.iloc[:, 4]
print("INPUT\n", X)
print("OUTPUT\n", y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print("XTRAIN\n", X_train)
print("X_TEST\n", X_test)

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
print("PREDICTION : \n", y_pred)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

OUTPUT:
Dataset Preview:
   Item no.  Temp  Humidity  Wind Speed Play ...
0       1.0  85.0      85.0        12.0   No ...
1       2.0  80.0      90.0         9.0   No ...
2       3.0  83.0      86.0         4.0  Yes ...
3       4.0  70.0      96.0         3.0  Yes ...
4       5.0  68.0      80.0         5.0  Yes ...

INPUT
    Temp  Humidity  Wind Speed
0   85.0      85.0        12.0
1   80.0      90.0         9.0
2   83.0      86.0         4.0
3   70.0      96.0         3.0
4   68.0      80.0         5.0
...
[14 rows x 3 columns]

OUTPUT
0     No
1     No
2    Yes
3    Yes
4    Yes
...
Name: Play, dtype: object

XTRAIN
[[ 1.37690922 -0.53048047 -0.46006855]
 [-1.22885447 -0.99359834  2.25104967]
 [-0.57741354 -0.99359834 -0.46006855]
 [ 1.70262968  0.48837885 -0.64080976]
 ...
 [-1.3917147  -1.45671621 -1.00229219]]

X_TEST
[[ 0.39974784 -0.0673626  -1.00229219]
 [-0.08883285  0.85887314 -0.64080976]
 [ 2.02835014  0.39575527  0.80511996]]

PREDICTION :
['Yes' 'Yes' 'Yes']

Accuracy: 0.6666666666666666

Classification Report:
              precision    recall  f1-score   support

          No       0.00      0.00      0.00         1
         Yes       0.67      1.00      0.80         2

    accuracy                           0.67         3
   macro avg       0.33      0.50      0.40         3
weighted avg       0.44      0.67      0.53         3

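NOTE: the header mentions trying different k values, while the run above fixes k=3; a minimal sketch of sweeping k on the same scaled split (odd values are an assumption, chosen to avoid voting ties):

for k in range(1, 10, 2):
    acc = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).score(X_test, y_test)
    print(f"k={k}: accuracy={acc:.2f}")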

===============================================================================
PRACTICAL 7: MULTIPLE LINEAR REGRESSION
(Prediction using multiple features with R² score and RMSE evaluation)
===============================================================================

CODE:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

Data = pd.read_excel("student_data1.xlsx")
print(Data)

X = Data.iloc[:, :2]
y = Data.iloc[:, -1:]
print(X)
print(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Xtrain\n", X_train)
print("YTEST\n", y_test)

model = LinearRegression()
model.fit(X_train.to_numpy(), y_train)

# predict placement for an unseen student with CGPA 8.6 and IQ 125
y_pred = model.predict([[8.6, 125]])
print("Prediction for CGPA=8.6, IQ=125:\n", y_pred.round(2))

print("M= ", model.coef_.round(2))
print("b= ", model.intercept_.round(2))

OUTPUT:
   CGPA   IQ  Placement (LPA)
0   7.5  110              6.5
1   8.0  120              7.0
2   8.5  125              8.2
3   9.0  130              9.1
4   6.5  100              5.0
...
[10 rows x 3 columns]

   CGPA   IQ
0   7.5  110
1   8.0  120
2   8.5  125
3   9.0  130
4   6.5  100
...
[10 rows x 2 columns]

   Placement (LPA)
0              6.5
1              7.0
2              8.2
3              9.1
4              5.0
...
[10 rows x 1 columns]

Xtrain
   CGPA   IQ
5   7.0  105
0   7.5  110
7   8.8  128
2   8.5  125
...
[8 rows x 2 columns]

YTEST
   Placement (LPA)
8              5.2
1              7.0

Prediction for CGPA=8.6, IQ=125:
 [[8.45]]

M=  [[1.32 0.03]]
b=  [-6.51]

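NOTE: r2_score is imported above but never used; a minimal sketch of the R² and RMSE evaluation the header promises, on the two held-out rows:

y_test_pred = model.predict(X_test.to_numpy())
print("R2 on test:", round(r2_score(y_test, y_test_pred), 3))
rmse = np.sqrt(np.mean((y_test.to_numpy() - y_test_pred) ** 2))
print("RMSE:", round(float(rmse), 3))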

===============================================================================
PRACTICAL 8: SINGULAR VALUE DECOMPOSITION (SVD)
(Dimensionality Reduction using SVD - Manual & Sklearn Implementation)
===============================================================================

------- 8.1: SVD MANUAL IMPLEMENTATION (svd.py) -------

CODE:
import pandas as pd
import numpy as np

df = pd.read_excel("student_dataset.xlsx")
A = df.iloc[:, :3].to_numpy()
A_mean = A - np.mean(A, axis=0)  # center the data before decomposing

U, S, V_T = np.linalg.svd(A_mean)
k = 2
U_k = U[:, :k]
S_k = np.diag(S[:k])

final_data1 = np.dot(U_k, S_k)
print("Reduced Data:\n", final_data1)

explained_variance = (S[:k]**2) / np.sum(S**2)
print("Explained variance by top 2 components:", explained_variance)

reduced_df = pd.DataFrame(final_data1, columns=["PC1", "PC2"])
reduced_df['GTU'] = df['GTU'].values
print(reduced_df)

OUTPUT:
Reduced Data:
 [[ -2.60622042   0.08983428]
 [ 15.20533711  -2.22651162]
 [-16.15266994   0.28962606]
 [ 22.72992624  -0.77843972]
 [  3.46193108  -0.87192496]
 ...
 [-12.83777493   0.27648847]]

Explained variance by top 2 components: [0.99132896 0.00672569]

         PC1       PC2  GTU
0  -2.606220  0.089834   70
1  15.205337 -2.226512   88
2 -16.152670  0.289626   65
3  22.729926 -0.778440   92
4   3.461931 -0.871925   78
...
[15 rows x 3 columns]

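NOTE: a minimal sketch checking how faithful the rank-2 factors are, by rebuilding the centered matrix from them:

V_k = V_T[:k, :]            # top-k right singular vectors (component directions)
A_approx = U_k @ S_k @ V_k  # rank-k reconstruction of A_mean
rel_err = np.linalg.norm(A_mean - A_approx) / np.linalg.norm(A_mean)
print("Relative reconstruction error:", rel_err)
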
------- 8.2: SVD USING SKLEARN (svd2.py) -------

CODE:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler

df = pd.read_excel("student_dataset.xlsx")
X = df.iloc[:, :3]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

svd = TruncatedSVD(n_components=2)
X_reduced = svd.fit_transform(X_scaled)
print(X_reduced)

Dataf = pd.DataFrame(X_reduced, columns=['PC1', 'PC2'])
Dataf['GTU Marks'] = df['GTU'].values
print(Dataf)

print("Singular values:", svd.singular_values_)
print("Explained variance:", svd.explained_variance_)
print("Explained variance ratio:", svd.explained_variance_ratio_)
print("Total variance captured:", svd.explained_variance_ratio_.sum())

OUTPUT:
[[-0.29412273 -0.01659157]
 [ 1.77010531 -0.27352604]
 [-2.00150714  0.06824795]
 [ 2.74518022 -0.074903  ]
 [ 0.42847827 -0.1586513 ]
 ...
 [-1.5266755  -0.01597918]]

        PC1       PC2  GTU Marks
0 -0.294123 -0.016592         70
1  1.770105 -0.273526         88
2 -2.001507  0.068248         65
3  2.745180 -0.074903         92
4  0.428478 -0.158651         78
...
[15 rows x 3 columns]

Singular values: [6.67619539 0.55259316]
Explained variance: [2.97143899 0.02035728]
Explained variance ratio: [0.99047966 0.00678576]
Total variance captured: 0.997265423131314

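NOTE: for TruncatedSVD, explained_variance_ is the per-column variance of the transformed data, which for centered input like X_scaled equals singular_value² / n_samples; a minimal sketch verifying that against the numbers above:

print(np.var(X_reduced, axis=0))                    # matches svd.explained_variance_
print(svd.singular_values_**2 / X_scaled.shape[0])  # same numbers: s² / n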

===============================================================================
END OF JOURNAL
===============================================================================