link to website

This commit is contained in:
Pawel Sarkowicz
2026-03-31 16:05:58 -04:00
parent f81d01fe52
commit 290e8bc3e6
6 changed files with 134 additions and 1241 deletions

View File

@@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -68,219 +68,30 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "8514ed8b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Square ft</th>\n",
" <th>Square m</th>\n",
" <th>Bedrooms</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5.000000</td>\n",
" <td>5.000000</td>\n",
" <td>5.00000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1770.000000</td>\n",
" <td>164.000000</td>\n",
" <td>3.20000</td>\n",
" <td>547.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>258.843582</td>\n",
" <td>24.052027</td>\n",
" <td>0.83666</td>\n",
" <td>81.516869</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1550.000000</td>\n",
" <td>144.000000</td>\n",
" <td>2.00000</td>\n",
" <td>475.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1600.000000</td>\n",
" <td>148.000000</td>\n",
" <td>3.00000</td>\n",
" <td>490.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1600.000000</td>\n",
" <td>148.000000</td>\n",
" <td>3.00000</td>\n",
" <td>500.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2000.000000</td>\n",
" <td>185.000000</td>\n",
" <td>4.00000</td>\n",
" <td>620.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2100.000000</td>\n",
" <td>195.000000</td>\n",
" <td>4.00000</td>\n",
" <td>650.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Square ft Square m Bedrooms Price\n",
"count 5.000000 5.000000 5.00000 5.000000\n",
"mean 1770.000000 164.000000 3.20000 547.000000\n",
"std 258.843582 24.052027 0.83666 81.516869\n",
"min 1550.000000 144.000000 2.00000 475.000000\n",
"25% 1600.000000 148.000000 3.00000 490.000000\n",
"50% 1600.000000 148.000000 3.00000 500.000000\n",
"75% 2000.000000 185.000000 4.00000 620.000000\n",
"max 2100.000000 195.000000 4.00000 650.000000"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "0eb032aa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Square ft</th>\n",
" <th>Square m</th>\n",
" <th>Bedrooms</th>\n",
" <th>Price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Square ft</th>\n",
" <td>1.000000</td>\n",
" <td>0.999886</td>\n",
" <td>0.900426</td>\n",
" <td>0.998810</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Square m</th>\n",
" <td>0.999886</td>\n",
" <td>1.000000</td>\n",
" <td>0.894482</td>\n",
" <td>0.998395</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Bedrooms</th>\n",
" <td>0.900426</td>\n",
" <td>0.894482</td>\n",
" <td>1.000000</td>\n",
" <td>0.909066</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Price</th>\n",
" <td>0.998810</td>\n",
" <td>0.998395</td>\n",
" <td>0.909066</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Square ft Square m Bedrooms Price\n",
"Square ft 1.000000 0.999886 0.900426 0.998810\n",
"Square m 0.999886 1.000000 0.894482 0.998395\n",
"Bedrooms 0.900426 0.894482 1.000000 0.909066\n",
"Price 0.998810 0.998395 0.909066 1.000000"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df.corr()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "6a166792",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(8222.19067218415)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"np.linalg.cond(X)"
]
@@ -330,7 +141,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -363,128 +174,60 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "799ea5da",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(4.999999999999999)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"sigma1"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "e17ad031",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-0.70710678],\n",
" [-0.70710678]])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"u1"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "b75d1b41",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-7.07106781e-01, -7.07106781e-01, -6.47932334e-17]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"v1T"
]
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "cda3bc1a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2.50000000e+00, 2.50000000e+00, 2.29078674e-16],\n",
" [2.50000000e+00, 2.50000000e+00, 2.29078674e-16]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"A1"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "5741dc92",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(3.0)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"frobenius_error"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"id": "b1171244",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(3.0)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"operator_error"
]
@@ -513,7 +256,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -555,46 +298,20 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "4288abb2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1770. , 164. , 3.2, 547. ])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"X_means"
]
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "31c2ebf2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-1.70e+02, -1.60e+01, -2.00e-01, -4.70e+01],\n",
" [ 3.30e+02, 3.10e+01, 8.00e-01, 1.03e+02],\n",
" [-2.20e+02, -2.00e+01, -1.20e+00, -7.20e+01],\n",
" [-1.70e+02, -1.60e+01, -2.00e-01, -5.70e+01],\n",
" [ 2.30e+02, 2.10e+01, 8.00e-01, 7.30e+01]])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"X_centered"
]
@@ -610,31 +327,10 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"id": "d944d257",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"U = [[-0.32486018 -0.81524197 -0.01735449 -0.17188722 0.4472136 ]\n",
" [ 0.63705869 0.10707263 -0.3450375 -0.51345964 0.4472136 ]\n",
" [-0.42643013 0.35553416 -0.61058318 0.34487822 0.4472136 ]\n",
" [-0.33034709 0.436448 0.61781883 -0.3445052 0.4472136 ]\n",
" [ 0.44457871 -0.08381281 0.35515633 0.68497384 0.4472136 ]]\n",
"\n",
"S = [5.44828440e+02 7.61035608e+00 8.91429037e-01 2.41987799e-01]\n",
"\n",
"Vh.T = [[ 0.95017495 0.29361033 0.08182661 0.06530651]\n",
" [ 0.08827897 0.06690917 -0.71081981 -0.69459714]\n",
" [ 0.00276797 -0.04366082 0.69629997 -0.71641638]\n",
" [ 0.29894268 -0.95258064 -0.05662119 0.00417714]]\n",
"\n",
"Condition number of X_centered = 2251.4707027583063\n"
]
}
],
"outputs": [],
"source": [
"print(f\"U = {U}\\n\\nS = {S}\\n\\nVh.T = {Vh.T}\\n\")\n",
"print(\"Condition number of X_centered = \", np.linalg.cond(X_centered))"
@@ -650,7 +346,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -675,37 +371,9 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[-168.1743765 -15.62476472 -0.48991109 -52.91078079]\n",
" [ 329.79403078 30.64054254 0.96072753 103.7593243 ]\n",
" [-220.7553464 -20.50996365 -0.64308544 -69.45373002]\n",
" [-171.01485494 -15.88866823 -0.49818573 -53.80444804]\n",
" [ 230.15054706 21.38285405 0.67045472 72.40963456]] \n",
" k=1: relative Frobenius reconstruction error on centered data = 0.0141 \n",
"\n",
"[[-1.69996018e+02 -1.60398881e+01 -2.19027093e-01 -4.70007022e+01]\n",
" [ 3.30033282e+02 3.06950642e+01 9.25150039e-01 1.02983104e+02]\n",
" [-2.19960913e+02 -2.03289247e+01 -7.61220318e-01 -7.20311670e+01]\n",
" [-1.70039621e+02 -1.56664278e+01 -6.43206200e-01 -5.69684681e+01]\n",
" [ 2.29963269e+02 2.13401763e+01 6.98303572e-01 7.30172337e+01]] \n",
" k=2: relative Frobenius reconstruction error on centered data = 0.0017 \n",
"\n",
"[[-1.69997284e+02 -1.60288915e+01 -2.29799059e-01 -4.69998263e+01]\n",
" [ 3.30008114e+02 3.09136956e+01 7.10984571e-01 1.03000519e+02]\n",
" [-2.20005450e+02 -1.99420315e+01 -1.14021052e+00 -7.20003486e+01]\n",
" [-1.69994556e+02 -1.60579058e+01 -2.59724807e-01 -5.69996518e+01]\n",
" [ 2.29989175e+02 2.11151332e+01 9.18749820e-01 7.29993076e+01]] \n",
" k=3: relative Frobenius reconstruction error on centered data = 0.0004 \n",
"\n"
]
}
],
"outputs": [],
"source": [
"for k in [1, 2, 3]:\n",
"\t# Define our reduced matrix\n",