This is documentation for an old release of Scikit-learn (version 0.22). Try the latest stable release (version 1.6) or development (unstable) versions.
Note
Click here to download the full example code or to run this example in your browser via Binder
Lasso on dense and sparse data¶
We show that linear_model.Lasso provides the same results for dense and sparse data and that in the case of sparse data the speed is improved.
Out:
--- Dense matrices
Sparse Lasso done in 0.106757s
Dense Lasso done in 0.039164s
Distance between coefficients : 1.0705405751792344e-13
--- Sparse matrices
Matrix density : 0.6263000000000001 %
Sparse Lasso done in 0.193909s
Dense Lasso done in 0.866051s
Distance between coefficients : 7.928463765972842e-12
print(__doc__)
from time import time
from scipy import sparse
from scipy import linalg
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
# #############################################################################
# The two Lasso implementations on Dense data
print("--- Dense matrices")
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
X_sp = sparse.coo_matrix(X)
alpha = 1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
t0 = time()
sparse_lasso.fit(X_sp, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(X, y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
# #############################################################################
# The two Lasso implementations on Sparse data
print("--- Sparse matrices")
Xs = X.copy()
Xs[Xs < 2.5] = 0.0
Xs = sparse.coo_matrix(Xs)
Xs = Xs.tocsc()
print("Matrix density : %s %%" % (Xs.nnz / float(X.size) * 100))
alpha = 0.1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
t0 = time()
sparse_lasso.fit(Xs, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(Xs.toarray(), y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
Total running time of the script: ( 0 minutes 1.579 seconds)
Estimated memory usage: 8 MB