This is documentation for an old release of Scikit-learn (version 0.24). Try the latest stable release (version 1.6) or development (unstable) versions.
Note
Click here to download the full example code or to run this example in your browser via Binder
Lasso on dense and sparse data¶
We show that linear_model.Lasso provides the same results for dense and sparse data and that in the case of sparse data the speed is improved.
Out:
--- Dense matrices
Sparse Lasso done in 0.134139s
Dense Lasso done in 0.049522s
Distance between coefficients : 6.852225994430906e-14
--- Sparse matrices
Matrix density : 0.6263000000000001 %
Sparse Lasso done in 0.160597s
Dense Lasso done in 1.076999s
Distance between coefficients : 7.768625813853113e-12
print(__doc__)
from time import time
from scipy import sparse
from scipy import linalg
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
# #############################################################################
# The two Lasso implementations on Dense data
print("--- Dense matrices")
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
X_sp = sparse.coo_matrix(X)
alpha = 1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
t0 = time()
sparse_lasso.fit(X_sp, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(X, y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
# #############################################################################
# The two Lasso implementations on Sparse data
print("--- Sparse matrices")
Xs = X.copy()
Xs[Xs < 2.5] = 0.0
Xs = sparse.coo_matrix(Xs)
Xs = Xs.tocsc()
print("Matrix density : %s %%" % (Xs.nnz / float(X.size) * 100))
alpha = 0.1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
t0 = time()
sparse_lasso.fit(Xs, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(Xs.toarray(), y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
Total running time of the script: ( 0 minutes 1.512 seconds)