This is documentation for an old release of Scikit-learn (version 0.20). Try the latest stable release (version 1.6) or development (unstable) versions.
Note
Click here to download the full example code
Lasso on dense and sparse data¶
We show that linear_model.Lasso provides the same results for dense and sparse data and that in the case of sparse data the speed is improved.
Out:
--- Dense matrices
Sparse Lasso done in 0.142571s
Dense Lasso done in 0.047774s
Distance between coefficients : 1.017962516548778e-13
--- Sparse matrices
Matrix density : 0.6263000000000001 %
Sparse Lasso done in 0.230232s
Dense Lasso done in 0.976481s
Distance between coefficients : 7.344760355532163e-12
print(__doc__)
from time import time
from scipy import sparse
from scipy import linalg
from sklearn.datasets.samples_generator import make_regression
from sklearn.linear_model import Lasso
# #############################################################################
# The two Lasso implementations on Dense data
print("--- Dense matrices")
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
X_sp = sparse.coo_matrix(X)
alpha = 1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
t0 = time()
sparse_lasso.fit(X_sp, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(X, y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
# #############################################################################
# The two Lasso implementations on Sparse data
print("--- Sparse matrices")
Xs = X.copy()
Xs[Xs < 2.5] = 0.0
Xs = sparse.coo_matrix(Xs)
Xs = Xs.tocsc()
print("Matrix density : %s %%" % (Xs.nnz / float(X.size) * 100))
alpha = 0.1
sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
t0 = time()
sparse_lasso.fit(Xs, y)
print("Sparse Lasso done in %fs" % (time() - t0))
t0 = time()
dense_lasso.fit(Xs.toarray(), y)
print("Dense Lasso done in %fs" % (time() - t0))
print("Distance between coefficients : %s"
% linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))
Total running time of the script: ( 0 minutes 1.614 seconds)