Skip to content

NeoPrimate/polars_models

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

7 Commits
 
 

Repository files navigation

Polars Models

Linear Model (lm)

df = pl.DataFrame(...)
model = df.lm(pl.col("y") ~ pl.col("x1") + pl.col("x2"))
results = model.fit()
results.summary()

Generalized Linear Models (glm)

df = pl.DataFrame(...)
model = df.glm(pl.col("y") ~ pl.col("x1") + pl.col("x2"), family="binomial")
results = model.fit()
results.summary()

Nonlinear Least Squares (nls)

df = pl.DataFrame(...)
model = df.nls(pl.col("y") ~ pl.const("a") * pl.exp(pl.const("b") * pl.col("x1")), start={"a": 1.0, "b": 0.1})
results = model.fit()
results.summary()

Mixed effects / hierarchical models (lme, lmer)

df = pl.DataFrame(...)
model = df.lm(pl.col("y") ~ pl.col("x1") + pl.col("x2"))
results = model.fit()
results.summary()

PCA (prcomp, princomp)

df = pl.DataFrame(...)
model = df.princomp([pl.col("x1"), pl.col("x2"), pl.col("x3")])
results = model.fit()
results.summary()

Tree-based & ensemble models (xgb)

df = pl.DataFrame(...)
model = df.xgb([pl.col("x1"), pl.col("x2"), pl.col("x3")])
results = model.fit()
results.summary()

Time Series

ARIMA (ma, arma, arima, varimax)

df = pl.DataFrame(...)
model = df.arima(pl.col("y"), order=(1, 1, 0))
results = model.fit()
results.summary()
df = pl.DataFrame(...)
model = df.varimax(pl.col("y") ~ pl.col("x_1"), order=(1, 1, 0))
results = model.fit()
results.summary()

ETS

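# Auto
# Smoothing parameters: alpha, beta, phi, gamma
# Component codes: A (additive), N (none), M (multiplicative), Ad (additive damped), Md (multiplicative damped), Z (automatic)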

# Fully Auto
pl.col("x").stats.ets() # Defaults to "ZZZ" (Auto everything)

# Specifying the component letters via a tuple
pl.col("x").stats.ets(model=("M", "Ad", "M"))

# Specifying the component letters via a tuple, plus a fixed alpha
pl.col("x").stats.ets(model=("A", "A", "A"), alpha=0.5)

df.select(
    pl.col("y").stats.glm(
        [
            pl.col("x1"),
            pl.col("x2").pow(2),
            pl.col("x3").fill_null(0),
            pl.col("category").to_dummy(),
        ],
        family="binomial"
    )
)
df.select([
    pl.col("y").stats.ols([pl.col("x1")]).alias("simple_lm"),
    pl.col("y").stats.ols([pl.col("x1"), pl.col("x2"), pl.col("x3")]).alias("complex_lm"),
    pl.col("y").stats.lasso([pl.col("x1"), pl.col("x2")], alpha=0.1).alias("sparse_model")
])
df.select(
    pl.col("y").stats.ols(
        [
            pl.col("x1"),
            pl.col("x2").log().alias("log_x2"),
            (pl.col("x3") * pl.col("x4")).alias("interaction_term")
        ]
    )
)
df.select(
    pl.col("y").stats.glm(
        [
            pl.col("x1"),
            pl.col("x2").pow(2),
            pl.col("x3").fill_null(0),
            pl.col("category").to_dummy(),
        ],
        family="binomial"
    ).report().alias("glm_report")
).unnest("glm_report")
results = df.select([
    pl.col("y").stats.ols(["x1"]).alias("lm"),
    pl.col("y").stats.lasso(["x1", "x2"]).alias("lasso")
])

lm = results.select(pl.col("lm")).unnest("lm")
lasso = results.select(pl.col("lasso")).unnest("lasso")

comparison = results.select([
    pl.col("lm").struct.field("r_squared").alias("lm_r2"),
    pl.col("lasso").struct.field("r_squared").alias("lasso_r2"),
])

Long Table

df_models = pl.concat([
    df.select(pl.col("y").stats.ols(["x1"]).alias("res")).unnest("res"),
    df.select(pl.col("y").stats.lasso(["x1"]).alias("res")).unnest("res"),
], how="vertical")

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors