Google BigQuery how to create machine learning model using SQL
-- standardSQL to build the model
CREATE OR REPLACE MODEL `bqml_lab.sample_model`
OPTIONS(model_type='logistic_reg') AS
SELECT
IF(totals.transactions IS NULL, 0, 1) AS label,
IFNULL(device.operatingSystem, "") AS os,
device.isMobile AS is_mobile,
IFNULL(geoNetwork.country, "") AS country,
IFNULL(totals.pageviews, 0) AS pageviews
FROM
`bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN '20160801' AND '20170631'
LIMIT 100000;
SELECT
*
FROM
ml.EVALUATE(MODEL `bqml_lab.sample_model`, (
SELECT
IF(totals.transactions IS NULL, 0, 1) AS label,
IFNULL(device.operatingSystem, "") AS os,
device.isMobile AS is_mobile,
IFNULL(geoNetwork.country, "") AS country,
IFNULL(totals.pageviews, 0) AS pageviews
FROM
`bigquery-public-data.google_analytics_sample.ga_sessions_*`
WHERE
_TABLE_SUFFIX BETWEEN '20170701' AND '20170801'));
-- We used logistic regression for this model, hence, this query returns
-- values such as precision, recall, accuracy, f1_score, log_loss, roc_auc.
-- If it was linear regression, we'd have seen values such as mean_absolute_error,
-- mean_squared_error, mean_squared_log_error, median_absolute_error,
-- r2_score, explained_variance.
-- output in Json/Grid
[
{
"precision": "0.43601895734597157",
"recall": "0.0856610800744879",
"accuracy": "0.9851952452667814",
"f1_score": "0.1431906614785992",
"log_loss": "0.04735969966406397",
"roc_auc": "0.982000999000999"
}
]