Packages Used: Functions from the following packages are used in this notebook for the first time: Plots, Optim, Statistics, DataFrames, MLJ, MLJLinearModels, CSV.
Regression in relation to other aspects of machine learning:
using Plots
d = [237, 214, 161, 146, 331, 159, 423, 332, 139, 327, 152, 98, 116]
o = [121, 143, 63, 80, 198, 52, 160, 162, 85, 106, 129, 141, 106]
plot([o d], xlabel="Week", ylabel="Units", xticks=1:length(d), label=["Orders" "Demand"])
When this problem was solved in NL Opt 3, we were maximizing total profit. Regression packages make it difficult to use a loss function other than L2 or L1, so L2 (least squares) will be used as the loss function here. Solving the regression using nonlinear optimization gives the following:
using Optim
fd̂(α,o) = α[1] .+ α[2]o
fL2(α) = sum((fd̂(α,o) .- d).^2)
α2ᵒ = optimize(α -> fL2(α), [0. 1.]).minimizer # Starting α2 values = [0., 1.]
1×2 Matrix{Float64}:
 44.8644  1.45651
using Statistics
o14 = round(mean(o))
q14 = fd̂(α2ᵒ,o14)
o14, q14
(119.0, 218.18896992673754)
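As a cross-check on the Optim result, the same least-squares coefficients can be computed in closed form with Julia's backslash operator. This is a sketch using only the standard library; it solves the same minimization exactly, so it should agree with the optimizer's output to the digits shown:

```julia
using LinearAlgebra   # backslash performs a least-squares solve for tall matrices

o = [121., 143, 63, 80, 198, 52, 160, 162, 85, 106, 129, 141, 106]
d = [237., 214, 161, 146, 331, 159, 423, 332, 139, 327, 152, 98, 116]
A = [ones(length(o)) o]   # design matrix: intercept column plus orders
α = A \ d                 # least-squares solution [intercept, slope] ≈ [44.8644, 1.45651]
```

The backslash route avoids choosing a starting point and an iterative solver, but it is limited to the L2 loss; the Optim formulation generalizes to other losses.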
using DataFrames
df = DataFrame(d=d,o=o)
13 rows × 2 columns
 | d | o |
---|---|---|
 | Int64 | Int64 |
1 | 237 | 121 |
2 | 214 | 143 |
3 | 161 | 63 |
4 | 146 | 80 |
5 | 331 | 198 |
6 | 159 | 52 |
7 | 423 | 160 |
8 | 332 | 162 |
9 | 139 | 85 |
10 | 327 | 106 |
11 | 152 | 129 |
12 | 98 | 141 |
13 | 116 | 106 |
describe(df)
2 rows × 7 columns
 | variable | mean | min | median | max | nmissing | eltype |
---|---|---|---|---|---|---|---|
 | Symbol | Float64 | Int64 | Float64 | Int64 | Int64 | DataType |
1 | d | 218.077 | 98 | 161.0 | 423 | 0 | Int64 |
2 | o | 118.923 | 52 | 121.0 | 198 | 0 | Int64 |
using MLJ
schema(df)
┌─────────┬─────────┬────────────┐
│ _.names │ _.types │ _.scitypes │
├─────────┼─────────┼────────────┤
│ d       │ Int64   │ Count      │
│ o       │ Int64   │ Count      │
└─────────┴─────────┴────────────┘
_.nrows = 13
df = coerce(df, Count=>Continuous)
13 rows × 2 columns
 | d | o |
---|---|---|
 | Float64 | Float64 |
1 | 237.0 | 121.0 |
2 | 214.0 | 143.0 |
3 | 161.0 | 63.0 |
4 | 146.0 | 80.0 |
5 | 331.0 | 198.0 |
6 | 159.0 | 52.0 |
7 | 423.0 | 160.0 |
8 | 332.0 | 162.0 |
9 | 139.0 | 85.0 |
10 | 327.0 | 106.0 |
11 | 152.0 | 129.0 |
12 | 98.0 | 141.0 |
13 | 116.0 | 106.0 |
y = df.d # Response can be a vector
X = select(df, Not(:d)) # Predictors need to be kept as a DataFrame
13 rows × 1 columns
 | o |
---|---|
 | Float64 |
1 | 121.0 |
2 | 143.0 |
3 | 63.0 |
4 | 80.0 |
5 | 198.0 |
6 | 52.0 |
7 | 160.0 |
8 | 162.0 |
9 | 85.0 |
10 | 106.0 |
11 | 129.0 |
12 | 141.0 |
13 | 106.0 |
using MLJLinearModels
@load LinearRegressor pkg=MLJLinearModels
┌ Info: For silent loading, specify `verbosity=0`.
└ @ Main C:\Users\kay\.julia\packages\MLJModels\quege\src\loading.jl:168
import MLJLinearModels ✔
LinearRegressor
model = LinearRegressor()
LinearRegressor(
fit_intercept = true,
solver = nothing) @311
mach = machine(model, X, y)
Machine{LinearRegressor,…} @253 trained 0 times; caches data
  args:
    1: Source @183 ⏎ `Table{AbstractVector{Continuous}}`
    2: Source @617 ⏎ `AbstractVector{Continuous}`
fit!(mach)
┌ Info: Training Machine{LinearRegressor,…} @253.
└ @ MLJBase C:\Users\kay\.julia\packages\MLJBase\xlh6G\src\machines.jl:390
Machine{LinearRegressor,…} @253 trained 1 time; caches data
  args:
    1: Source @183 ⏎ `Table{AbstractVector{Continuous}}`
    2: Source @617 ⏎ `AbstractVector{Continuous}`
fit!(mach, verbosity=0)
Machine{LinearRegressor,…} @253 trained 1 time; caches data
  args:
    1: Source @183 ⏎ `Table{AbstractVector{Continuous}}`
    2: Source @617 ⏎ `AbstractVector{Continuous}`
fp = fitted_params(mach)
(coefs = [:o => 1.4565093377639278], intercept = 44.864351062843625,)
α2ᵒ
1×2 Matrix{Float64}:
 44.8644  1.45651
predict(mach, DataFrame(o=o14)), q14 # Column name must match the training predictor :o
([218.18896225675104], 218.18896992673754)
using DataFrames, CSV
df = DataFrame(CSV.File("ML-2-bakery.csv"))
first(df, 5)
5 rows × 3 columns
 | Demand | Orders | Visitors |
---|---|---|---|
 | Int64 | Int64 | Int64 |
1 | 434 | 399 | 160 |
2 | 224 | 128 | 96 |
3 | 220 | 84 | 100 |
4 | 361 | 239 | 33 |
5 | 72 | 22 | 30 |
using MLJ
schema(df)
┌──────────┬─────────┬────────────┐
│ _.names  │ _.types │ _.scitypes │
├──────────┼─────────┼────────────┤
│ Demand   │ Int64   │ Count      │
│ Orders   │ Int64   │ Count      │
│ Visitors │ Int64   │ Count      │
└──────────┴─────────┴────────────┘
_.nrows = 49
df = coerce(df, Any=>Continuous)
describe(df)
3 rows × 7 columns
 | variable | mean | min | median | max | nmissing | eltype |
---|---|---|---|---|---|---|---|
 | Symbol | Float64 | Float64 | Float64 | Float64 | Int64 | DataType |
1 | Demand | 265.143 | 72.0 | 236.0 | 539.0 | 0 | Float64 |
2 | Orders | 171.122 | 22.0 | 146.0 | 502.0 | 0 | Float64 |
3 | Visitors | 70.8163 | 2.0 | 68.0 | 194.0 | 0 | Float64 |
y = df.Demand
X = select(df, [:Orders])
first(X, 5)
5 rows × 1 columns
 | Orders |
---|---|
 | Float64 |
1 | 399.0 |
2 | 128.0 |
3 | 84.0 |
4 | 239.0 |
5 | 22.0 |
model = LinearRegressor()
mach = machine(model, X, y)
fit!(mach, verbosity=0)
fp = fitted_params(mach)
@show fp.intercept
fp.coefs
fp.intercept = 95.65902562021842
1-element Vector{Pair{Symbol, Float64}}:
 :Orders => 0.9904242987011688
fd̂(α,o) = α[1] .+ α[2]o
fL2(α) = sum((fd̂(α,X.Orders) .- y).^2)
α2ᵒ = optimize(α -> fL2(α), [0. 1.]).minimizer
1×2 Matrix{Float64}:
 95.659  0.990424
fd̂Q(α,o) = α[1] .+ α[2]o .+ α[3]o.^2
fL2(α) = sum((fd̂Q(α,X.Orders) .- y).^2)
α2Qᵒ = optimize(α -> fL2(α), [0. 1. 0.]).minimizer
1×3 Matrix{Float64}:
 43.4339  1.59494  -0.00133034
X2 = hcat(X,X.Orders.^2)
first(X2, 5)
5 rows × 2 columns
 | Orders | x1 |
---|---|---|
 | Float64 | Float64 |
1 | 399.0 | 159201.0 |
2 | 128.0 | 16384.0 |
3 | 84.0 | 7056.0 |
4 | 239.0 | 57121.0 |
5 | 22.0 | 484.0 |
rename!(X2, :x1 => :Orders2)
first(X2, 5)
5 rows × 2 columns
 | Orders | Orders2 |
---|---|---|
 | Float64 | Float64 |
1 | 399.0 | 159201.0 |
2 | 128.0 | 16384.0 |
3 | 84.0 | 7056.0 |
4 | 239.0 | 57121.0 |
5 | 22.0 | 484.0 |
model = LinearRegressor()
mach2 = machine(model, X2, y)
fit!(mach2, verbosity=0)
fp = fitted_params(mach2)
@show fp.intercept
fp.coefs
fp.intercept = 43.43386404635581
2-element Vector{Pair{Symbol, Float64}}:
 :Orders => 1.5949445079991225
 :Orders2 => -0.001330335965969226
train, test = partition(eachindex(y), 0.70, shuffle=true, rng=1234)
([17, 38, 13, 11, 42, 31, 40, 43, 1, 49 … 15, 18, 48, 6, 7, 19, 30, 2, 26, 21], [33, 8, 25, 44, 35, 4, 10, 16, 3, 20, 29, 23, 12, 24, 45])
length(train), length(test)
(34, 15)
length(y), 0.7*length(y), round(0.7*length(y)), length(y) - round(0.7*length(y))
(49, 34.3, 34.0, 15.0)
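The 70/30 split above can be reproduced by hand with `randperm` from the standard library. This is a sketch of the idea behind `partition` (MLJ's `partition` also supports stratification and multiple fractions, and its shuffled index order will differ from this one):

```julia
using Random

Random.seed!(1234)            # fix the shuffle for reproducibility
n = 49                        # number of observations
k = round(Int, 0.7n)          # 34 rows for training
idx = randperm(n)             # random permutation of 1:n
train, test = idx[1:k], idx[k+1:end]
length(train), length(test)   # (34, 15)
```

Every index lands in exactly one of the two sets, which is the property the split must guarantee.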
train, test = partition(eachindex(y), 0.70, shuffle=true)
([2, 14, 7, 30, 16, 13, 47, 41, 27, 12 … 38, 3, 17, 1, 36, 15, 48, 34, 32, 29], [45, 26, 18, 19, 28, 46, 5, 25, 23, 10, 21, 8, 35, 24, 49])
ŷ = predict(mach, X)
[ŷ[1:5] y[1:5]]
5×2 Matrix{Float64}:
 490.838  434.0
 222.433  224.0
 178.855  220.0
 332.37   361.0
 117.448   72.0
res = ŷ .- y
using Plots
plot(res, line=:stem, label="Residuals")
sqrt(mean(res.^2))
65.2670423338599
rms(ŷ, y)
65.2670423338599
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
rms(ŷ, y[test])
61.767437042073965
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
rms(ŷ2, y[test])
70.87380635619279
train, test = partition(eachindex(y), 0.70, shuffle=true)
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
rms(ŷ, y[test]), rms(ŷ2, y[test])
(47.34927021058105, 43.350173788059756)
train, test = partition(eachindex(y), 0.70, shuffle=true)
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
rms(ŷ, y[test]), rms(ŷ2, y[test])
(53.29300689279115, 47.408306711060234)
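The two splits above give noticeably different RMS values, so a single split is a noisy basis for comparing models. One remedy is to average the test RMS over many random splits. Below is a self-contained sketch of that idea using only the standard library and the 13-week order/demand data from the start of the notebook; the notebook's mach/mach2 comparison would use the same loop structure, refitting each machine on every training set:

```julia
using Random, Statistics, LinearAlgebra

Random.seed!(42)
o = [121., 143, 63, 80, 198, 52, 160, 162, 85, 106, 129, 141, 106]
d = [237., 214, 161, 146, 331, 159, 423, 332, 139, 327, 152, 98, 116]
n = length(d)
k = round(Int, 0.7n)                     # 9 training rows, 4 test rows
rmss = Float64[]
for _ in 1:1000
    idx = randperm(n)
    tr, te = idx[1:k], idx[k+1:end]
    α = [ones(k) o[tr]] \ d[tr]          # refit on the training rows only
    ŷ = α[1] .+ α[2] .* o[te]
    push!(rmss, sqrt(mean((ŷ .- d[te]).^2)))
end
mean(rmss), std(rmss)                    # average test RMS and its split-to-split spread
```

The spread `std(rmss)` makes the earlier observation concrete: individual splits can easily reverse the ranking of two models whose average performance differs by less than that spread.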
first(df, 5)
5 rows × 3 columns
 | Demand | Orders | Visitors |
---|---|---|---|
 | Float64 | Float64 | Float64 |
1 | 434.0 | 399.0 | 160.0 |
2 | 224.0 | 128.0 | 96.0 |
3 | 220.0 | 84.0 | 100.0 |
4 | 361.0 | 239.0 | 33.0 |
5 | 72.0 | 22.0 | 30.0 |
y = df.Demand
X3 = select(df, Not(:Demand))
first(X3, 5)
5 rows × 2 columns
 | Orders | Visitors |
---|---|---|
 | Float64 | Float64 |
1 | 399.0 | 160.0 |
2 | 128.0 | 96.0 |
3 | 84.0 | 100.0 |
4 | 239.0 | 33.0 |
5 | 22.0 | 30.0 |
model = LinearRegressor()
mach3 = machine(model, X3, y)
fit!(mach3, verbosity=0)
fp = fitted_params(mach3)
@show fp.intercept
fp.coefs
fp.intercept = 77.10037182633464
2-element Vector{Pair{Symbol, Float64}}:
 :Orders => 0.908444505356322
 :Visitors => 0.4601655916705607
train, test = partition(eachindex(y), 0.70, shuffle=true, rng=3425)
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
fit!(mach3, rows=train, verbosity=0)
ŷ3 = predict(mach3, rows=test)
rms(ŷ, y[test]), rms(ŷ2, y[test]), rms(ŷ3, y[test])
(71.94705454334338, 63.93818549290661, 64.66435320753095)
train, test = partition(eachindex(y), 0.70, shuffle=true)
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
fit!(mach3, rows=train, verbosity=0)
ŷ3 = predict(mach3, rows=test)
rms(ŷ, y[test]), rms(ŷ2, y[test]), rms(ŷ3, y[test])
(82.47233383135628, 81.10059318357071, 81.66412823039089)
train, test = partition(eachindex(y), 0.70, shuffle=true)
fit!(mach, rows=train, verbosity=0)
ŷ = predict(mach, rows=test)
fit!(mach2, rows=train, verbosity=0)
ŷ2 = predict(mach2, rows=test)
fit!(mach3, rows=train, verbosity=0)
ŷ3 = predict(mach3, rows=test)
rms(ŷ, y[test]), rms(ŷ2, y[test]), rms(ŷ3, y[test])
(42.503339551120874, 46.81571699391165, 47.02943648841222)
This dataset has housing data for 506 census tracts of Boston from the 1970 census. The variable medv is the target variable.
Variable | Description |
---|---|
crim | per capita crime rate by town |
zn | proportion of residential land zoned for lots over 25,000 sq.ft |
indus | proportion of non-retail business acres per town |
chas | Charles River dummy variable (= 1 if tract bounds river; 0 otherwise) |
nox | nitric oxides concentration (parts per 10 million) |
rm | average number of rooms per dwelling |
age | proportion of owner-occupied units built prior to 1940 |
dis | weighted distances to five Boston employment centres |
rad | index of accessibility to radial highways |
tax | full-value property-tax rate per USD 10,000 |
ptratio | pupil-teacher ratio by town |
b | 1000(B - 0.63)^2 where B is the proportion of blacks by town |
lstat | percentage of lower status of the population |
medv | median value of owner-occupied homes in USD 1000's |
using DataFrames, CSV
df = DataFrame(CSV.File("ML-2-housing.csv"))
first(df, 5)
5 rows × 14 columns (omitted printing of 4 columns)
 | crim | zn | indus | chas | nox | rm | age | dis | rad | tax |
---|---|---|---|---|---|---|---|---|---|---|
 | Float64 | Float64 | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Int64 | Int64 |
1 | 0.00632 | 18.0 | 2.31 | 0 | 0.538 | 6.575 | 65.2 | 4.09 | 1 | 296 |
2 | 0.02731 | 0.0 | 7.07 | 0 | 0.469 | 6.421 | 78.9 | 4.9671 | 2 | 242 |
3 | 0.02729 | 0.0 | 7.07 | 0 | 0.469 | 7.185 | 61.1 | 4.9671 | 2 | 242 |
4 | 0.03237 | 0.0 | 2.18 | 0 | 0.458 | 6.998 | 45.8 | 6.0622 | 3 | 222 |
5 | 0.06905 | 0.0 | 2.18 | 0 | 0.458 | 7.147 | 54.2 | 6.0622 | 3 | 222 |
using MLJ, MLJLinearModels
schema(df)
┌─────────┬─────────┬────────────┐
│ _.names │ _.types │ _.scitypes │
├─────────┼─────────┼────────────┤
│ crim    │ Float64 │ Continuous │
│ zn      │ Float64 │ Continuous │
│ indus   │ Float64 │ Continuous │
│ chas    │ Int64   │ Count      │
│ nox     │ Float64 │ Continuous │
│ rm      │ Float64 │ Continuous │
│ age     │ Float64 │ Continuous │
│ dis     │ Float64 │ Continuous │
│ rad     │ Int64   │ Count      │
│ tax     │ Int64   │ Count      │
│ ptratio │ Float64 │ Continuous │
│ b       │ Float64 │ Continuous │
│ lstat   │ Float64 │ Continuous │
│ medv    │ Float64 │ Continuous │
└─────────┴─────────┴────────────┘
_.nrows = 506
df = coerce(df, autotype(df, :discrete_to_continuous))
describe(df)
14 rows × 7 columns
 | variable | mean | min | median | max | nmissing | eltype |
---|---|---|---|---|---|---|---|
 | Symbol | Float64 | Float64 | Float64 | Float64 | Int64 | DataType |
1 | crim | 3.61352 | 0.00632 | 0.25651 | 88.9762 | 0 | Float64 |
2 | zn | 11.3636 | 0.0 | 0.0 | 100.0 | 0 | Float64 |
3 | indus | 11.1368 | 0.46 | 9.69 | 27.74 | 0 | Float64 |
4 | chas | 0.06917 | 0.0 | 0.0 | 1.0 | 0 | Float64 |
5 | nox | 0.554695 | 0.385 | 0.538 | 0.871 | 0 | Float64 |
6 | rm | 6.28463 | 3.561 | 6.2085 | 8.78 | 0 | Float64 |
7 | age | 68.5749 | 2.9 | 77.5 | 100.0 | 0 | Float64 |
8 | dis | 3.79504 | 1.1296 | 3.20745 | 12.1265 | 0 | Float64 |
9 | rad | 9.54941 | 1.0 | 5.0 | 24.0 | 0 | Float64 |
10 | tax | 408.237 | 187.0 | 330.0 | 711.0 | 0 | Float64 |
11 | ptratio | 18.4555 | 12.6 | 19.05 | 22.0 | 0 | Float64 |
12 | b | 356.674 | 0.32 | 391.44 | 396.9 | 0 | Float64 |
13 | lstat | 12.6531 | 1.73 | 11.36 | 37.97 | 0 | Float64 |
14 | medv | 22.5328 | 5.0 | 21.2 | 50.0 | 0 | Float64 |
y = df.medv
X = select(df, Not(:medv))
model = LinearRegressor()
mach = machine(model, X, y)
fit!(mach, verbosity=0)
fp = fitted_params(mach)
@show fp.intercept
fp.coefs
fp.intercept = 36.45948838508991
13-element Vector{Pair{Symbol, Float64}}:
 :crim => -0.10801135783679676
 :zn => 0.04642045836688124
 :indus => 0.02055862636707048
 :chas => 2.6867338193448878
 :nox => -17.76661122830013
 :rm => 3.8098652068092163
 :age => 0.0006922246403443672
 :dis => -1.475566845600255
 :rad => 0.30604947898517343
 :tax => -0.012334593916574377
 :ptratio => -0.9527472317072893
 :b => 0.00931168327379384
 :lstat => -0.5247583778554893
ŷ = predict(mach, X)
res = ŷ .- y
using Plots
plot(res, line=:stem, label="Residuals")
predict(mach, DataFrame(X[1:5,:]))
5-element Vector{Float64}:
 30.003843377016786
 25.025562379053138
 30.567596718601617
 28.60703648872812
 27.943524232873024
[ŷ[1:10] y[1:10]]
10×2 Matrix{Float64}:
 30.0038  24.0
 25.0256  21.6
 30.5676  34.7
 28.607   33.4
 27.9435  36.2
 25.2563  28.7
 23.0018  22.9
 19.536   27.1
 11.5236  16.5
 18.9203  18.9