1 ! Copyright (C) 2019 HMC Clinic.
2 ! See http://factorcode.org/license.txt for BSD license.
3 ! Code based on https://towardsdatascience.com/linear-regression-from-scratch-with-numpy-implementation-finally-8e617d8e274c
5 USING: arrays accessors csv io io.encodings.utf8 kernel locals math math.parser
6 math.ranges math.statistics prettyprint sequences tensors ;
11 ! Normalize across each of the features
12 :: normalize ( X -- norm )
13 ! Compute the mean for each of the features and repeat it so that it can be
15 X transpose tensor>array :> X-T
16 X-T [ mean ] map >tensor :> feat-means
17 X shape>> first [0,b) [ drop feat-means ] map stack :> means
18 ! Compute the std for each of the features and repeat it so that it can be
20 X-T [ std ] map >tensor :> feat-stds
21 X shape>> first [0,b) [ drop feat-stds ] map stack :> stds
24 :: compute-cost ( X y params -- cost )
25 ! Compute (1/(2*n_samples))
26 1 2 y shape>> first * /
29 ! Compute sum((h-y)**2)
31 ! Multiply to get final cost
34 :: gradient-descent ( X y params lr n-iters -- history params )
35 lr y shape>> first / :> batch-lr
36 { n-iters } zeros :> history
41 ! params = params - (learning_rate/n_samples) * X.T @ (X @ params - y)
42 swap dup :> old-params
43 batch-lr X-T X old-params matmul y t- matmul t* t- :> new-params
44 ! Compute the cost and add it to the history
45 X y new-params compute-cost swap history set-nth
52 :: linear-regression ( X y lr n-iters -- )
54 ! Add the constant coefficient
55 y shape>> first 1 2array ones swap 2array hstack :> X-norm
56 ! Create the array of parameters
57 X-norm shape>> second 1 2array zeros :> params
58 ! Compute the initial cost
59 X-norm y params compute-cost
61 number>string "The initial cost is " swap append print
62 ! Perform gradient descent
63 X-norm y params lr n-iters gradient-descent
64 "The optimal parameters are " print .
65 last number>string "The final cost was " swap append print
! Read the Boston house-prices demo data from the vocabulary's CSV files.
! Leaves two tensors on the stack: the contents of data.csv (X) underneath
! the contents of target.csv (y). Each CSV cell is passed through
! string>number before the rows are converted with >tensor.
: load-boston-data ( -- X y )
    "vocab:tensors/demos/data.csv" "vocab:tensors/demos/target.csv"
    ! The same load-and-parse pipeline runs on the data path first,
    ! then on the target path.
    [ utf8 file>csv [ [ string>number ] map ] map >tensor ] bi@ ;