diff --git a/hugo_blag/content/posts/2018-04-08-recommender-systems-1/index.md b/hugo_blag/content/posts/2018-04-08-recommender-systems-1/index.md index 7537ff1..e730188 100644 --- a/hugo_blag/content/posts/2018-04-08-recommender-systems-1/index.md +++ b/hugo_blag/content/posts/2018-04-08-recommender-systems-1/index.md @@ -110,8 +110,7 @@ Below is just to inspect that data appears to be okay: ml.info() {{< / highlight >}} -
RangeIndex: 20000263 entries, 0 to 20000262
Data columns (total 4 columns):
@@ -121,9 +120,7 @@ ml.info()
time datetime64[ns]
dtypes: datetime64[ns](1), float32(1), int32(2)
memory usage: 381.5 MB
-
- +| user_id | movie_id | rating +|---------|----------|------- count|2.000026e+07|2.000026e+07|2.000026e+07 mean|6.904587e+04|9.041567e+03|3.494030e+00 std|4.003863e+04|1.978948e+04|9.998490e-01 @@ -145,7 +142,7 @@ min|1.000000e+00|1.000000e+00|5.000000e-01 50%|6.914100e+04|2.167000e+03|3.500000e+00 75%|1.036370e+05|4.770000e+03|4.000000e+00 max|1.384930e+05|1.312620e+05|5.000000e+00 -
| user_id | movie_id | rating | time |--------|---------|-------|----- 0|1|2|3.5|2005-04-02 23:53:47 @@ -172,7 +169,7 @@ ml[:10] 7|1|223|4.0|2005-04-02 23:46:13 8|1|253|4.0|2005-04-02 23:35:40 9|1|260|4.0|2005-04-02 23:33:46 -
(138494, 131263, 18179137922)
-
0.11%
-
| movie_title | num_ratings | avg_rating | movie_id | | |
|------------|------------|-----------|---------|-|-|-
296|Pulp Fiction (1994)|67310.0|4.174231
@@ -270,7 +267,7 @@ movie_stats.sort_values("num_ratings", ascending=False)[:25]
608|Fargo (1996)|43272.0|4.112359
47|Seven (a.k.a. Se7en) (1995)|43249.0|4.053493
380|True Lies (1994)|43159.0|3.491149
-
<138494x131263 sparse matrix of type ''
with 15000197 stored elements in Compressed Sparse Column format>
- | user_id | movie_id | rating | time |--------|---------|-------|----- 13746918|94976|7371|4.5|2009-11-04 05:51:26 @@ -376,7 +373,7 @@ ml_train[:10] 15311014|105846|4226|4.5|2004-07-30 18:12:26 8514776|58812|1285|4.0|2000-04-24 20:39:46 3802643|25919|3275|2.5|2010-06-18 00:48:40 -
[4.5, 3.0, 3.0, 4.5, 4.0, 2.5, 5.0, 4.5, 4.0, 2.5]
-
[4.5, 3.0, 3.0, 4.5, 4.0, 2.5, 5.0, 4.5, 4.0, 2.5]
-| movie_title | user_id | movie_id | rating | time |------------|--------|---------|-------|----- 4229884|Jumanji (1995)|28812|2|5.0|1996-09-23 02:08:39 @@ -499,7 +496,7 @@ names.merge(ml_train[ml_train.user_id == target_user], right_on="movie_id", left 4229957|Independence Day (a.k.a. ID4) (1996)|28812|780|5.0|1996-09-23 02:09:02 4229959|Phenomenon (1996)|28812|802|5.0|1996-09-23 02:09:02 4229960|Die Hard (1988)|28812|1036|5.0|1996-09-23 02:09:02 -
| movie_title | movie_id | |------------|---------|- 586|Home Alone (1990) -
| movie_id_x | user_id | rating_x | rating_y |-----------|--------|---------|--------- 0|329|17593|3.0|4.0 @@ -555,7 +552,7 @@ users_df 522688|2|126271|3.0|4.0 522689|595|82760|2.0|4.0 522690|595|18306|4.5|5.0 -
| movie_id_x | user_id | rating_x | rating_y | rating_dev |-----------|--------|---------|---------|----------- 0|329|17593|3.0|4.0|1.0 @@ -586,7 +583,7 @@ users_df 522688|2|126271|3.0|4.0|1.0 522689|595|82760|2.0|4.0|2.0 522690|595|18306|4.5|5.0|0.5 -
| movie_title | rating_dev
|------------|-----------
318|Shawshank Redemption, The (1994)|-1.391784
@@ -628,7 +625,7 @@ names.join(rating_dev, how="inner").sort_values("rating_dev")
173|Judge Dredd (1995)|0.518570
19|Ace Ventura: When Nature Calls (1995)|0.530155
160|Congo (1995)|0.559034
-
| user_id | movie_id | rating | rating_adj | movie_title
|--------|---------|-------|-----------|------------
4229920|28812|344|3.0|3.141987|Ace Ventura: Pet Detective (1994)
@@ -673,7 +670,7 @@ df.join(names, on="movie_id").sort_values("movie_title")
4229892|28812|50|3.0|1.683520|Usual Suspects, The (1995)
4229903|28812|208|3.0|3.250881|Waterworld (1995)
4229919|28812|339|4.0|3.727966|While You Were Sleeping (1995)
-
4.087520122528076
-
4.0
-
| movie_title | num_ratings
|------------|------------
802|Phenomenon (1996)|3147
@@ -754,7 +751,7 @@ names.join(num_ratings, how="inner").sort_values("num_ratings")
593|Silence of the Lambs, The (1991)|12120
480|Jurassic Park (1993)|13546
356|Forrest Gump (1994)|13847
-| user_id | movie_id | rating | rating_adj | num_ratings | rating_weighted |--------|---------|-------|-----------|------------|---------------- 4229918|28812|329|4.0|3.767164|6365|23978.000326 @@ -809,7 +806,7 @@ df 4229912|28812|296|4.0|2.883755|11893|34296.500678 4229884|28812|2|5.0|4.954595|7422|36773.001211 4229953|28812|595|4.0|3.515051|9036|31761.999825 -
4.02968199025023
-
(4.0875210502743862, 4.0875210502743862)
-
Training error: MAE=0.640, RMSE=0.834
Testing error: MAE=0.657, RMSE=0.856
-
6982/s 8928/s 10378/s 12877/s 15290/s 11574/s 13230/s
Epoch 01/20; Training: MAE=0.674 RMSE=0.874, Testing: MAE=0.677 RMSE=0.879
@@ -1423,7 +1420,7 @@ svd40.train(movies_train, users_train, ratings_train, epoch_callback=at_epoch)
52078/s 18671/s 9292/s 11493/s 12515/s 11760/s 13039/s
Epoch 20/20; Training: MAE=0.549 RMSE=0.717, Testing: MAE=0.600 RMSE=0.787
-
48199/s 33520/s 16937/s 13842/s 13607/s 15574/s 15431/s
Epoch 01/20; Training: MAE=0.674 RMSE=0.875, Testing: MAE=0.677 RMSE=0.878
@@ -1487,7 +1484,7 @@ svd4.train(ml_train["movie_id"].values, ml_train["user_id"].values, ml_train["ra
6090/s 11341/s 15532/s 18298/s 17158/s 14908/s 16898/s
Epoch 20/20; Training: MAE=0.599 RMSE=0.783, Testing: MAE=0.618 RMSE=0.809
-| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|--- 0|||||||||||||||| @@ -1614,7 +1611,7 @@ latent_factor_grid(svd4.q[:2,:]) 13||||||||Sound of Music; Spy Kids 2: The Island of Lost...|Bring It On; Legally Blonde|Fly Away Home; Parent Trap|Sense and Sensibility; Sex and the City||||| 14|||||||Babe; Babe: Pig in the City||||Twilight||||| 15|||||||||||||||| -
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|--- 0|||||||||||||||| @@ -1651,7 +1648,7 @@ latent_factor_grid(svd4.q[2:,:]) 13||||||Nightmare on Elm Street 4: The Dream Master; F...|Wes Craven's New Nightmare (Nightmare on Elm S...|Friday the 13th; Exorcist III|Candyman; Texas Chainsaw Massacre 2|Mars Attacks!; Halloween|Evil Dead II (Dead by Dawn); Re-Animator|Night of the Living Dead; Dead Alive (Braindead)||Eraserhead|| 14|||||||Nightmare on Elm Street 3: Dream Warriors; Fre...|Hellbound: Hellraiser II|Nightmare on Elm Street||||||| 15|||||||Bride of Chucky (Child's Play 4)||||Texas Chainsaw Massacre||||| -
| movie_title | num_ratings | avg_rating | bias | movie_id | | | | |------------|------------|-----------|-----|---------|-|-|-|- 318|Shawshank Redemption, The (1994)|63366.0|4.446990|1.015911 @@ -1686,7 +1683,7 @@ bias.iloc[:10] 50|Usual Suspects, The (1995)|47006.0|4.334372|0.910651 102217|Bill Hicks: Revelations (1993)|50.0|3.990000|0.900622 527|Schindler's List (1993)|50054.0|4.310175|0.898633 -
| movie_title | num_ratings | avg_rating | bias | movie_id | | | | |------------|------------|-----------|-----|---------|-|-|-|- 8859|SuperBabies: Baby Geniuses 2 (2004)|209.0|0.837321|-2.377202 @@ -1712,7 +1709,7 @@ bias.iloc[:-10:-1] 4775|Glitter (2001)|685.0|1.124088|-2.047287 31698|Son of the Mask (2005)|467.0|1.252677|-2.022763 5739|Faces of Death 6 (1996)|174.0|1.261494|-2.004086 -
| Library | Algorithm | MAE (test) | RMSE (test) |--------|----------|-----------|------------ 0||Slope One|0.656514|0.856294 @@ -1768,7 +1765,7 @@ pd.DataFrame.from_records( 2|Surprise|Random|1.144775|1.433753 3|Surprise|Slope One|0.704730|0.923331 4|Surprise|SVD|0.694890|0.900350 -