```python
#hide
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
fastbook.setup_book()
```

# Collaborative Filtering Deep Dive

```python
#hide
from fastbook import *
```

## A First Look at the Data

```python
from fastai.collab import *
from fastai.tabular.all import *

path = untar_data(URLs.ML_100k)
```

```python
ratings = pd.read_csv(path/'u.data', delimiter='\t', header=None,
                      names=['user','movie','rating','timestamp'])
ratings.head()
```

```python
# Hand-picked latent factors for one movie: how science-fiction,
# action-oriented, and old it is (on a -1 to +1 scale).
last_skywalker = np.array([0.98,0.9,-0.9])
```

```python
# A user who likes sci-fi and action, but not old movies.
user1 = np.array([0.9,0.8,-0.6])
```

```python
# The match between user and movie is the dot product of their factors.
(user1*last_skywalker).sum()
```

```python
casablanca = np.array([-0.99,-0.3,0.8])
```

```python
(user1*casablanca).sum()
```

## Learning the Latent Factors
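Rather than hand-scoring factors for every movie and user, we can learn them with gradient descent: start from random factors, predict each rating as the dot product of the matching user and movie factors, and nudge the factors to reduce the squared error. Here is a minimal plain-PyTorch sketch of that loop (the toy ratings, sizes, and learning rate are illustrative choices of mine, not from the dataset):

```python
import torch

# Toy observed ratings as (user index, movie index, rating) triples.
obs = torch.tensor([[0,0,4.],[0,2,1.],[1,1,5.],[2,3,3.],[3,0,2.],[4,2,5.]])
users, movies, targets = obs[:,0].long(), obs[:,1].long(), obs[:,2]

n_users, n_movies, n_factors = 5, 4, 3
user_factors  = torch.randn(n_users,  n_factors, requires_grad=True)
movie_factors = torch.randn(n_movies, n_factors, requires_grad=True)

lr = 0.1
for epoch in range(100):
    # Predicted rating = dot product of user factors and movie factors.
    preds = (user_factors[users] * movie_factors[movies]).sum(dim=1)
    loss = ((preds - targets)**2).mean()
    loss.backward()
    with torch.no_grad():
        user_factors  -= lr * user_factors.grad
        movie_factors -= lr * movie_factors.grad
        user_factors.grad.zero_()
        movie_factors.grad.zero_()
```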
## Creating the DataLoaders
```python
movies = pd.read_csv(path/'u.item', delimiter='|', encoding='latin-1',
                     usecols=(0,1), names=('movie','title'), header=None)
movies.head()
```

```python
ratings = ratings.merge(movies)
ratings.head()
```

```python
dls = CollabDataLoaders.from_df(ratings, item_name='title', bs=64)
dls.show_batch()
```

```python
dls.classes
```

```python
n_users  = len(dls.classes['user'])
n_movies = len(dls.classes['title'])
n_factors = 5

user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)
```

```python
# Index 3 as a one-hot-encoded vector of length n_users...
one_hot_3 = one_hot(3, n_users).float()
```

```python
# ...multiplying by it selects one row of the factor matrix...
user_factors.t() @ one_hot_3
```

```python
# ...which is the same as indexing directly.
user_factors[3]
```

## Collaborative Filtering from Scratch
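Before wrapping this in a PyTorch module, a quick sanity check (a sketch assuming the `user_factors` and `one_hot_3` tensors defined just above) that indexing into the factor matrix gives the same answer as multiplying by a one-hot-encoded vector:

```python
# True: indexing is just a faster way of doing the one-hot matrix
# multiply, without materializing the one-hot vector at all.
torch.allclose(user_factors.t() @ one_hot_3, user_factors[3])
```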
```python
class Example:
    def __init__(self, a): self.a = a
    def say(self,x): return f'Hello {self.a}, {x}.'
```

```python
ex = Example('Sylvain')
ex.say('nice to meet you')
```

```python
class DotProduct(Module):
    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, x):
        # Each row of x holds a (user index, movie index) pair.
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return (users * movies).sum(dim=1)
```

```python
x,y = dls.one_batch()
x.shape
```

```python
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
```

```python
learn.fit_one_cycle(5, 5e-3)
```

To force predictions into the valid range of ratings, we can squash the dot product with `sigmoid_range` (in fastai this is just `torch.sigmoid(x) * (high - low) + low`); the upper bound is 5.5 rather than 5 because a sigmoid never quite reaches its maximum:

```python
class DotProduct(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        return sigmoid_range((users * movies).sum(dim=1), *self.y_range)
```

```python
model = DotProduct(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
```

```python
learn.fit_one_cycle(5, 5e-3)
```

One thing still missing is bias: some users are simply more positive in their ratings than others, and some movies are simply better than others, which a pure dot product cannot express. We can add a single bias number per user and per movie:

```python
class DotProductBias(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.movie_factors = Embedding(n_movies, n_factors)
        self.movie_bias = Embedding(n_movies, 1)
        self.y_range = y_range

    def forward(self, x):
        users = self.user_factors(x[:,0])
        movies = self.movie_factors(x[:,1])
        res = (users * movies).sum(dim=1, keepdim=True)
        res += self.user_bias(x[:,0]) + self.movie_bias(x[:,1])
        return sigmoid_range(res, *self.y_range)
```

```python
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
```

```python
learn.fit_one_cycle(5, 5e-3)
```

## Weight Decay
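Weight decay, or L2 regularization, consists in adding the sum of all the weights squared to the loss function, which encourages the weights to stay small. Since the gradient of `wd * (parameters**2).sum()` is just `2 * wd * parameters`, the penalty can equivalently be applied directly to the gradients. A small runnable check of that equivalence (the toy loss and values here are mine):

```python
import torch

wd = 0.1
parameters = torch.randn(10, requires_grad=True)

loss = (parameters * 2).sum()                     # stand-in for a real loss
loss_with_wd = loss + wd * (parameters**2).sum()  # add the L2 penalty
loss_with_wd.backward()

# The penalty contributes exactly 2*wd*parameters to the gradient, so
# frameworks can implement weight decay as `grad += wd * 2 * parameters`
# instead of materializing the penalty in the loss.
expected = 2. + 2 * wd * parameters.detach()
print(torch.allclose(parameters.grad, expected))  # True
```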
```python
# Parabolas a*x**2 for increasing a: the larger the coefficient, the
# sharper the function, which is what large weights do to a model.
x = np.linspace(-2,2,100)
a_s = [1,2,5,10,50]
ys = [a * x**2 for a in a_s]
_,ax = plt.subplots(figsize=(8,6))
for a,y in zip(a_s,ys): ax.plot(x,y, label=f'a={a}')
ax.set_ylim([0,5])
ax.legend();
```

```python
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)
```

## Creating Our Own Embedding Module
```python
# A plain tensor attribute is not registered as a trainable parameter...
class T(Module):
    def __init__(self): self.a = torch.ones(3)

L(T().parameters())
```

```python
# ...but wrapping it in nn.Parameter is.
class T(Module):
    def __init__(self): self.a = nn.Parameter(torch.ones(3))

L(T().parameters())
```

```python
# PyTorch modules such as nn.Linear use nn.Parameter for their weights.
class T(Module):
    def __init__(self): self.a = nn.Linear(1, 3, bias=False)

t = T()
L(t.parameters())
```

```python
type(t.a.weight)
```

```python
def create_params(size):
    return nn.Parameter(torch.zeros(*size).normal_(0, 0.01))
```

```python
class DotProductBias(Module):
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])
        self.movie_factors = create_params([n_movies, n_factors])
        self.movie_bias = create_params([n_movies])
        self.y_range = y_range

    def forward(self, x):
        # Note: plain indexing (square brackets) replaces the Embedding calls.
        users = self.user_factors[x[:,0]]
        movies = self.movie_factors[x[:,1]]
        res = (users*movies).sum(dim=1)
        res += self.user_bias[x[:,0]] + self.movie_bias[x[:,1]]
        return sigmoid_range(res, *self.y_range)
```

```python
model = DotProductBias(n_users, n_movies, 50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.1)
```

## Interpreting Embeddings and Biases
```python
# Movies with the lowest learned bias: disliked even by users who are
# otherwise well matched to their latent factors.
movie_bias = learn.model.movie_bias.squeeze()
idxs = movie_bias.argsort()[:5]
[dls.classes['title'][i] for i in idxs]
```

```python
# ...and the highest bias: liked beyond what their factors alone predict.
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]
```

```python
# Project the factors of the 1,000 most-rated movies down to 3
# dimensions with PCA and plot the first and third components.
g = ratings.groupby('title')['rating'].count()
top_movies = g.sort_values(ascending=False).index.values[:1000]
top_idxs = tensor([learn.dls.classes['title'].o2i[m] for m in top_movies])
movie_w = learn.model.movie_factors[top_idxs].cpu().detach()
movie_pca = movie_w.pca(3)
fac0,fac1,fac2 = movie_pca.t()
idxs = list(range(50))
X = fac0[idxs]
Y = fac2[idxs]
plt.figure(figsize=(12,12))
plt.scatter(X, Y)
for i, x, y in zip(top_movies[idxs], X, Y):
    plt.text(x,y,i, color=np.random.rand(3)*0.7, fontsize=11)
plt.show()
```

## Using fastai.collab
```python
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
```

```python
learn.fit_one_cycle(5, 5e-3, wd=0.1)
```

```python
learn.model
```

```python
# The layer names differ from our model: fastai uses i_weight/i_bias
# for items and u_weight/u_bias for users.
movie_bias = learn.model.i_bias.weight.squeeze()
idxs = movie_bias.argsort(descending=True)[:5]
[dls.classes['title'][i] for i in idxs]
```

## Embedding Distance
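If two movies have similar embeddings, the users who like one tend to like the other, so distance in embedding space acts as a measure of movie similarity. The code below uses cosine similarity, the cosine of the angle between two vectors, which ignores their magnitudes:

$$\operatorname{cos\_sim}(a, b) = \frac{a \cdot b}{\lVert a \rVert \, \lVert b \rVert}$$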
```python
movie_factors = learn.model.i_weight.weight
idx = dls.classes['title'].o2i['Silence of the Lambs, The (1991)']
distances = nn.CosineSimilarity(dim=1)(movie_factors, movie_factors[idx][None])
# Take index 1, not 0: the most similar movie to any movie is itself.
idx = distances.argsort(descending=True)[1]
dls.classes['title'][idx]
```

## Bootstrapping a Collaborative Filtering Model
## Deep Learning for Collaborative Filtering
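The first step is to get embedding sizes for users and movies; `get_emb_sz`, used in the next cell, returns fastai's recommended sizes. The underlying heuristic (a sketch of fastai's `emb_sz_rule`; consult the current source if the exact rule matters to you) is roughly:

```python
def emb_sz_rule(n_cat):
    "Empirical rule of thumb mapping a category count to an embedding size."
    return min(600, round(1.6 * n_cat**0.56))
```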
```python
embs = get_emb_sz(dls)
embs
```

```python
class CollabNN(Module):
    def __init__(self, user_sz, item_sz, y_range=(0,5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # Concatenate the two embeddings and feed them to a small MLP.
        self.layers = nn.Sequential(
            nn.Linear(user_sz[1]+item_sz[1], n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1))
        self.y_range = y_range

    def forward(self, x):
        embs = self.user_factors(x[:,0]),self.item_factors(x[:,1])
        x = self.layers(torch.cat(embs, dim=1))
        return sigmoid_range(x, *self.y_range)
```

```python
model = CollabNN(*embs)
```

```python
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, 5e-3, wd=0.01)
```

```python
# With use_nn=True, collab_learner builds this kind of model for us.
learn = collab_learner(dls, use_nn=True, y_range=(0, 5.5), layers=[100,50])
learn.fit_one_cycle(5, 5e-3, wd=0.1)
```

```python
# fastai's EmbeddingNN is just a TabularModel with no continuous variables.
@delegates(TabularModel)
class EmbeddingNN(TabularModel):
    def __init__(self, emb_szs, layers, **kwargs):
        super().__init__(emb_szs, layers=layers, n_cont=0, out_sz=1, **kwargs)
```

## Conclusion
## Questionnaire
- What problem does collaborative filtering solve?
- How does it solve it?
- Why might a collaborative filtering predictive model fail to be a very useful recommendation system?
- What does a crosstab representation of collaborative filtering data look like?
- Write the code to create a crosstab representation of the MovieLens data (you might need to do some web searching!).
- What is a latent factor? Why is it “latent”?
- What is a dot product? Calculate a dot product manually using pure Python with lists.
- What does `pandas.DataFrame.merge` do?
- What is an embedding matrix?
- What is the relationship between an embedding and a matrix of one-hot-encoded vectors?
- Why do we need `Embedding` if we could use one-hot-encoded vectors for the same thing?
- What does an embedding contain before we start training (assuming we're not using a pretrained model)?
- Create a class (without peeking, if possible!) and use it.
- What does `x[:,0]` return?
- Rewrite the `DotProduct` class (without peeking, if possible!) and train a model with it.
- What is a good loss function to use for MovieLens? Why?
- What would happen if we used cross-entropy loss with MovieLens? How would we need to change the model?
- What is the use of bias in a dot product model?
- What is another name for weight decay?
- Write the equation for weight decay (without peeking!).
- Write the equation for the gradient of weight decay. Why does it help reduce weights?
- Why does reducing weights lead to better generalization?
- What does `argsort` do in PyTorch?
- Does sorting the movie biases give the same result as averaging overall movie ratings by movie? Why/why not?
- How do you print the names and details of the layers in a model?
- What is the “bootstrapping problem” in collaborative filtering?
- How could you deal with the bootstrapping problem for new users? For new movies?
- How can feedback loops impact collaborative filtering systems?
- When using a neural network in collaborative filtering, why can we have different numbers of factors for movies and users?
- Why is there an `nn.Sequential` in the `CollabNN` model?
- What kind of model should we use if we want to add metadata about users and items, or information such as date and time, to a collaborative filtering model?
## Further Research
- Take a look at all the differences between the `Embedding` version of `DotProductBias` and the `create_params` version, and try to understand why each of those changes is required. If you're not sure, try reverting each change to see what happens. (NB: even the type of brackets used in `forward` has changed!)
- Find three other areas where collaborative filtering is being used, and find out what the pros and cons of this approach are in those areas.
- Complete this notebook using the full MovieLens dataset, and compare your results to online benchmarks. See if you can improve your accuracy. Look on the book’s website and the fast.ai forum for ideas. Note that there are more columns in the full dataset—see if you can use those too (the next chapter might give you ideas).
- Create a model for MovieLens that works with cross-entropy loss, and compare it to the model in this chapter.