This post is dedicated to the popularity baseline in the Lenskit 2 framework. The framework lacks this baseline, so I provide an implementation and demonstrate the results it achieves.
The implementation includes three classes:
PopItemScorer - items scorer, which provides actual scores for items
PopModel - model that contains popularity of each item
PopModelBuilder - builder that calculates the popularity of each item and puts it into the model.
The popularity algorithm is famous for being a very strong baseline in the learning-to-rank problem (http://technocalifornia.blogspot.fi/2013/07/recommendations-as-personalized.html).
Here are results I obtained using Lenskit framework:
SVD was trained using 50 features and 1000 iterations.
Each evaluated algorithm ranked items from the whole universe of items.
According to the obtained results, the popularity baseline outperforms the algorithms implemented in the Lenskit framework in this evaluation setting.
Popularity baseline is very strong, because in this evaluation setting we assume that unrated items are irrelevant. Popularity is not very good at predicting items relevant for a user. It is good at predicting which item a user is going to rate, as users rate popular items on average.
In another evaluation setting, where algorithms ranked only test items (rated), popularity baseline is not as strong as in the first experimental setting.
The observation might indicate that the second experimental setting is fairer. However, to the best of my knowledge, the number of papers that report this observation is very limited.
The implementation includes three classes:
PopItemScorer - items scorer, which provides actual scores for items
PopModel - model that contains popularity of each item
PopModelBuilder - builder that calculates the popularity of each item and puts it into the model.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bionic.pop; | |
import org.grouplens.lenskit.basic.AbstractItemScorer; | |
import org.grouplens.lenskit.data.dao.UserEventDAO; | |
import org.grouplens.lenskit.data.event.Rating; | |
import org.grouplens.lenskit.data.history.UserHistory; | |
import org.grouplens.lenskit.vectors.MutableSparseVector; | |
import org.grouplens.lenskit.vectors.VectorEntry; | |
import javax.annotation.Nonnull; | |
import javax.inject.Inject; | |
import java.util.List; | |
public class PopItemScorer extends AbstractItemScorer { | |
private PopModel model; | |
@Inject | |
public PopItemScorer(PopModel model) { | |
this.model = model; | |
} | |
@Override | |
public void score(long user, @Nonnull MutableSparseVector scores) { | |
List<Long> recommendations = model.getItemList(); | |
for(VectorEntry e : scores.view(VectorEntry.State.EITHER)){ | |
int score = recommendations.indexOf(e.getKey()); | |
scores.set(e, score); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bionic.pop; | |
import org.grouplens.lenskit.core.Transient; | |
import org.grouplens.lenskit.data.pref.IndexedPreference; | |
import org.grouplens.lenskit.data.snapshot.PreferenceSnapshot; | |
import javax.annotation.Nonnull; | |
import javax.inject.Inject; | |
import javax.inject.Provider; | |
import java.util.HashMap; | |
import java.util.Map; | |
public class PopModelBuilder implements Provider<PopModel> { | |
protected final PreferenceSnapshot snapshot; | |
@Inject | |
public PopModelBuilder(@Transient @Nonnull PreferenceSnapshot snapshot) { | |
this.snapshot = snapshot; | |
} | |
@Override | |
public PopModel get() { | |
Map<Long, Container> itemMap = new HashMap<Long, Container>(); | |
for (IndexedPreference rating : snapshot.getRatings()) { | |
Container container = new Container(rating.getItemId()); | |
if (itemMap.containsKey(rating.getItemId())) { | |
container = itemMap.get(rating.getItemId()); | |
} | |
container.addRating(); | |
itemMap.put(rating.getItemId(), container); | |
} | |
return new PopModel(itemMap); | |
} | |
public static class Container implements Comparable<Container> { | |
private Long id; | |
private Integer ratingNumber = 0; | |
private Container(Long id) { | |
this.id = id; | |
} | |
public void addRating() { | |
ratingNumber++; | |
} | |
public Integer getRatingNumber() { | |
return ratingNumber; | |
} | |
public Long getId() { | |
return id; | |
} | |
@Override | |
public int compareTo(Container o) { | |
return ratingNumber.compareTo(o.ratingNumber); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bionic.pop; | |
import org.grouplens.grapht.annotation.DefaultProvider; | |
import org.grouplens.lenskit.core.Shareable; | |
import java.io.Serializable; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.List; | |
import java.util.Map; | |
@DefaultProvider(PopModelBuilder.class) | |
@Shareable | |
public class PopModel implements Serializable { | |
private Map<Long, PopModelBuilder.Container> itemMap; | |
public PopModel(Map<Long, PopModelBuilder.Container> itemMap) { | |
this.itemMap = itemMap; | |
} | |
public Integer getPop(Long itemId) { | |
if (!itemMap.containsKey(itemId)) { | |
return 0; | |
} | |
return itemMap.get(itemId).getRatingNumber(); | |
} | |
public Map<Long, PopModelBuilder.Container> getItemMap() { | |
return itemMap; | |
} | |
public List<Long> getItemList() { | |
List<PopModelBuilder.Container> containerList = new ArrayList<PopModelBuilder.Container>(itemMap.values()); | |
Collections.sort(containerList); | |
List<Long> list = new ArrayList<Long>(); | |
for (int i = 0; i < containerList.size(); i++) { | |
list.add(containerList.get(i).getId()); | |
} | |
return list; | |
} | |
} |
The popularity algorithm is famous for being a very strong baseline in the learning-to-rank problem (http://technocalifornia.blogspot.fi/2013/07/recommendations-as-personalized.html).
Here are results I obtained using Lenskit framework:
SVD was trained using 50 features and 1000 iterations.
Each evaluated algorithm ranked items from the whole universe of items.
According to the obtained results, the popularity baseline outperforms the algorithms implemented in the Lenskit framework in this evaluation setting.
Popularity baseline is very strong, because in this evaluation setting we assume that unrated items are irrelevant. Popularity is not very good at predicting items relevant for a user. It is good at predicting which item a user is going to rate, as users rate popular items on average.
In another evaluation setting, where algorithms ranked only test items (rated), popularity baseline is not as strong as in the first experimental setting.
The observation might indicate that the second experimental setting is fairer. However, to the best of my knowledge, the number of papers that report this observation is very limited.
No comments:
Post a Comment