In [1]:
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier as RandomForest
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
import pickle
import os
import joblib

# import open bandit pipeline (obp)
import obp
from obp.policy import (
    IPWLearner, 
    QLearner,
    NNPolicyLearner, 
    Random
)

In [3]:
# Input locations, relative to the notebook's working directory.
# Re-ranked recommendation output; loaded further down as a headerless CSV
# with columns userID, itemID, rating.
# Forward slashes need no escaping. If this path is ever rewritten with
# backslashes, double them (or use a raw string): "\r" inside a normal string
# literal is a carriage return.
reranked_output_file = './exp00000/result/out-1.txt'
# Item feature table; loaded further down as a headerless CSV with columns
# itemID, condition, num.
item_feature_file = '../dataOFAiR/item-features.csv'

In [4]:
def ipw_train_learner(context,actions,max_iter=5000):
    """Fit an OBP ``IPWLearner`` backed by logistic regression on logged data.

    Every logged (context, action) pair is treated as a success: the reward
    vector handed to the learner is all ones.

    Parameters
    ----------
    context : array-like
        Context features, one entry per logged round; passed unchanged to
        ``IPWLearner.fit``.
    actions : array-like
        Logged action for each round; passed unchanged to ``IPWLearner.fit``.
    max_iter : int, default=5000
        Iteration cap forwarded to ``LogisticRegression``.

    Returns
    -------
    IPWLearner
        The learner after ``fit`` has been called on the given data.
    """
    # The classifier is handed over unfitted: the learner's own fit() trains it,
    # and scikit-learn's fit() discards any previously learned state, so
    # fitting it here first would only cost time.
    classifier = LogisticRegression(random_state=0, max_iter=max_iter, C=500)

    # NOTE(review): n_actions is the number of logged rounds, not the number of
    # distinct actions. Presumably the size of the action space was intended;
    # confirm before relying on the shape of this learner's predictions.
    learner = IPWLearner(
        n_actions=len(actions),
        base_classifier=classifier,
    )

    # Constant reward of 1 for every logged round.
    rewards = np.ones(len(context))

    # Train the IPW learner on the logged bandit data.
    learner.fit(
        context=context,
        action=actions,
        reward=rewards,
    )

    return learner

def agg_fnc(x):
    """Collapse one group into two comma-separated strings.

    ``x`` is indexed with ``"itemID"`` and ``"rating"``; the values of each
    are converted with ``str`` and joined with commas, in their existing order.

    Returns a ``pd.Series`` with index ``["L", "L_rating"]`` holding the joined
    item IDs and the joined ratings respectively.
    """
    item_list = ','.join(str(item) for item in x["itemID"])
    rating_list = ','.join(str(score) for score in x["rating"])
    return pd.Series([item_list, rating_list], index=["L","L_rating"])
             

In [5]:
# Load the re-ranked recommendations (headerless CSV), name the columns and
# order the rows by rating. Append [:1000] to the read to work on a small
# sample while debugging.
df = pd.read_csv(reranked_output_file, header=None)
df.columns = ['userID','itemID','rating']
df = df.sort_values(by="rating")

# Load the item feature table (headerless CSV) and name its columns.
df_items = pd.read_csv(item_feature_file, header=None)
df_items.columns = ["itemID","condition","num"]

# Peek at the first rows together with the overall shape.
df.head(), df.shape

(       userID  itemID  rating
 33549     671    8199     0.0
 2099       42   36931     0.0
 15199     304     889     0.0
 27549     551     670     0.0
 15249     305    9010     0.0,
 (33550, 3))

In [6]:
# Count the distinct item IDs in the item feature table
# (dropna=False so a missing ID would be counted as a value, as unique() does).
df_items["itemID"].nunique(dropna=False)

2830

In [7]:
# One row per user: "L" is the user's item IDs as a comma-separated string,
# "L_rating" the corresponding ratings (both built by agg_fnc).
df_L = (
    df.groupby('userID')[['itemID','rating']]
    .apply(agg_fnc)
    .reset_index()
)

# Spread each user's list over one column per list slot.
df_L2 = df_L['L'].str.split(",", expand=True)

# List length = number of slot columns produced by the split.
len_list = len(df_L2.columns)
len_list

50

In [8]:
# Build the logged-bandit table in long format, one row per (user, list slot):
#   position = slot index of the item within the user's list
#   action   = item ID placed in that slot
#   context  = userID
df_L2_stack = df_L2.stack().rename("action")
df_input = (
    df_L2_stack.to_frame()
    .reset_index(level=1)      # slot index moves out of the index into a column
    .join(df_L["userID"])      # remaining index is the df_L row label
)
df_input.columns = ['position','action','context']

In [9]:
# Encode item IDs as consecutive integers so the action space is no larger
# than the item catalogue.
item_encoder = LabelEncoder().fit(df_items['itemID'])

# Encode from the untouched stacked IDs (df_L2_stack), not from
# df_input["action"] itself: transforming the column in place would re-encode
# already-encoded values if this cell were run a second time. df_input is built
# row for row from df_L2_stack, so the positional assignment lines up.
df_input["action"] = item_encoder.transform(df_L2_stack.astype(int).to_numpy())

In [10]:
# Size of the action space: largest encoded index over the whole item
# catalogue, plus one. It is taken from the catalogue (df_items) rather than
# from the logged actions so items that never appear in a list still count.
max_actions = item_encoder.transform(df_items['itemID'].astype(int)).max() + 1

# Encoded logged actions as a flat integer array.
actions = df_input["action"].astype(int).to_numpy().reshape(-1,)

max_actions

2830

In [11]:
# Inputs for the learner, one entry per row of df_input.
context = df_input["context"].astype(int).to_numpy().reshape(-1,1)   # userID as a one-column matrix
actions = df_input["action"].astype(int).to_numpy().reshape(-1,)
positions = df_input["position"].astype(int).to_numpy().reshape(-1,)
rewards = np.ones(len(df_input))   # constant reward of 1 for every logged row

# IPWLearner with a random forest as its base classifier; len_list is the
# number of distinct list positions present in the data.
ipw_rf = IPWLearner(
    n_actions=int(max_actions),
    base_classifier=RandomForest(n_estimators=1000, random_state=0),
    len_list=df_input["position"].nunique(dropna=False),
)

# Train the learner on the logged data.
ipw_rf.fit(
    context=context,
    action=actions,
    reward=rewards,
    position=positions,
)

In [12]:
# Sanity check: predict for every user.
# Distinct userIDs (sorted by np.unique) arranged as a single-column matrix.
all_users = np.unique(df_L["userID"])[:, np.newaxis]
results = ipw_rf.predict(context=all_users)
results.shape

(671, 2830, 50)

In [13]:
# Turn the prediction for the first user into a list: for every list position
# (last axis of results[0]) take the index of the highest-valued action.
results[0].argmax(axis=0)

array([2699, 2335, 2825, 2673, 1801, 2054,  553,   57, 2219, 2660, 1714,
       2780, 1580, 1239, 1316, 2364, 2262, 2693, 1546,  266, 2581, 2450,
       1613, 2192, 1152, 1786, 1945, 1464,  375,  536, 2629, 2129, 2097,
       2101, 1242, 1539,  620, 1598,  902, 1551, 1996, 1719, 2441, 1576,
       2276, 2244, 1973, 1724,  885,  777])

In [16]:
# Save the trained learner together with its item encoder, keyed by reranker name.
rerank_dict = {"OFAiR": (ipw_rf, item_encoder)}

# The with-block closes the file when it exits, so no explicit close() is needed.
with open('OBP_Rerankers.pickle', 'wb') as f:
    pickle.dump(rerank_dict, f)

# joblib alternative, if necessary:
#filename = os.path.join('.', 'OBP_Rerankers.joblib')
#joblib.dump(rerank_dict,filename)