Question

1 Approved Answer

Posted on Aug 23, 2024

I want to reduce the RMSE value even further. I think methods such as matrix factorization or singular value decomposition could be used, but…

I want to reduce the RMSE value even further. I think methods such as matrix factorization or singular value decomposition (SVD) could be used, but only the C++ standard library may be used. Apart from changing the method, any other changes that can reduce the RMSE would be very welcome. Please integrate into the code the changes that you believe will lower this value further. Thank you very much. Please don't spam.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// A single rating given by a user to an item.
//
// Every member is value-initialised so that a default-constructed Rating
// (for example one that a failed parse never filled in) holds zeros
// instead of indeterminate values.
struct Rating {
    int user_id{};   // identifier of the user who gave the rating
    int item_id{};   // identifier of the item that was rated
    float rating{};  // the rating value itself
};

// A test case / predicted rating for one (user, item) pair.
//
// Every member is value-initialised so that a default-constructed
// PredictedRating holds zeros instead of indeterminate values.
struct PredictedRating {
    int id{};        // identifier of the test case
    int user_id{};   // user the prediction is made for
    int item_id{};   // item the prediction is made for
    float rating{};  // rating value associated with this test case
};

// Reads the ratings from a .csv file and returns them as a vector std::vector read_ratings(const std::string &filename) { std::vector ratings;

std::ifstream file(filename); if (file.is_open()) { std::string line; // Skip the first line (header) std::getline(file, line); while (std::getline(file, line)) { Rating rating; std::sscanf(line.c_str(), "%d,%d,%f", &rating.user_id, &rating.item_id, &rating.rating); ratings.push_back(rating); } file.close(); }

return ratings; }

// Reads the test cases from a .csv file and returns them as a vector std::vector read_test_cases(const std::string &filename) { std::vector test_cases;

std::ifstream file(filename); if (file.is_open()) { std::string line; // Skip the first line (header) std::getline(file, line); while (std::getline(file, line)) { PredictedRating test_case; std::sscanf(line.c_str(), "%d,%d,%d,%f", &test_case.id, &test_case.user_id, &test_case.item_id, &test_case.rating); test_cases.push_back(test_case); } file.close(); }

return test_cases; }

// Calculates the root mean squared error between the predicted ratings and the actual ratings

float calculate_rmse(const std::vector &predicted_ratings, const std::vector &actual_ratings) { float sum_squared_error = 0.0f; for (const auto &predicted_rating : predicted_ratings) { auto actual_rating_iter = std::find_if(actual_ratings.begin(), actual_ratings.end(), [&](const Rating &r) { return r.user_id == predicted_rating.user_id && r.item_id == predicted_rating.item_id; }); if (actual_rating_iter != actual_ratings.end()) { sum_squared_error += std::pow(predicted_rating.rating - actual_rating_iter->rating, 2); } } return std::sqrt(sum_squared_error / predicted_ratings.size()); } // Calculates the dot product of two vectors float dot_product(const std::unordered_map &vec1, const std::unordered_map &vec2) { float result = 0.0f; for (const auto &[key, value] : vec1) { if (vec2.count(key) > 0) { result += value * vec2.at(key); } } return result; }

// Calculates the cosine similarity between two sparse vectors stored as
// key -> value maps.
//
// The dot product and both squared norms are accumulated directly in this
// function, so each element of vec1 costs a single lookup in vec2.
//
// @return dot(vec1, vec2) / (|vec1| * |vec2|), or 0 when either vector has
//         zero length (empty or all zeros), where the ratio would otherwise
//         be 0/0 = NaN.
float cosine_similarity(const std::unordered_map<int, float> &vec1,
                        const std::unordered_map<int, float> &vec2) {
    float dot = 0.0f;
    float norm1_squared = 0.0f;
    for (const auto &[key, value] : vec1) {
        norm1_squared += value * value;
        const auto match = vec2.find(key);
        if (match != vec2.end()) {
            dot += value * match->second;
        }
    }

    float norm2_squared = 0.0f;
    for (const auto &entry : vec2) {
        norm2_squared += entry.second * entry.second;
    }

    const float denominator = std::sqrt(norm1_squared) * std::sqrt(norm2_squared);
    // Written as !(x > 0) so a NaN denominator is rejected as well.
    if (!(denominator > 0.0f)) {
        return 0.0f;
    }
    return dot / denominator;
}

float predict_rating_cosine(int user_id, int item_id, const std::unordered_map>& user_ratings) { // Create a vector of ratings for the given user std::unordered_map user_vec; // item_id -> rating for (const auto& rating : user_ratings.at(user_id)) { // for each rating of the user user_vec[rating.item_id] = rating.rating; // add the item_id and the rating to the vector }

// Find all other users who have rated the same item std::vector other_users; // vector of user_ids for (const auto& [other_user_id, ratings] : user_ratings) { // for each user for (const auto& rating : ratings) { // for each rating of the user if (rating.item_id == item_id) { // if the item_id is the same as the item_id we are trying to predict other_users.push_back(other_user_id); // add the user_id to the vector break; } } }

// Calculate the weighted average of the ratings given by the other users float sum_similarities = 0.0f; // sum of the similarities float sum_ratings = 0.0f; // sum of the ratings for (int other_user_id : other_users) { // for each user // Create a vector of ratings for the other user std::unordered_map other_user_vec; // item_id -> rating for (const auto& rating : user_ratings.at(other_user_id)) { // for each rating of the other user other_user_vec[rating.item_id] = rating.rating; // add the item_id and the rating to the vector }

// Calculate the cosine similarity between the two vectors float similarity = cosine_similarity(user_vec, other_user_vec); // similarity between the two vectors

// Find the rating given by the other user for the item float rating = 0.0f; // rating given by the other user for the item for (const auto& r : user_ratings.at(other_user_id)) { // for each rating of the other user if (r.item_id == item_id) { // if the item_id is the same as the item_id we are trying to predict rating = r.rating; // set the rating break; } }

// Update the sums sum_similarities += similarity; // add the similarity to the sum of similarities sum_ratings += similarity * rating; // add the weighted rating to the sum of ratings }

// Return the weighted average if (sum_similarities > 0) { // if there are other users who have rated the item return sum_ratings / sum_similarities; // return the weighted average } else { // if there are no other users who have rated the item // If there are no other users who have rated the item, return the average rating of all items float sum_ratings = 0.0f; // sum of the ratings int count = 0; for (const auto& [_, ratings] : user_ratings) { // for each user for (const auto& rating : ratings) { // for each rating of the user sum_ratings += rating.rating; // add the rating to the sum of ratings count++; // increment the count } } return sum_ratings / count; // return the average rating } }

// Predict the rating for a given user and item using cosine similarity float predict_rating_cosine(int user_id, int item_id, const std::unordered_map> &user_item_ratings) { // Get the ratings for the given user and all other users const auto &user_ratings = user_item_ratings.at(user_id); std::vector>> other_user_ratings; for (const auto &[other_user, ratings] : user_item_ratings) { if (other_user != user_id) { other_user_ratings.emplace_back(other_user, ratings); } }

// Calculate the cosine similarity between the given user and all other users std::vector> similarities; for (const auto &[other_user, ratings] : other_user_ratings) { float similarity = cosine_similarity(user_ratings, ratings); if (similarity > 0) { similarities.emplace_back(other_user, similarity); } }

// Sort the users by their similarity to the given user std::sort(similarities.begin(), similarities.end(), [](const auto &a, const auto &b) { return a.second > b.second; });

// Use the top k most similar users to predict the rating constexpr int k = 50; if (similarities.size() < k) { return 0.0f; } float sum_similarities = 0.0f; float sum_ratings = 0.0f; for (int i = 0; i < k; i++) { int other_user = similarities[i].first; float similarity = similarities[i].second; float rating = user_item_ratings.at(other_user).at(item_id); sum_similarities += similarity; sum_ratings += similarity * rating; } if (sum_similarities == 0) { return 0.0f; } return sum_ratings / sum_similarities; }

int main() { // Read in the training and test data std::vector training_set = read_ratings("train.csv"); std::vector test_set = read_test_cases("test.csv");

// Initialize a map to store the ratings given by each user std::unordered_map> user_ratings; for (const auto &rating : training_set) { user_ratings[rating.user_id].push_back(rating); }

// Initialize a map to store the ratings received by each item std::unordered_map> item_ratings; for (const auto &rating : training_set) { item_ratings[rating.item_id].push_back(rating); }

std::fstream fout;

// opens an existing csv file or creates a new file. fout.open("report.csv", std::ios::out | std::ios::app);

// Predict the ratings for the test set using the mean rating of the user for (auto &predicted_rating : test_set) { predicted_rating.rating = predict_rating_cosine(predicted_rating.user_id, predicted_rating.item_id, user_ratings); fout << std::to_string(predicted_rating.id) << "," << std::to_string(predicted_rating.rating) << " "; }

// Calculate the root mean squared error float rmse = calculate_rmse(test_set, training_set); std::cout << "RMSE: " << rmse << std::endl;

return 0; }