/*****************************************************************************************************
 * Use the neighbourhood set to predict ratings for the user based on Resnick's formula
 * *****************************************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <cmath>

/* Record for one neighbouring user that contributes to a prediction. */
struct users {
	int uid;       /* id of the neighbouring user */
	double sim;    /* similarity value stored next to that id in the neighbour list */
	double avg;    /* mean of all ratings given by the neighbouring user */
	int rating;    /* the neighbour's rating for the movie being predicted */
};

/*****************************************************************************************************
 * Totals gathered for one user by a single pass over the training file.
 * *****************************************************************************************************/
struct aggr_user_stats {
	double sum;        /* sum of every rating given by the user */
	int count;         /* number of ratings given by the user */
	int has_movie;     /* non-zero when the user rated the requested movie */
	int movie_rating;  /* that rating; meaningful only when has_movie is set */
};

/*****************************************************************************************************
 * Consume the remainder of the current line of f.
 * Returns non-zero if a newline was consumed, zero if end of file was reached first.
 * *****************************************************************************************************/
static int aggr_skip_line(FILE *f) {
	int c;
	while ((c = fgetc(f)) != EOF) {
		if (c == '\n') return 1;
	}
	return 0;
}

/*****************************************************************************************************
 * Scan train from the beginning and collect the rating totals of user `target`, noting whether
 * that user rated movie `mov`. Records are "user movie rating" integer triples. The scan stops at
 * the first record whose user id is greater than `target`, so the file is expected to be sorted by
 * user id. It also stops at the first record that cannot be parsed, instead of relying on feof().
 * *****************************************************************************************************/
static aggr_user_stats aggr_scan_user(FILE *train, int target, int mov) {
	aggr_user_stats st = {0.0, 0, 0, 0};
	int user, movie, rating;

	rewind(train);
	while (fscanf(train, "%d%d%d", &user, &movie, &rating) == 3 && user <= target) {
		if (user != target) continue;
		st.sum += (double)rating;
		st.count++;
		if (movie == mov) {
			/* If the movie appears more than once, the last rating wins. */
			st.has_movie = 1;
			st.movie_rating = rating;
		}
	}
	return st;
}

/*****************************************************************************************************
 * Predict the rating of user `us` for movie `mov` with Resnick's formula:
 *
 *     prediction = avg(us) + sum_v( sim(us,v) * (rating(v,mov) - avg(v)) ) / sum_v( |sim(us,v)| )
 *
 * where v ranges over the neighbours of `us` that rated `mov`.
 *
 * neigh : neighbour file. Each line starts with "<user>->" and is followed by entries of the form
 *         ":<neighbour>;<similarity>", each preceded by exactly one separator character; the line
 *         ends at '\n' (or at end of file).
 * train : training file of "user movie rating" triples, sorted by user id.
 *
 * Both streams are rewound; their positions on return are unspecified.
 *
 * Returns the prediction. If `us` has no line in neigh, or none of the neighbours rated `mov`,
 * the plain average rating of `us` is returned. If `us` has no ratings in train the result is NaN.
 * *****************************************************************************************************/
double aggr(int us, int mov, FILE *neigh, FILE *train) {
	double res = 0.0;   /* running sum of sim * (rating - neighbour average) */
	double k = 0.0;     /* running sum of |sim| */
	int on_user_line = 0;

	/* Locate the line of `us`; give up cleanly at end of file instead of looping forever. */
	rewind(neigh);
	for (;;) {
		int line_user = 0;
		int got = fscanf(neigh, "%d->", &line_user);
		if (got == EOF) break;
		if (got == 1 && line_user == us) {
			on_user_line = 1;
			break;
		}
		if (!aggr_skip_line(neigh)) break;
	}

	if (on_user_line) {
		/* ch is an int so that EOF can be told apart from every valid character. */
		int ch = fgetc(neigh);
		while (ch != '\n' && ch != EOF) {
			int nb = 0;
			double sim = 0.0;
			/* Only a fully parsed entry contributes; a failed parse is skipped rather than
			 * silently re-using the previous neighbour's values. */
			if (fscanf(neigh, ":%d;%lf", &nb, &sim) == 2) {
				aggr_user_stats s = aggr_scan_user(train, nb, mov);
				if (s.has_movie) {
					double nb_avg = s.sum / (double)s.count;
					res += sim * ((double)s.movie_rating - nb_avg);
					k += fabs(sim);
				}
			}
			ch = fgetc(neigh);
		}
	}

	/* Average rating of the target user; NaN signals "no training data" to the caller. */
	aggr_user_stats own = aggr_scan_user(train, us, mov);
	double avg_user = (own.count > 0) ? own.sum / (double)own.count : (double)NAN;

	if (k == 0) return avg_user;
	return avg_user + res / k;
}

/*****************************************************************************************************
 * Predict a rating for every "user movie rating" record of test.dat using neighbours.dat and
 * train.dat, write "user movie prediction" lines to predict.dat, and print the RMSE over all
 * predictions plus the mean of the per-user MAEs.
 *
 * A "user run" is a maximal sequence of consecutive test records with the same user id. Records
 * whose prediction is NaN are written with a prediction of 0 and excluded from the error
 * statistics; a run without any valid prediction is left out of the MAE average.
 *
 * Returns 0 on success, 1 if any of the four files cannot be opened.
 * *****************************************************************************************************/
int main() {
	FILE *test = fopen("test.dat", "r");
	FILE *neigh = fopen("neighbours.dat", "r");
	FILE *train = fopen("train.dat", "r");
	FILE *r = fopen("predict.dat","w");
	if (test == NULL || neigh == NULL || train == NULL || r == NULL) {
		fprintf(stderr, "error: could not open test.dat, neighbours.dat, train.dat or predict.dat\n");
		if (test != NULL) fclose(test);
		if (neigh != NULL) fclose(neigh);
		if (train != NULL) fclose(train);
		if (r != NULL) fclose(r);
		return 1;
	}

	double sq_err_sum = 0.0;   /* sum of squared errors over all valid predictions */
	int total_cases = 0;       /* number of valid predictions */
	double mae_sum = 0.0;      /* sum of the per-run MAEs, added in file order */
	int mae_i = 0;             /* number of runs that contributed a MAE */

	int in_run = 0;            /* becomes non-zero once the first record has been read */
	int cur_user = 0;          /* user id of the run being accumulated */
	double run_abs_err = 0.0;  /* sum of absolute errors in the current run */
	int run_n = 0;             /* valid predictions in the current run */

	int user, movie, rating;
	/* Drive the loop by the fscanf result: no feof() polling and no seeking back. */
	while (fscanf(test, "%d%d%d", &user, &movie, &rating) == 3) {
		if (in_run && user != cur_user) {
			/* The previous run is complete: fold its MAE into the total. */
			if (run_n > 0) {
				mae_sum += run_abs_err / (double)run_n;
				mae_i++;
			}
			run_abs_err = 0.0;
			run_n = 0;
		}
		cur_user = user;
		in_run = 1;

		double pred = aggr(user, movie, neigh, train);
		if (std::isnan(pred)) {
			/* No usable prediction: report 0 for this very record (a double for %lf). */
			fprintf(r,"%d %d %lf\n", user, movie, 0.0);
			continue;
		}

		double err = pred - (double)rating;
		total_cases++;
		run_abs_err += fabs(err);
		sq_err_sum += err * err;
		fprintf(r,"%d %d %lf\n", user, movie, pred);
		run_n++;
	}
	/* Close the last run. */
	if (run_n > 0) {
		mae_sum += run_abs_err / (double)run_n;
		mae_i++;
	}

	/* With no valid prediction the statistics are undefined; report NaN explicitly. */
	double rmse = (total_cases > 0) ? sqrt(sq_err_sum / (double)total_cases) : (double)NAN;
	printf("\nrmse: %f\n",rmse);
	printf("%d\n",mae_i);

	double mae = (mae_i > 0) ? mae_sum / (double)mae_i : (double)NAN;
	printf("\n\nMAE:%lf\n", mae);

	fclose(test);
	fclose(neigh);
	fclose(train);
	fclose(r);
	return 0;
}