import java.sql.*;
import java.util.*;

/**
 *  User class, used for making comparisons between users.
 *
 * @author     Brandon Douthit-Wood
 * @created    March 31, 2004
 */
public class User {

	/**  User id */
	public String id;
	/**  User rating */
	public String rating;
	/**  hashtable of words & frequencies */
	public Hashtable wordHash;
	/** hashtable of movies user has rated */
	public Hashtable ratingHash;
	/**  User similarity - used for finding neighboorhood */
	public double similarity;

	/**
	 * Initializes User object
	 *
	 * @param  ID     id of user
	 * @param  words  word-frequeny list, colon-delimited (word:frequency)
	 */
	public User( String ID, String words ) {
		id = ID;
		similarity = 0;
		rating = "0";

		wordHash = new Hashtable();
		ratingHash = new Hashtable();
		String wordArray[] = words.split( ":" );
		String word;
		Double freq;

		// user had no words, only 'x'
		if ( wordArray.length == 1 ) {
			return;
		}
		// loop through word array, insert into wordHash
		for ( int i = 0; i < wordArray.length; i += 2 ) {
			word = wordArray[i];
			freq = new Double( wordArray[i + 1] );
			wordHash.put( word, freq );
		}
	}

	/**
	 * Initializes User object
	 *
	 * @param  ID     id of user
	 * @param  words  word-frequeny list, colon-delimited (word:frequency)
	 * @param  r      user rating
	 */
	public User( String ID, String words, String r ) {
		id = ID;
		similarity = 0;
		rating = r;

		wordHash = new Hashtable();
		String wordArray[] = words.split( ":" );
		String word;
		Double freq;

		// user had no words, only 'x'
		if ( wordArray.length == 1 ) {
			return;
		}
		// loop through word array, insert into wordHash
		for ( int i = 0; i < wordArray.length; i += 2 ) {
			word = wordArray[i];
			freq = new Double( wordArray[i + 1] );
			wordHash.put( word, freq );
		}
	}

	/**
	 *  Finds the closest neighboors from the users Vector
	 *
	 * @param  users  Users to compare to
	 * @return        Returns a Vector of the 10 closest neighboors
	 */
	public Vector findClosestNeighboors( Vector users ) {
		Vector neighboors = new Vector();
		int numNeighboors = 10;   // this may need to be varied some
		User u;
		User u2;
		boolean added = false;

		// if there aren't many users, they are all in the neighboorhood
		if ( users.size() <= numNeighboors ) {
			return users;
		}

		// perform insertion sort
		for ( int i = 0; i < users.size(); i++ ) {
			u = (User) users.elementAt( i );
			u.similarity = this.compareTo( u );

			// if no users yet, just insert at front of list
			if ( neighboors.size() == 0 ) {
				neighboors.add( u );
			}
			// otherwise, find where to insert
			else {
				for ( int j = 0; j < numNeighboors; j++ ) {
					// if past end of list, but haven't reached limit yet, insert at end of list
					if ( j >= neighboors.size() ) {
						neighboors.add( j, u );
					}
					// else check if similarity is greater than the jth element
					else {
						u2 = (User) neighboors.elementAt( j );
						if ( u.similarity > u2.similarity ) {
							neighboors.add( j, u );
						}
					}
				}
			}

			// limit size of neighboors Vector
			if ( neighboors.size() > numNeighboors ) {
				neighboors.setSize( numNeighboors );
			}
		}

		return neighboors;
	}

	/**
	 *  Gets the average rating for the given movie from the group of users given in users Vector
	 *
	 * @param  users    The users to calculate average rating from
	 * @param  movieID  The id of the movie to calculate the average rating for.
	 * @param  userID   The id of current user, we want to ignore their rating.
	 * @return          Returns the average rating for the movie.
	 */
	public int getNeighboorsRating( Vector users, String movieID, String userID ) {
		int total = 0;
		int numRatings = 0;
		int rating;
		String strRating;
		String query;
		User user;

		// get average rating from Vector of users
		for ( int i = 0; i < users.size(); i++ ) {
			user = (User) users.elementAt( i );
			// ignore current user
			if ( user.id == userID ) {
				continue;
			}

			// get rating from hashtable
			strRating = (String) user.ratingHash.get( movieID );
			if ( strRating == null ) {
				continue;
			}

			rating = ( new Integer( strRating ) ).intValue();
			total += rating;
			numRatings++;
		}

		if ( numRatings == 0 ) {
			return 0;
		}
		else {
			return (int) total / numRatings;
		}
	}

	/**
	 *  Compares two users using the Pearson Correlation Coefficient
	 *
	 * @param  u  The other user compare to
	 * @return    Returns the similarity value.
	 */
	public double compareTo( User u ) {
		int termsInCommon = 0;
		double numer;
		double denom;
		double freq1;
		double freq2;
		double x = 0;
		double x2 = 0;
		double y = 0;
		double y2 = 0;
		double xy = 0;
		String word;
		Double frequency;

		// get all words from hash table
		Enumeration enum = this.wordHash.keys();

		// enumerate through all words in hash table
		while ( enum.hasMoreElements() ) {
			word = (String) enum.nextElement();
			frequency = (Double) u.wordHash.get( word );
			// they don't have this word in common - skip
			if ( frequency == null ) {
				continue;
			}

			termsInCommon++;
			freq1 = ( (Double) this.wordHash.get( word ) ).doubleValue();
			freq2 = frequency.doubleValue();
			x += freq1;
			y += freq2;
			x2 += Math.pow( freq1, 2 );
			y2 += Math.pow( freq2, 2 );
			xy += ( freq1 * freq2 );
		}

		if ( termsInCommon == 0 ) {
			return 0;
		}

		// calculate Pearson Correlation Coefficient
		numer = ( xy - ( x * y / termsInCommon ) );
		numer = Math.abs( numer );
		denom = ( x2 - ( Math.pow( x, 2 ) / termsInCommon ) ) * ( y2 - ( Math.pow( y, 2 ) / termsInCommon ) );
		denom = Math.sqrt( Math.abs( denom ) );
		if ( denom == 0.0 ) {
			denom = 0.001;
		}
		return numer / denom;
	}

	/**
	 *  Gets all users in a cluster group
	 *
	 * @param  userID     ID of user, do not return in Vector of users
	 * @param  clusterID  ID of cluster to get users from.
	 * @return            Returns a Vector of users in the cluster group.
	 */
	public Vector getClusterGroup( String userID, String clusterID ) {
		String query = "";
		String words = "";
		Vector users = new Vector();

		// get all users from this cluster group
		query = "select id,pos_word_freq from user where cluster_id=" + clusterID;
		query += " and id !=" + userID;
		ResultSet userResult = Query.executeQuery( query );

		try {
			// add all users to Vector that will be returned
			while ( userResult.next() ) {
				userID = userResult.getString( "id" );
				words = userResult.getString( "pos_word_freq" );
				users.add( new User( userID, words ) );
			}
			userResult.close();
			System.out.println( users.size() + " users in cluster..." );
		}
		catch ( SQLException e ) {
			System.err.println( "Error getting the user's cluster group..." );
			e.printStackTrace();
			System.exit( 0 );
		}

		return users;
	}

	/**
	 *  Generate a group of 5000 random users.
	 *
	 * @param  movieID  Id of movie we are trying to rate - the users selected must have rated this movie
	 * @return          Returns a Vector of the randomly selected users.
	 */
	public Vector getRandomGroup( String movieID ) {
		int randNum;
		int threshold;
		int num;
		String query = "";
		String ids = "";
		String words = "";
		String rating;
		String userID;
		Random rand = new Random();
		Vector users = new Vector();

		// select all users who have rated the movie
		query = "select distinct user_id,words,rating from rating where movie_id=" + movieID + " and user_id !=" + this.id;
		ResultSet ratingResult = Query.executeQuery( query );

		num = Query.getNumResults( ratingResult );
		System.out.print( num + " users rated movie " + movieID + "..." );

		if ( num == 0 ) {
			return null;
		}
		// we want to select 5000 of them
		threshold = (int) ( 5000.0 / num * 100 );

		try {
			while ( ratingResult.next() ) {
				userID = ratingResult.getString( "user_id" );
				words = ratingResult.getString( "words" );
				rating = ratingResult.getString( "rating" );

				if ( words == null ) {
					continue;
				}

				// randomly pick the user
				randNum = rand.nextInt( 100 ) + 1;
				if ( randNum < threshold ) {
					users.add( new User( userID, words, rating ) );
				}
			}

			ratingResult.close();
			System.out.println( users.size() + " randomly picked" );
		}
		catch ( SQLException e ) {
			System.err.println( "Error generating random group of users..." );
			e.printStackTrace();
			System.exit( 0 );
		}

		return users;
	}

}

