import java.sql.*;
import java.util.*;

/**
 *  Randomly splits the ratings data into a training set and a test set.
 *  Each rating is assigned independently, so the split is only approximately
 *  75% training and 25% test.
 *
 * @author     Brandon Douthit-Wood
 * @created    March 31, 2004
 */
public class ChooseTrainingSet {

	/** Percentage (out of 100) of ratings that are assigned to the training set. */
	private static final int TRAINING_PERCENT = 75;

	/**
	 *  Walks every row of the rating table and randomly assigns it to either
	 *  the training set or the test set. Rows chosen for the training set get
	 *  their training column set to 'x'; rows chosen for the test set are not
	 *  modified. A summary of both counts is printed at the end.
	 *
	 *  The process exits with status 1 if the database connection cannot be
	 *  established or if reading the ratings fails.
	 *
	 * @param  args           The command line arguments (not used)
	 */
	public static void main( String[] args ) {
		int trainingCount = 0;
		int testCount = 0;
		boolean failed = false;
		Random rand = new Random();

		if ( !Query.connectToDB() ) {
			// a failed connection is an error, so report a non-zero exit status
			System.exit( 1 );
		}

		// get all the ratings
		ResultSet ratingResult = Query.executeQuery( "select id from rating" );
		if ( ratingResult == null ) {
			// Query.executeQuery is defined elsewhere; guard in case it signals failure with null
			System.err.println( "Error choosing training and test sets: no result set returned" );
			System.exit( 1 );
		}

		try {
			while ( ratingResult.next() ) {
				String id = ratingResult.getString( "id" );

				// uniform draw in 1..100, so TRAINING_PERCENT of the possible values select training
				int randNum = rand.nextInt( 100 ) + 1;
				if ( randNum <= TRAINING_PERCENT ) {
					System.out.println( id + " -> training set" );
					trainingCount++;
					Query.executeUpdate( "update rating set training='x' where id=" + id );
				}
				else {
					// NOTE(review): test rows are left untouched, so an 'x' written by an
					// earlier run is not cleared here - confirm the column is reset before re-running
					testCount++;
					System.out.println( id + " -> test set" );
				}
			}
		}
		catch (SQLException e) {
			System.err.println( "Error choosing training and test sets..." );
			e.printStackTrace();
			// defer the exit so the finally block below still runs (System.exit skips it)
			failed = true;
		}
		finally {
			closeResultSet( ratingResult );
		}

		if ( failed ) {
			System.exit( 1 );
		}

		// count the rows actually visited so the two percentages always add up
		int totalNum = trainingCount + testCount;

		// display total number in test and training sets - just to make sure random is doing a good job!
		System.out.println( "\n\nTotal ratings: " + totalNum );
		System.out.println( "Number in training set: " + trainingCount + "  (" + percentOf( trainingCount, totalNum ) + "%)" );
		System.out.println( "Number in test set: " + testCount + "  (" + percentOf( testCount, totalNum ) + "%)" );
	}

	/**
	 *  Computes what percentage of total is made up by count.
	 *
	 * @param  count          The size of the subset
	 * @param  total          The size of the whole set
	 * @return                The percentage, or 0 when total is 0 (avoids printing NaN)
	 */
	private static float percentOf( int count, int total ) {
		if ( total == 0 ) {
			return 0f;
		}
		return (float) count / total * 100;
	}

	/**
	 *  Closes the result set, reporting (but not propagating) any failure so
	 *  that cleanup never hides the outcome of the main loop.
	 *
	 * @param  rs             The result set to close
	 */
	private static void closeResultSet( ResultSet rs ) {
		try {
			rs.close();
		}
		catch (SQLException e) {
			System.err.println( "Warning: could not close the rating result set" );
			e.printStackTrace();
		}
	}
}

