<?php
  /*
  *  Parses the movie data file from the IMDB data set and loads it into the database.
  */

  include("db_funcs.php");

  // Open the data file for reading. Each record is expected to hold a title
  // (ending in a parenthesised year) and a year, separated by one or more tabs.
  $fid = fopen("data/movies.list", "r");
  if ($fid === false) {
    // Without this guard the read loop below would run on an invalid handle.
    print("Could not open data file.");
    exit;
  }

  // make sure database connected ok
  if (!db_connect()) {
    fclose($fid);
    print("Could not connect to database.");
    exit;
  }

  // NOTE(review): the mysql_* functions used below belong to the legacy mysql
  // extension; moving to mysqli/PDO would also require changing db_funcs.php,
  // which is not visible from this file.

  // Read the file line by line. Testing the fgets() result (rather than
  // feof() after a pre-read) ensures a final line without a trailing newline
  // is still processed.
  while (($line = fgets($fid)) !== false) {
    // strip the line terminator so it does not end up inside the year field
    $line = rtrim($line, "\r\n");

    // tokenize each line - fields are separated by one or more tabs
    $fields = preg_split('/\t+/', $line, -1, PREG_SPLIT_NO_EMPTY);
    if (count($fields) < 2) {
      // blank lines and header/footer lines have no year field - skip them
      continue;
    }
    $title = $fields[0];
    $year = trim($fields[1]);

    // if any of these markers is present, we don't want that title
    if ((strpos($title, "(TV)") !== false) ||
        (strpos($title, "(V)") !== false) ||
        (strpos($title, "(VG)") !== false)) {
      print("Throwing out " . htmlspecialchars($title) . "<br>");
      continue;
    }

    // Get rid of the year at the end of the title. Matching the suffix
    // explicitly also handles disambiguated years such as "(1999/II)" and
    // unknown years "(????)", and leaves titles without a year suffix intact.
    $title = preg_replace('/\s*\((?:\d{4}|\?{4})(?:\/[IVXLCDM]+)?\)\s*$/', '', $title);

    // Move a trailing ", The" to the front to match EachMovie data. Only a
    // suffix is moved; ", The" in the middle of a title is left alone.
    if (strlen($title) > 5 && substr($title, -5) === ", The") {
      $title = "The " . substr($title, 0, strlen($title) - 5);
    }

    // Escape values for use inside SQL string literals.
    $sqlTitle = mysql_real_escape_string($title);
    $sqlYear = mysql_real_escape_string($year);
    // For the LIKE pattern, additionally neutralise the wildcard characters so
    // that a literal '%' or '_' in a title only matches itself.
    $likeTitle = addcslashes($sqlTitle, '%_');

    // Check whether the movie is already in the database (prefix match on title).
    $qry = "SELECT id,year FROM movie WHERE title LIKE '$likeTitle%'";
    $result = mysql_query($qry) or die("Invalid query: " . mysql_error());
    if (mysql_num_rows($result) > 0) {
      // movie(s) in database: fill in the year wherever it is missing
      while ($row = mysql_fetch_assoc($result)) {
        if (strlen(trim($row['year'])) == 0) {
          $sqlId = mysql_real_escape_string($row['id']);
          $qry = "UPDATE movie SET year='$sqlYear' WHERE id='$sqlId'";
          print(htmlspecialchars($qry) . "<br>\n");
          mysql_query($qry) or die("Invalid query: " . mysql_error());
        }
      }
    }
    // if not in database, add it
    else {
      $qry = "INSERT INTO movie (title,year) VALUES ('$sqlTitle','$sqlYear')";
      print(htmlspecialchars($qry) . "<br>\n");
      mysql_query($qry) or die("Invalid query: " . mysql_error());
    }
    mysql_free_result($result);
  }

  fclose($fid);
?>
