import java.util.*;
import java.io.*;

public class Episodes extends Displayactions {

/* Implements the Position Pair Set (PPS) algorithm of
   Ma, X., Pang, H., and Tan, K. (2004), ``Finding Constrained Episodes
      Using Minimal Occurrences,'' {\em Proceedings of the IEEE
      International Conference on Data Mining} (ICDM), Brighton, UK,
      November: 471-474.
   The test sequence used in this article is:

   String[] sequence = {"A","B","B","C","E","D","E","D","A","A","G","B",
      "C","A","C","B","F"};

   As implemented here, an episode is a run of consecutive events of the
   sequence, and a position pair is the 1-based (start, end) position of
   one occurrence of an episode.  All working storage below is static: it
   is shared by every call of episodes_ and is not cleared between calls.
*/

// Per active-set entry: true once an attempt to grow it has been made.

static boolean checkedep[] = new boolean[NMEPISODES];

// Events of the length-1, active-set, trial, and cached episodes.

static String lngth1episode[] = new String[NMEPISODES];
static String activesetepisode[][] = new String[NMEPISODES][NMACTNS];
static String trialepisode[][] = new String[NMEPISODES][NMACTNS];
static String cacheepisode[][] = new String[NMEPISODES][NMACTNS];

/* Occurrence data.  lngth1pos holds the 1-based positions of each distinct
   event; the *freq arrays hold occurrence counts; the *pp arrays hold the
   position pairs, [..][..][0] = start and [..][..][1] = end. */

static int lngth1pos[][] = new int[NMEPISODES][10000];
static int lngth1epfreq[] = new int[NMEPISODES];
static int activesetfreq[] = new int[NMEPISODES];
static int trialfreq[] = new int[NMEPISODES];
static int cachefreq[] = new int[NMEPISODES];
static int activesetpp[][][] = new int[NMEPISODES][10000][2];
static int trialpp[][][] = new int[NMEPISODES][10000][2];
static int cachepp[][][] = new int[NMEPISODES][10000][2];

// ----------------------------------------------------------------------

/* Finds episodes and their frequencies, then prints the cached episodes
   that have more than one event and occur at least 5 times.

   sequence: the events; entries 0 .. seqlngth - 1 are read.
   seqlngth: number of events of sequence to process.

   An episode is kept only if it occurs more than minsup (= 2) times, and
   no episode longer than 5 events is generated. */

static void episodes_(String sequence[], int seqlngth) {

boolean epmatch;

int i, j, k, minsup = 2, nmlngth1eps = 0, pp0, pp1, setmembertogrow,
   episodelngth, nmadded, nmactiveset, nmcache = 0;

// First, find all distinct events and record their 1-based positions.

for (i = 0; i < seqlngth; ++i) {
   epmatch = false;
   for (j = 0; j < nmlngth1eps; ++j) {
      if (sequence[i].equals(lngth1episode[j])) {
         epmatch = true;
         lngth1pos[j][lngth1epfreq[j]] = i + 1;
         ++lngth1epfreq[j];
         break;
      }
   }
   if (!epmatch) {
      lngth1episode[nmlngth1eps] = sequence[i];
      lngth1pos[nmlngth1eps][0] = i + 1;
      lngth1epfreq[nmlngth1eps] = 1;
      ++nmlngth1eps;
   }
}

/* Load the active set with the length-1 episodes having frequency >
   minsup.  A length-1 episode starts and ends at the same position. */

j = 0;
for (i = 0; i < nmlngth1eps; ++i) {
   if (lngth1epfreq[i] > minsup) {
      activesetepisode[j][0] = lngth1episode[i];
      activesetfreq[j] = lngth1epfreq[i];
      for (k = 0; k < activesetfreq[j]; ++k) {
         activesetpp[j][k][0] = lngth1pos[i][k];
         activesetpp[j][k][1] = lngth1pos[i][k];
      }
      checkedep[j] = false;
      ++j;
   }
}
nmactiveset = j;

/* Main loop.  The active set is used as a stack: the last entry is grown
   once; its frequent extensions are pushed on top of it and are handled
   before the entry itself is looked at again. */

while (nmactiveset > 0) {
   nmadded = 0;
   setmembertogrow = nmactiveset - 1;
   if (!checkedep[setmembertogrow]) {
      checkedep[setmembertogrow] = true;
      for (i = 0; i < activesetfreq[setmembertogrow]; ++i) {

         /* Create a trial episode by adding the event just after the
            end-event of this i^th position pair to make a new episode.

            NOTE(review): the trial arrays are indexed by the position
            pair index i, which is bounded by the episode's frequency,
            while their first dimension is NMEPISODES -- confirm that
            NMEPISODES is large enough for the sequences used. */

         pp0 = activesetpp[setmembertogrow][i][0];
         pp1 = activesetpp[setmembertogrow][i][1];

         // This occurrence ends on the last event: nothing to append.

         if (pp1 + 1 > seqlngth) {
            continue;
         }

         episodelngth = 2 + pp1 - pp0;

         // Episode length constraint.

         if (episodelngth > 5) {
            continue;
         }

         // Positions are 1-based, hence the - 1 when indexing sequence.

         for (j = 0; j < episodelngth; ++j) {
            trialepisode[i][j] = sequence[pp0 + j - 1];
         }

         /* Find this trial episode's frequency by matching it at every
            start position of the sequence. */

         trialfreq[i] = 0;
         for (j = 0; j < seqlngth - episodelngth + 1; ++j) {
            epmatch = true;
            for (k = 0; k < episodelngth; ++k) {
               if (!trialepisode[i][k].equals(sequence[j + k])) {
                  epmatch = false;
                  break;
               }
            }
            if (epmatch) {
               trialpp[i][trialfreq[i]][0] = j + 1;
               trialpp[i][trialfreq[i]][1] = j + episodelngth;
               ++trialfreq[i];
            }
         }

         // Add a frequently occurring trial episode to the active set.

         if (trialfreq[i] > minsup) {

            // Don't add an episode to the active set more than once.

            if (inactiveset_(trialepisode[i], episodelngth, nmactiveset)) {
               continue;
            }

            for (j = 0; j < episodelngth; ++j) {
               activesetepisode[nmactiveset][j] = trialepisode[i][j];
            }
            activesetfreq[nmactiveset] = trialfreq[i];
            for (j = 0; j < activesetfreq[nmactiveset]; ++j) {
               activesetpp[nmactiveset][j][0] = trialpp[i][j][0];
               activesetpp[nmactiveset][j][1] = trialpp[i][j][1];
            }
            checkedep[nmactiveset] = false;
            ++nmadded;
            ++nmactiveset;

            if (nmactiveset == NMEPISODES) {
               iderr_("episodes: nmactiveset = NMEPISODES");
            }
         }
      }
   }

   /* If nothing was added on this pass -- the episode did not grow, or it
      was grown on an earlier pass and its extensions have since been
      removed -- remove it from the active set and place it in the
      cache. */

   if (nmadded == 0) {

      // Add episode to cache.

      episodelngth = 1 + activesetpp[setmembertogrow][0][1] -
                     activesetpp[setmembertogrow][0][0];
      for (i = 0; i < episodelngth; ++i) {
         cacheepisode[nmcache][i] = activesetepisode[setmembertogrow][i];
      }
      cachefreq[nmcache] = activesetfreq[setmembertogrow];
      for (i = 0; i < activesetfreq[setmembertogrow]; ++i) {
         cachepp[nmcache][i][0] = activesetpp[setmembertogrow][i][0];
         cachepp[nmcache][i][1] = activesetpp[setmembertogrow][i][1];
      }
      ++nmcache;
      if (nmcache == NMEPISODES) {
         iderr_("episodes: nmcache = NMEPISODES");
      }

      /* Ratchet active set down one record.  Note that since no episodes
         were added to the active set, "setmembertogrow" is the current
         last episode in the active set. */

      --nmactiveset;
   }
}

/* Print out cached episodes and their position pairs.  Single-event
   episodes and episodes occurring fewer than 5 times are not printed. */

fprintf_(1, "\n---------- Episodes ---------------" +
   "\nnmcache= " + nmcache);
for (i = 0; i < nmcache; ++i) {
   episodelngth = 1 + cachepp[i][0][1] - cachepp[i][0][0];
   if (episodelngth == 1 || cachefreq[i] < 5) {
      continue;
   }

   fprintf_(1, "\nThe following episode of length " +
      episodelngth + " occurred " + cachefreq[i] + " times.");
   for (j = 0; j < episodelngth; ++j) {
      fprintf_(1, "     Action " + (j + 1) + ": " + cacheepisode[i][j]);
   }

   fprintf_(1, "Its position pairs are:");
   for (j = 0; j < cachefreq[i]; ++j) {
      fprintf_(1, cachepp[i][j][0] + ", " + cachepp[i][j][1]);
   }
}
}

// ----------------------------------------------------------------------

/* Returns true if the first episodelngth events of episode form an
   episode already held in entries 0 .. nmactiveset - 1 of the active
   set. */

private static boolean inactiveset_(String episode[], int episodelngth,
   int nmactiveset) {

int j, k;

for (j = 0; j < nmactiveset; ++j) {

   /* Only an entry of the same length can be the same episode.  The
      length is taken from the entry's first position pair.  Active set
      slots are overwritten in place as the set shrinks and regrows, so a
      shorter entry can still hold events of an earlier, longer occupant
      beyond its own length; comparing against those stale events would
      wrongly reject a new episode. */

   if (1 + activesetpp[j][0][1] - activesetpp[j][0][0] != episodelngth) {
      continue;
   }

   for (k = 0; k < episodelngth; ++k) {
      if (!episode[k].equals(activesetepisode[j][k])) {
         break;
      }
   }
   if (k == episodelngth) {
      return true;
   }
}
return false;
}
}
