/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/* bos720 src/bos/usr/sbin/perf/pmapi/pmtoolkit/kthreadapit1.c 1.35       */
/*                                                                        */
/* Licensed Materials - Property of IBM                                   */
/*                                                                        */
/* Restricted Materials of IBM                                            */
/*                                                                        */
/* COPYRIGHT International Business Machines Corp. 1999,2009              */
/* All Rights Reserved                                                    */
/*                                                                        */
/* US Government Users Restricted Rights - Use, duplication or            */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.      */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
static char sccsid[] = "@(#)69        1.35  src/bos/usr/sbin/perf/pmapi/pmtoolkit/kthreadapit1.c, pmapi, bos720 7/3/09 03:30:32";

/*
 *   COMPONENT_NAME: PMAPI
 *
 *   FUNCTIONS: debugger
 *		gettids
 *		init_tdata
 *		main
 *		pexit
 *		pmerror
 *		print_data
 *		print_prog
 *		print_time
 *		process_args
 *		pusageexit
 *		worker
 *
 *   ORIGINS: 27
 *
 *                    -- (                            when
 *   combined with the aggregated modules for this product)
 *   OBJECT CODE ONLY SOURCE MATERIALS
 *
 *   (C) COPYRIGHT International Business Machines Corp. 1999, 2001.
 *   All Rights Reserved
 *   US Government Users Restricted Rights - Use, duplication or
 *   disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
 */

/* From the manual page for pthread_create:

-- begin excerpt
Note: The pthread.h header file must be the first included file of each 
source file using the threads library.  Otherwise, the -D_THREAD_SAFE 
compilation flag should be used, or the cc_r compiler used. In this case,
the flag is automatically set.
-- end excerpt

Thus pthread.h is the first include file here.

*/
#include <pthread.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <unistd.h>
#include <procinfo.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <sys/m_wait.h>
#include <sys/reg.h>
#include <sys/types.h>
#include <sys/time.h>
#include "pmapi.h"

#define OPTIONSTR	"df:Fgi:prRt:TG"/* getopt option string; a letter
					   followed by ':' takes an
					   argument */
#define MAX_THREADS	32767		/* maximum number of threads */
#define NTHREADS	16		/* number of thread info entries to
					   retrieve in one getthrds call */
#define	NOGROUP		0		/* do not decode and print group
					   flags and member count */
#define	GROUP		1		/* decode and print group flags
					   and member count */
#define NOTID		-1		/* init value for worker tid */
#define TRAP		0x7c821008	/* the debugger TRAP : tweq r2,r2 */

/*
 * Error reporting wrappers: each one passes the source line of the
 * call site (__LINE__) to pmerror() or pexit().  PEXIT takes only a
 * format string; PEXIT2, PEXIT3 and PEXIT4 take a format string
 * followed by one, two or three values for it.
 */
#define PMERROR(a1,a2)          pmerror(__LINE__, a1, a2)
#define PEXIT(a1)               pexit(__LINE__, a1)
#define PEXIT2(a1,a2)           pexit(__LINE__, a1, a2)
#define PEXIT3(a1,a2,a3)        pexit(__LINE__, a1, a2, a3)
#define PEXIT4(a1,a2,a3,a4)     pexit(__LINE__, a1, a2, a3, a4)

#define PROGRAM		0		/* program breakpoint */
#define REPROGRAM	1		/* reprogramming/reset breakpoint */
#define READTHREAD	2		/* read thread data breakpoint */
#define READGROUP	3		/* read group data breakpoint */
#define NBREAKS		4		/* number of breakpoints */

/*
 * BREAKTYPE(x) : printable name of breakpoint type x.
 *
 * A REPROGRAM breakpoint is named "REPROGRAM" when the global Reprogram
 * flag is set and "RESET" otherwise.  Any value that is not one of the
 * four breakpoint types yields "UNKOWN TYPE".
 *
 * The argument and the whole expansion are parenthesized so that the
 * macro can be given an arbitrary expression and used inside a larger
 * expression.  x is evaluated several times, so it must not have side
 * effects.
 */
#define	BREAKTYPE(x)	\
	(((x) == PROGRAM)    ? "PROGRAM"     :\
	 ((x) == REPROGRAM)  ? (Reprogram ? "REPROGRAM" : "RESET") :\
	 ((x) == READTHREAD) ? "READ THREAD" :\
	 ((x) == READGROUP)  ? "READ GROUP"  : "UNKOWN TYPE")

/*
 * Table of the breakpoints the debugger plants in the traced process,
 * one entry per breakpoint type.  The addresses are hard-coded; the
 * debugger reads and writes instructions at these addresses in the
 * debuggee with ptrace.
 * NOTE(review): the addresses presumably match the text layout of one
 * specific build of the traced test program -- confirm before reuse.
 */
struct {
	unsigned	addr;	/* address of breakpoint */
	unsigned	isave;	/* saving area for instruction */
	unsigned	isave2;	/* saving area for next instruction
				   (the one at addr+4, replaced while
				   single-stepping over the breakpoint) */
	int		type;	/* type of breakpoint */
} breakpoint[NBREAKS] = {
	{0x10000418, 0, 0, PROGRAM},
	{0x10000640, 0, 0, REPROGRAM},
	{0x10000664, 0, 0, READTHREAD},
	{0x10000514, 0, 0, READGROUP},
};

/*
 * Per-worker record.  init_tdata() fills in iterations, tid and gdouble;
 * the worker thread fills in everything else as it runs.
 */
typedef struct {		/* data structure for worker threads */
  int		iterations;	/* number of iterations in busy loops;
				   set to 0 by the worker once both
				   busy loops are done */
  tid_t		tid;		/* thread id, NOTID until the worker
				   has started */
  double	*gdouble;	/* pointer to global double to increment */
  pm_prog_t	prog;		/* to save programmation on entry */
  pm_prog_t	prog1;		/* to save programmation after first loop */
  pm_prog_t	gprog;		/* to save group programmation */
  pm_data_t	data1;		/* to save data after first loop */
  pm_data_t	data2;		/* to save data after second loop */
  pm_data_t	gdata;		/* to save group data */
  timebasestruct_t	tstamp1;/* timestamp after first loop */
  timebasestruct_t	tstamp2;/* timestamp after second loop */
  timebasestruct_t	gtstamp;/* timestamp for group data */
} threaddata_t;

threaddata_t		Tdata[MAX_THREADS];	/* array of thread private
						   data structures */
double			Tdouble[MAX_THREADS];	/* globals to increment
						   in busy loop */
pthread_t		Threadid[MAX_THREADS];	/* pthread ids saving area */

pthread_mutex_t		WorkLock;		/* mutex to synchronize
						   threads */
pthread_cond_t		CondWork;		/* condvar on which worker
						   threads block */
pthread_cond_t		CondMain;		/* condvar on which the main
						   thread blocks */
int			ReadyWorkers;		/* set to 1 by a worker once
						   every worker has recorded
						   its tid */
int			GoWorkers;		/* worker release flag, waited
						   on before counting starts
						   in process group mode */
int			Go2Workers;		/* second worker release flag,
						   set by the last worker once
						   the group is reprogrammed */
tid_t			LastWorker;		/* tid of last worker to
						   finish */
int			Iterations;		/* number of iterations
						   in busy loop */
int			Failtest;		/* flag to turn on tests
						   that are supposed to fail */
int			Threads;		/* number of threads to
						   create */
int			Group;			/* flag to turn on group
						   tests */
int			EventGroup = 0;		/* flag to turn on event
						   group tests */
int			Process;		/* flag to turn on process
						   group tests */
int			Reset;			/* flag to turn on reset
						   tests */
int			Reprogram;		/* flag to turn on
						   reprogramming tests */
int			Debugger;		/* flag to turn on debugger
						   tests */
int			Floatcount;		/* count FP inst and cycles
						   instead of cpi */
pm_info2_t		Pminfo;			/* anchor to information
						   returned by pm_initialize */
pm_groups_info_t	Pmginfo;		/* anchor to event group
						   information returned by
						   pm_initialize */
int			Timestamp;		/* use timestamps */

#define myprintf printf /* facilitates removal of temporary printfs */

/*
 * Report a PMAPI error.
 *
 * lineno : source line of the failing call (supplied by PMERROR)
 * where  : text handed to pm_error() to identify the failing call
 * error  : PMAPI error code to decode
 *
 * Writes a "line N: " prefix to stderr and lets pm_error() decode and
 * print the error.  In failtest mode (-F) errors are expected, so the
 * function returns to the caller; otherwise the program exits with
 * status 1.
 */
void
pmerror(int lineno, char *where, int error)
{
  fprintf(stderr, "line %d: ", lineno);
  pm_error(where, error);

  if (Failtest)
    return;

  exit(1);
}

/*
 * Print an error message with its location, then exit.
 *
 * lineno : source line of the failure (supplied by the PEXIT macros)
 * fmt    : printf-style format string
 * ...    : values for fmt
 *
 * The whole message, including its terminating newline, is written to
 * stderr so that it stays on one stream; the newline used to go to
 * stdout, which left the stderr line unterminated.  Never returns: the
 * program exits with status 1.
 */
void
pexit(int lineno, char *fmt, ...)
{
  va_list args;

  fprintf(stderr, "line %d: ", lineno);

  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  fputc('\n', stderr);

  exit(1);
}

/*
 * Print an error message followed by the usage text, then exit.
 *
 * error : one-line description of what was wrong with the command line
 *
 * Everything is written to stdout.  The first synopsis line now ends
 * with a newline (the two synopsis lines used to run together) and the
 * synopsis lists -p, which is accepted by OPTIONSTR and described in
 * the option list below.  Never returns: exits with status 1.
 */
void
pusageexit(char *error)
{
  printf("%s\n\n", error);
  printf("usage : kthreadapit1 [-i count] [-t count] [-T] [-g] [-p] [-r|-R]\n");
  printf("                     [-d] [-f [1|2]] [-F] [-G]\n\n");
  printf("   -i count : number of iteration in worker loop\n");
  printf("   -t count : number of threads to create (max is %d)\n",
	 MAX_THREADS);
  printf("   -g       : create group then report and reset group data in threads\n");
  printf("   -p       : create process group (need at least one worker thread, or -d)\n");
  printf("   -f       : count number of floating point inst and cycles instead of cpi\n");
  printf("              1 and 2 select a different set of FP events\n");
  printf("   -F       : failure tests, i.e. make calls supposed to fail\n");
  printf("   -r       : reset data in worker\n");
  printf("   -R       : reprogram counting in worker\n");
  printf("   -d       : use debugger to count\n");
  printf("   -T       : print timestamps\n");
  printf("   -G       : use an event group\n");
  
  exit(1);
}

/*
 * Convert a numeric option argument.
 *
 * Returns the value when text is a complete decimal number in the
 * range 0..INT_MAX, and -1 otherwise (empty or non-numeric text,
 * trailing characters, negative value, or overflow).
 */
static int
parse_count(const char *text)
{
  char	*end;
  long	value;

  errno = 0;
  value = strtol(text, &end, 10);

  if (errno != 0 || end == text || *end != '\0' ||
      value < 0 || value > INT_MAX)
    return -1;

  return (int) value;
}

/*
 * process arguments and set global test flags
 *
 * argc, argv : command line as received by main
 *
 * Resets Debugger, Failtest, Threads, Group, Process, Reset, Reprogram
 * and Floatcount to 0 and Iterations to 100, then applies the options
 * listed in OPTIONSTR.  Timestamp and EventGroup are only ever set
 * here; they keep their zero static initialisation otherwise.
 *
 * Numeric arguments (-f, -i, -t) must be non-negative decimal numbers
 * that fit in an int.  They used to be stored unchecked from strtoul,
 * so a negative iteration count turned into a practically endless
 * worker loop and a negative thread count slipped past the
 * MAX_THREADS check.
 *
 * Any invalid option or combination ends the program through
 * pusageexit().
 */
void
process_args(int argc, char **argv)
{
  extern char	*optarg;
  int		opt;

  Debugger   = 0;
  Failtest   = 0;
  Threads    = 0;
  Group      = 0;
  Process    = 0;
  Reset      = 0;
  Reprogram  = 0;
  Floatcount = 0;
  Iterations = 100;

  while ((opt = getopt(argc, argv, OPTIONSTR)) != EOF) {
    switch (opt) {
    case 'd':
      Debugger = 1;
      break;

    case 'f':
      if ((Floatcount = parse_count(optarg)) < 0)
	pusageexit("invalid argument for -f");
      break;

    case 'F':
      Failtest = 1;
      break;

    case 'g':
      Group = 1;
      break;

    case 'G':
      EventGroup = 1;
      break;

    case 'i':
      if ((Iterations = parse_count(optarg)) < 0)
	pusageexit("invalid iteration count");
      break;

    case 'p':
      Process = 1;
      break;

    case 'r':
      Reset = 1;
      break;

    case 'R':
      Reprogram = 1;
      break;

    case 't':
      if ((Threads = parse_count(optarg)) < 0)
	pusageexit("invalid thread count");
      if (Threads > MAX_THREADS)
	pusageexit("too many threads");
      break;

    case 'T':
      Timestamp = 1;
      break;

    default:
      pusageexit("invalid argument");
    }
  }

  /* a process group needs something to count: workers or the debuggee */
  if (Process && !Threads && !Debugger)
    pusageexit("cannot use -p without -t or -d");

  if (Reset && Reprogram)
    pusageexit("cannot use -r with -R");
}

/*
 * Print the event programmed on one counter.
 *
 * prog : programmation being printed
 * gnum : event group number, only used when EventGroup is set
 * pmc  : counter index
 * last : non-zero for the final counter, which is followed by a space
 *        instead of a comma
 *
 * With EventGroup set the event number comes from the group table in
 * Pmginfo, otherwise from prog itself.  COUNT_NOTHING prints as "nc".
 */
static void
print_prog_event(pm_prog_t *prog, int gnum, int pmc, int last)
{
  int ev;

  if (EventGroup)
    ev = Pmginfo.event_groups[gnum].events[pmc];
  else
    ev = prog->events[pmc];

  if (ev == COUNT_NOTHING) {
    if (last)
      printf("nc ");
    else
      printf("nc,");
    return;
  }

  if (last)
    printf("%d(%s) ", ev, Pminfo.list_events[pmc][ev].short_name);
  else
    printf("%d(%s),", ev, Pminfo.list_events[pmc][ev].short_name);
}

/*
 * decode and print programmation
 *
 * where : label printed in front of the line
 * prog  : programmation to decode
 *
 * Output is one line: the label, the decoded mode bits (only when the
 * mode word is non-zero), then one entry per counter up to
 * Pminfo.maxpmcs, and the group number when EventGroup is set.
 * prog->events[0] is taken as the group number in that case.
 */
void
print_prog(char *where, pm_prog_t *prog)
{
  int pmc, group;

  printf("%s : ", where);

  /* mode bits are decoded only when at least one of them is set */
  if (prog->mode.w) {
    printf("mode = ");

    if (prog->mode.b.proctree)
      printf("proctree,");
    if (prog->mode.b.process)
      printf("process,");
    if (prog->mode.b.user)
      printf("user,");
    if (prog->mode.b.kernel)
      printf("kernel,");

    if (!prog->mode.b.count)
      printf("stop;");
    else
      printf("count;");
  }

  printf(" events = ");

  group = prog->events[0];

  /* every counter but the last is followed by a comma */
  pmc = 0;
  while (pmc < Pminfo.maxpmcs-1) {
    print_prog_event(prog, group, pmc, 0);
    pmc++;
  }

  /* the last counter is followed by a space */
  print_prog_event(prog, group, Pminfo.maxpmcs-1, 1);

  if (!EventGroup) {
    printf("\n");
    return;
  }

  printf("[group %d]\n", group);
}

/*
 * Print a timestamp, when timestamps were requested (-T).
 *
 * where : label printed in front of the line
 * time  : timebase value; it is passed to time_base_to_time(), which
 *         works on the structure in place, and its tb_high and tb_low
 *         fields are then printed labelled as seconds and nanoseconds
 *
 * Does nothing when Timestamp is not set.  A conversion failure ends
 * the program through PEXIT2.
 */
void
print_time(char *where, timebasestruct_t *time)
{
  int status;

  if (Timestamp) {
    status = time_base_to_time(time, TIMEBASE_SZ);
    if (status < 0)
      PEXIT2("time_base_to_time failed : rc = %d", status);

    printf("%s : timestamp = %usec,%uns\n", where, time->tb_high, time->tb_low);
  }
}

/*
 * Print accumulated counter values and, on request, the group status.
 *
 * where : label printed in front of each line
 * data  : counter data to print
 * group : GROUP to also decode the group flags and member count,
 *         anything else (NOGROUP) to print the counts only
 *
 * The counts of all Pminfo.maxpmcs counters are printed on one line,
 * separated by commas.  The group line is printed only when it was
 * asked for and the group has at least one member.
 */
void
print_data(char *where, pm_data_t *data, int group)
{
  int pmc = 0;

  printf("%s : counts = ", where);

  /* all counters but the last, each followed by a comma */
  while (pmc < Pminfo.maxpmcs-1) {
    printf("%llu,", data->accu[pmc]);
    pmc++;
  }

  /* last counter ends the line */
  printf("%llu\n", data->accu[Pminfo.maxpmcs-1]);

  if (group != GROUP)
    return;

  if (!data->ginfo.members)
    return;

  printf("%s : %s %s group with %d members\n", where,
	 data->ginfo.flags.b.consistent ? "consistent" : "inconsistent",
	 data->ginfo.flags.b.process    ? "process"    : "non process",
	 data->ginfo.members);
}

/*
 * init a thread private data structure
 */
init_tdata(int index)
{
  Tdata[index].iterations = Iterations;
  Tdata[index].tid        = NOTID;
  Tdata[index].gdouble    = &(Tdouble[index]);
}

/*
 * worker thread 
 */
void *
worker(void *arg)
{
  int			i, rc;
  unsigned long long	ui;
  threaddata_t		*td = (threaddata_t *) arg;
  pm_prog_t		reprog;

  /* save my tid to mark that I am ready */
  td->tid = thread_self();

  /* for process group, block all threads before creating group */
  if (Process) {
    if ((rc = pthread_mutex_lock(&WorkLock)) != 0)
      PEXIT2("pthread_mutex_lock failed in worker : rc = %d", rc);

    /* check to see if all other threads are ready */
    for (i = 0; i < Threads; i++)
      if (Tdata[i].tid == NOTID)
	break;

    if (i == Threads) {
      /* all worker threads are ready */
      ReadyWorkers = 1;

      /* signal main thread, so that it can create the group */
      if ((rc = pthread_cond_signal(&CondMain)) != 0)
	PEXIT2("pthread_cond_signal failed in worker : rc = %d", rc);
    }

    /* wait for GoWorkers to be set by main thread (when the group is ready) */
    while (GoWorkers != 1)
      if ((rc = pthread_cond_wait(&CondWork, &WorkLock)) != 0)
	PEXIT2("pthread_cond_wait failed in worker : rc = %d", rc);

    if ((rc = pthread_mutex_unlock(&WorkLock)) != 0)
      PEXIT2("pthread_mutex_unlock failed in worker : rc = %d", rc);
    
  } /* if (Process) */

  /* check what we are counting */
  if (Group) {
    if ((rc = pm_get_program_mygroup(&(td->prog))) > 0)
      PMERROR("pm_get_program_mygroup in worker", rc);
  }
  else {
    if ((rc = pm_get_program_mythread(&(td->prog))) > 0)
      PMERROR("pm_get_program_mythread in worker", rc);
  }

  /* try to start counting for a non existent group */
  if (Failtest && !Group && !Process)
    if ((rc = pm_start_mygroup()) > 0)
      PMERROR("pm_start_mygroup in worker", rc);

  /* start counting for this thread */
  if ((rc = pm_start_mythread()) > 0)
    PMERROR("pm_start_mythread in worker", rc);
  
  /* stay busy for a little while */
  for (ui = 0; ui < td->iterations; ui++)
    (*(td->gdouble)) += 0.5;

  /* get the accumulated counts, and optional timestamp, for this thread */
  if (Timestamp) {
    if ((rc = pm_get_tdata_mythread(&(td->data1), &(td->tstamp1))) > 0)
      PMERROR("pm_get_tdata_mythread data1 in worker", rc);
  }
  else {
    if ((rc = pm_get_data_mythread(&(td->data1))) > 0)
      PMERROR("pm_get_data_mythread data1 in worker", rc);
  }

  /* check what we are couting for this thread */
  if ((rc = pm_get_program_mythread(&(td->prog1))) > 0)
    PMERROR("pm_get_program_mythread prog1 in worker", rc);
  
  if (Reset) {
    if (Group) {
      /* reset group data which makes group inconsistent (-g -r) */
      if ((rc = pm_reset_data_mygroup()) > 0)
	PMERROR("pm_reset_data_mygroup in worker", rc);
    }
    else {
      /* reset thread data which shoudln't make group inconsistent (-p -r) */
      if ((rc = pm_reset_data_mythread()) > 0)
	PMERROR("pm_reset_data_mythread in worker", rc);
    }
  }
  else if (Reprogram) {
    if (Process) {
      if (Failtest) {
	/* this should cause synchronization problems (need big number of
	 * iterations), at least on MP, because not all the threads will
	 * reach this point at the same time, so some of the threads will
	 * make _mygroup calls above when the group does not exist anymore */
	if ((rc = pm_get_program_mygroup(&reprog)) > 0)
	  PMERROR("pm_get_program_mygroup in worker", rc);
	
	if ((rc = pm_delete_program_mygroup()) > 0)
	  PMERROR("pm_delete_program_mygroup in worker", rc);
	
	/* recreate a process group only when -g was used too */
	reprog.mode.b.process = Group;
	if ((rc = pm_set_program_mygroup(&reprog)) > 0)
	  PMERROR("pm_(re)set_program_mygroup in worker", rc);
      }
      else {
	tid_t mytid = td->tid;
	
	if ((rc = pthread_mutex_lock(&WorkLock)) != 0)
	  PEXIT2("pthread_mutex_lock failed in worker : rc = %d", rc);
	
	/* reset my tid to mark that I am ready to be reprogrammed */
	td->tid = NOTID;
	
	/* check to see if all other threads are ready */
	for (i = 0; i < Threads; i++)
	  if (Tdata[i].tid != NOTID)
	    break;

	/* last thread reprograms the group */
	if (i == Threads) {
	  /* get current group programmation */
	  if ((rc = pm_get_program_mygroup(&reprog)) > 0)
	    PMERROR("pm_get_program_mygroup failed in worker", rc);

	  /* delete group */
	  if ((rc = pm_delete_program_mygroup()) > 0)
	    PMERROR("pm_delete_program_mygroup failed in worker", rc);
	  
	  /* recreate the group */
	  if ((rc = pm_set_program_mygroup(&reprog)) > 0)
	    PMERROR("pm_(re)set_program_mygroup failed in worker", rc);
	  
	  /* release all the other workers */
	  Go2Workers = 1;
	  
	  if ((rc = pthread_cond_broadcast(&CondWork)) != 0)
	    PEXIT2("pthread_cond_broadcast failed in worker : rc = %d", rc);
	}
	else {
	  /* wait for Go2Workers to be set by last
	   * thread when group has been recreated */
	  while (Go2Workers != 1)
	    if ((rc = pthread_cond_wait(&CondWork, &WorkLock)) != 0)
	      PEXIT2("pthread_cond_wait failed in worker : rc = %d", rc);
	}
	
	if ((rc = pthread_mutex_unlock(&WorkLock)) != 0)
	  PEXIT2("pthread_mutex_unlock failed in worker : rc = %d", rc);
	
	/* restore my tid */
	td->tid = mytid;
      }
    }
    else {
      /* get, delete and reset thread programmation */
      if ((rc = pm_get_program_mythread(&reprog)) > 0)
	PMERROR("pm_get_program_mythread in worker", rc);
      
      if ((rc = pm_delete_program_mythread()) > 0)
	PMERROR("pm_delete_program_mythread in worker", rc);
      
      if ((rc = pm_set_program_mythread(&reprog)) > 0)
	PMERROR("pm_(re)set_program_mythread in worker", rc);
    }
  } /* else if (Reprogram) */

  /* stay busy for a little while */
  for (ui = 0; ui < td->iterations; ui++)
    (*(td->gdouble)) += 0.5;

  /* this worker is done */
  td->iterations = 0;

  /* stop counting for this thread */
  if ((rc = pm_stop_mythread()) > 0)
    PMERROR("pm_stop_mythread in worker", rc);

  if (Group || Process) {
    /* save tid, to print group results only 
     * for the last thread to finish, see main */
    LastWorker = td->tid;
    
    /* read group data, with optional timestamp */
    if (Timestamp) {
      if ((rc = pm_get_tdata_mygroup(&(td->gdata), &(td->gtstamp))) > 0)
        PMERROR("pm_get_tdata_mygroup gdata in worker", rc);
    }
    else {
      if ((rc = pm_get_data_mygroup(&(td->gdata))) > 0)
        PMERROR("pm_get_data_mygroup gdata in worker", rc);
    }
    
    /* read group programmation */
    if ((rc = pm_get_program_mygroup(&(td->gprog))) > 0)
      PMERROR("pm_get_program_mygroup gprog in worker", rc);
  }
  
  /* try to read data and programmation for a non existent group */
  if (Failtest && !Group && !Process ) {
    if ((rc = pm_get_data_mygroup(&(td->gdata))) > 0)
      PMERROR("pm_get_data_mygroup gdata in worker", rc);
    
    if ((rc = pm_get_program_mygroup(&(td->gprog))) > 0)
      PMERROR("pm_get_program_mygroup gprog in worker", rc);
  }
  
  /* read this thread final data values, and optional timestamp */
  if (Timestamp) {
    if ((rc = pm_get_tdata_mythread(&(td->data2), &(td->tstamp2))) > 0)
      PMERROR("pm_get_tdata_mythread data2 in worker", rc);
  }
  else {
    if ((rc = pm_get_data_mythread(&(td->data2))) > 0)
      PMERROR("pm_get_data_mythread data2 in worker", rc);
  }
  
} /* worker () */

/*
 * Build the list of tids that caused the breakpoint, using getthrds.
 *
 * pid        : process to inspect
 * threadlist : receives the tids of the threads whose ti_flag has
 *              TTRCSIG set; the list is terminated with a 0 entry
 * ntids      : receives the number of tids stored, terminator excluded
 *
 * Thread information is fetched NTHREADS entries at a time until a
 * call returns fewer than NTHREADS entries.  A getthrds failure ends
 * the program through PEXIT2.
 */
void
gettids(pid_t pid, struct ptthreads *threadlist, int *ntids)
{
  struct thrdsinfo64	tib[NTHREADS];
  int			entrysize = sizeof(struct thrdsinfo64);
  int			count, k;
  tid_t			cursor;

  *ntids = 0;
  count  = 0;
  cursor = 0;

  for (;;) {
    /* fetch the next batch of thread information */
    count = getthrds(pid, tib, entrysize, &cursor, NTHREADS);
    if (count < 0)
      PEXIT2("getthrds for process %d", pid);

    /* keep only the threads flagged with TTRCSIG */
    for (k = 0; k < count; k++) {
      if (!(tib[k].ti_flag & TTRCSIG))
	continue;

      threadlist->th[*ntids] = tib[k].ti_tid;
      (*ntids)++;
    }

    /* a short batch means there is nothing left to fetch */
    if (count != NTHREADS)
      break;
  }

  /* terminate the list */
  threadlist->th[*ntids] = 0;

} /* gettids () */

/*
 * light-weight debugger : inserts API calls to start
 * counting and read data at breakpoints in a debuggee
 */
#ifndef __64BIT__ 
void
debugger(void)
{
  int			i, j, rc, type, first_stop, ntids, contsig;
  pid_t			pid;
  tid_t			tid;
  union wait		status;
  unsigned		iar, inst;
  pm_prog_t		prog;
  pm_data_t		data;
  int			*iaddr;
  struct ptthreads	ptthreadslist;
  timebasestruct_t	time;
  
  first_stop = 1;
  
  while (1) {
    /* wait for next signal */
    pid = wait(&(status.w_status));
    
    /* set continue signal */
    contsig = status.w_stopsig == SIGTRAP ? 0 : status.w_stopsig;
    
    if (WIFEXITED(status) || WIFSIGNALED(status))
      break;
    
    switch (status.w_S.w_Stopval) {
      /* stopped after fork */
    case W_SFWTED:
      PEXIT("process forked, aborting trace");
      
      /* stopped after load */
    case W_SLWTED:
      PEXIT("process executed load, aborting trace");
      
      /* stopped after exec */
    case W_SEWTED:
      /* stopped after trace */
    case W_STRC:
      
      if (first_stop) {
	/* plug in breaks for all the requested start and stop addresses */
	for (i = 0; i < NBREAKS; i++) {
	  /* set reprogram breakpoint only when asked */
	  if (!Reprogram && !Reset && breakpoint[i].type == REPROGRAM) 
	    continue;
	  
	  /* read instruction at this address */
	  inst = ptrace(PT_READ_I, pid, (int *) breakpoint[i].addr, 0, 0);
	  if (inst == -1)
	    perror("could not read instruction");
	  
	  /* save the original instruction at that address */
	  breakpoint[i].isave = inst;
	  
	  /* write a trap instruction at this address */
	  if (ptrace(PT_WRITE_I, pid,
		     (int *) breakpoint[i].addr, TRAP, 0) == -1 )
	    perror("could not write instruction");
	  
	  printf("%s breakpoint inserted at %08X\n",
		 BREAKTYPE(breakpoint[i].type), breakpoint[i].addr);
	}
	
	/* do this only once */
	first_stop = 0;
	
	/* continue */
	if (ptrace(PT_CONTINUE, pid, (int *) 1, contsig, (int *) NULL) == -1) {
	  perror("could not continue");
	  exit(1);
	}
	
	break;
      }
      
      /* get current program counter */
      if ((iar = ptrace(PT_READ_GPR, pid, (int *) IAR, 0, 0)) < 0) {
	perror("could not read iar");
	exit(1);
      }
      
      /* scan breakpoint array for this iar */
      for (i = 0; i < NBREAKS; i++)
	if (iar == breakpoint[i].addr)
	  break;
      
      if (i == NBREAKS) {
	/* not a breakpoint, try single-step */
	for (i = 0; i < NBREAKS; i++)
	  if (iar == (breakpoint[i].addr)+4)
	    break;
	
	if (i == NBREAKS) {
	  printf("BAD breakpoint at %08X\n", iar);
	  break;
	}
	
	/* single step breakpoint, restore original instruction */
	if (ptrace(PT_WRITE_I, pid, (int *) iar, breakpoint[i].isave2, 0) == -1)
	  perror("could not write instruction");
	
	
	/* then restore matching breakpoint */
	if (ptrace(PT_WRITE_I, pid, (int *) breakpoint[i].addr, TRAP, 0) == -1)
	  perror("could not write instruction");
	
	/* continue process */
	if (ptrace(PT_CONTINUE, pid, (int *) 1, contsig, (int *) NULL) == -1) {
	  perror("could not continue");
	  exit(1);
	}
	
	break;
      }
      
      /* get list of threads which caused this breakpoint */
      gettids(pid, &ptthreadslist, &ntids);
      
      /* always take first thread from list */
      tid = ptthreadslist.th[0];

      type = breakpoint[i].type;
      printf("thread %d reached %s breakpoint at %08X\n",
	     tid, BREAKTYPE(type), iar);
      
      /* if program point reached */
      if (type == PROGRAM) {
	/* set up counting mode */
	prog.mode.w       = 0;	/* start with clean mode */
	prog.mode.b.user  = 1;	/* count only user mode */
	prog.mode.b.count = 1;	/* start counting immediately */
	
	/* init counters to not count anything */
	for (j = 0; j < Pminfo.maxpmcs; j++)
	  prog.events[j] = COUNT_NOTHING;
	
	prog.mode.b.is_group = EventGroup;
	
	printf("Processor type : %s\n", Pminfo.proc_name);

	/* count cycles and instructions, preferably in pmc2 and pmc1 */

	if (EventGroup) {
	  
	  /* set event group ID.  5 cases. */
	  if ((!strcmp(Pminfo.proc_name, "PowerPC604")) ||
	      (!strcmp(Pminfo.proc_name, "PowerPC604e"))) {
	    
	    /* prog.events[0] = ?; */
	    
	  } else if ((!strcmp(Pminfo.proc_name, "POWER3")) ||
		     (!strcmp(Pminfo.proc_name, "POWER3-II"))) {
	    
	    /* Event group ID */
	    prog.events[0] = 4; 
	    
	  } else if ((!strcmp(Pminfo.proc_name, "RS64-II")) ||
		     (!strcmp(Pminfo.proc_name, "RS64-III"))) {
	    
	    /* prog.events[0] = ?; */
	    
	  } else if ((!strncmp(Pminfo.proc_name, "POWER4",6)) ||
                     (!strncmp(Pminfo.proc_name, "PowerPC970",10)) ||
		     (!strncmp(Pminfo.proc_name, "PowerPC970MP",12))) {
	    
	    /* Group 2 on GP, "Basic performance indicators",
	       gives you instructions in pmc1 and cycles in pmc2 */
	    prog.events[0] = 2;
	    
	  }else if ((!strcmp(Pminfo.proc_name, "POWER5"))) {
	    
	    /* Group 1 on GR, "Completion and cycle counts",
	       gives you instructions in pmc5 and cycles in pmc4 */
	    prog.events[0] = 1;
	    
	  }else if ((!strcmp(Pminfo.proc_name, "POWER5-II"))) {
	    
	    /* Group 0 on GQ, "pm_utilization",
	       gives you instructions in pmc2 and cycles in pmc4 */
	    prog.events[0] = 0;
	    
	  } else if ((!strcmp(Pminfo.proc_name, "POWER6"))) {
	    
	    /* Group 0, "CPI and utilization data",
	       gives you instructions in pmc2 and cycles in pmc4 */
	    prog.events[0] = 0;
	    
	  }
	  else if ((!strcmp(Pminfo.proc_name, "POWER7"))) {

      /* Group 0, "CPI and utilization data",
         gives you instructions in pmc4 and cycles in pmc1 */
	  prog.events[0] = 0;

       }
	  printf("Group chosen : %d\n", prog.events[0]);

	} else {
	  
	  /* Individual events specified */
	  
	  /* cycles in pmc 2 is 1 for all processor types (so far!). */
	  prog.events[1] = 1;
	  
	  /* set instructions-completed in pmc1.  Cases based on processor. */
	  if ((!strcmp(Pminfo.proc_name, "PowerPC604")) ||
	      (!strcmp(Pminfo.proc_name, "PowerPC604e"))) {
	    
	    prog.events[0] = 2;
	    
	  } else if ((!strcmp(Pminfo.proc_name, "POWER3")) ||
		     (!strcmp(Pminfo.proc_name, "POWER3-II"))) {
	    
	    prog.events[0] = 1;
	    
	  } else if ((!strcmp(Pminfo.proc_name, "RS64-II")) ||
		     (!strcmp(Pminfo.proc_name, "RS64-III"))) {
	    
	    prog.events[0] = 3;
	    
	  } else if (!strcmp(Pminfo.proc_name, "MPC7450")) { 
	  
	    prog.events[0] = 2;

	  }
	    
	}  /* if (EventGroup) */
	
	/* set programmation */
	if (Group || Process) {
	  prog.mode.b.process = Process;
	  if ((rc = pm_set_program_group(pid, tid, &prog)) > 0)
	    PMERROR("pm_set_program_group", rc);
	}
	else {
	  if ((rc = pm_set_program_thread(pid, tid, &prog)) > 0)
	    PMERROR("pm_set_program_thread", rc);
	}
      }
      /* if reprogram/reset point reached */
      else if (type == REPROGRAM) {
	/* read thread counter data and optional timestamp */
	if (Timestamp) {
	  if ((rc = pm_get_tdata_thread(pid, tid, &data, &time)) > 0)
	    PMERROR("pm_get_tdata_thread", rc);
	}
	else {
	  if ((rc = pm_get_data_thread(pid, tid, &data)) > 0)
	    PMERROR("pm_get_data_thread", rc);
	}
	
        if (Reset) {
	  print_data("reset thread data", &data, 0);
	  print_time("reset thread time", &time);

	  if (Group) {
	    /* reset group data which makes group inconsistent (-g -r) */
	    if ((rc = pm_reset_data_group(pid, tid)) > 0)
	      PMERROR("pm_reset_data_group", rc);
	    }
	  else {
	    /* reset thread data which shoudln't make group inconsistent (-p -r) */
	    if ((rc = pm_reset_data_thread(pid, tid)) > 0)
	      PMERROR("pm_reset_data_thread", rc);
	  }
        }
        else {
	  print_data("reprog thread data", &data, 0);
	  print_time("reprog thread time", &time);

	  /* get, delete and reset programmation */
	  if (Process) {
	    if ((rc = pm_get_program_group(pid, tid, &prog)) > 0)
	      PMERROR("pm_get_program_group", rc);
	  
	    print_prog("reprog group prog", &prog);
	  
	    if ((rc = pm_delete_program_group(pid, tid)) > 0)
	      PMERROR("pm_delete_program_group", rc);

	    if ((rc = pm_set_program_group(pid, tid, &prog)) > 0)
	      PMERROR("pm_set_program_group", rc);
	  }
	  else {
	    if ((rc = pm_get_program_thread(pid, tid, &prog)) > 0)
	      PMERROR("pm_get_program_thread", rc);
	  
	    print_prog("reprog thread", &prog);
	  
	    if ((rc = pm_delete_program_thread(pid, tid)) > 0)
	      PMERROR("pm_delete_program_thread", rc);
	  
	    if ((rc = pm_set_program_thread(pid, tid, &prog)) > 0)
	      PMERROR("pm_set_program_thread", rc);
	  }
	} /* Reprogram */
      }
      /* if read thread data point reached */
      else if (type == READTHREAD) {
	/* stop the counting */
	if ((rc = pm_stop_thread(pid, tid)) > 0)
	  PMERROR("pm_stop_thread", rc);

	/* read thread counter data and optional timestamp */
	if (Timestamp) {
	  if ((rc = pm_get_tdata_thread(pid, tid, &data, &time)) > 0)
	    PMERROR("pm_get_tdata_thread", rc);
	}
	else {
	  if ((rc = pm_get_data_thread(pid, tid, &data)) > 0)
	    PMERROR("pm_get_data_thread", rc);
	}
	
	/* check thread counter programmation */
	if ((rc = pm_get_program_thread(pid, tid, &prog)) > 0)
	  PMERROR("pm_get_program_thread", rc);

	if (!strcmp(Pminfo.proc_name, "POWER5"))
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[4]/data.accu[3]);
	else if (!strcmp(Pminfo.proc_name, "POWER5-II"))
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);
	else if (!strcmp(Pminfo.proc_name, "POWER6"))
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);
	else if (!strcmp(Pminfo.proc_name, "POWER7"))
	    printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);  
	else
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[0]/data.accu[1]);
	
	print_data("thread data", &data, 0);
	print_time("thread time", &time);
	print_prog("thread prog", &prog);
      }
      /* else a read group point has been reached */
      else {
	/* stop the counting */
	if ((rc = pm_stop_thread(pid, tid)) > 0)
	  PMERROR("pm_stop_thread", rc);
	
	/* read main thread counter data and optional timestamp */
	if (Timestamp) {
	  if ((rc = pm_get_tdata_thread(pid, tid, &data, &time)) > 0)
	    PMERROR("pm_get_tdata_thread", rc);
	}
	else {
	  if ((rc = pm_get_data_thread(pid, tid, &data)) > 0)
	    PMERROR("pm_get_data_thread", rc);
	}
	
	print_data("main thread data", &data, 0);
	print_time("main thread time", &time);
	
	if (Group || Process) {
	  /* read group counter data and optional timestamp */
	  if (Timestamp) {
	    if ((rc = pm_get_tdata_group(pid, tid, &data, &time)) > 0)
	      PMERROR("pm_get_tdata_group", rc);
	  }
	  else {
	    if ((rc = pm_get_data_group(pid, tid, &data)) > 0)
	      PMERROR("pm_get_data_group", rc);
	  }
	  
	  /* check group counter programmation */
	  if ((rc = pm_get_program_group(pid, tid, &prog)) > 0)
	    PMERROR("pm_get_program_group", rc);
	  
	  if (!strcmp(Pminfo.proc_name, "POWER5"))
	    printf("group ipc  : %3.1f\n", (float)data.accu[4]/data.accu[3]);
	  else if (!strcmp(Pminfo.proc_name, "POWER5-II"))
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);
	  else if (!strcmp(Pminfo.proc_name, "POWER6"))
		printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);
	  else if (!strcmp(Pminfo.proc_name, "POWER7"))
        printf ("thread %d ipc  : %3.1f\n", tid, (float)data.accu[1]/data.accu[3]);
	  else
	    printf("group ipc  : %3.1f\n", (float)data.accu[0]/data.accu[1]);
	  
	  print_data("group data", &data, 1);
	  print_time("group time", &time);
	  print_prog("group prog", &prog);
	}
	
	/* check thread counter programmation */
	if ((rc = pm_get_program_thread(pid, tid, &prog)) > 0)
	  PMERROR("pm_get_program_thread", rc);
	
	print_prog("main thread prog", &prog);
      }
      
      /* restore current instruction */
      if (ptrace(PT_WRITE_I, pid, (int *) iar, breakpoint[i].isave, 0) == -1)
	perror("could not write instruction");
      
      /*
       * read thread breakpoint and reprogram breakpoint
       * when not using groups need to be reached by
       * all threads and need to be restored after
       * executing the original instruction. This is
       * done by single-stepping only the thread which
       * has hit the breakpoint, and restoring the
       * breakpoint immediately after.
       */
      if (type == READTHREAD || (type == REPROGRAM && !Process && !Group)) {
	/* calculate address of next instruction */
	iaddr = (int *) ((breakpoint[i].addr)+4);
	
	/* read instruction */
	if ((inst = ptrace(PT_READ_I, pid, iaddr, 0, 0)) == -1)
	  perror("could not read instruction");
	
	/* make sure breakpoint not already installed */
	if (inst != TRAP) {
	  /* save the original instruction at that address */
	  breakpoint[i].isave2 = inst;
	  
	  /* write a trap instruction at this address */
	  if (ptrace(PT_WRITE_I, pid, iaddr, TRAP, 0) == -1)
	    perror("could not write instruction");
	}
	
	/* continue only thread which has hit the breakpoint */
	if (ptrace(PTT_CONTINUE, tid, 
		   (int *)1, contsig,
		   (int *)NULL) == -1) {
	  perror("could not continue");
	  exit(1);
	}
      }
      else {
	/* all other breakpoints are used only
	 * once and do not need to be restored */
	if (ptrace(PT_CONTINUE, pid, (int *) 1, contsig, (int *) NULL) == -1) {
	  perror("could not continue");
	  exit(1);
	}
      }
      
      break;
      
    default:
      if (ptrace(PT_CONTINUE, pid, (int *) 1, contsig, (int *) NULL) == -1) {
	perror("could not continue");
	exit(1);
      }
    } /* switch */
  } /* while */
  
} /* debugger() */

#endif /* ! __64BIT__ */

/*
 * main - driver for the PMAPI thread / group counting test.
 *
 * Flow, as implemented below:
 *   1. process_args() decodes the command line into the global flags
 *      (Failtest, Group, Process, Threads, Debugger, Floatcount,
 *      EventGroup, Reprogram, ...).
 *   2. With Failtest set, a pm_set_program_* call is attempted before
 *      pm_initialize().
 *   3. pm_initialize() fills Pminfo / Pmginfo.
 *   4. With Debugger set (32-bit builds only), the process forks; the
 *      child execs /usr/pmapi/samples/ipc4 under ptrace and the parent
 *      runs debugger() and exits.
 *   5. The event / event group to count is chosen from Pminfo.proc_name.
 *   6. The calling thread (or its group) is programmed, counting is
 *      stopped, and the data / programmation are read back.
 *   7. worker() is run inline (no threads) or in Threads pthreads,
 *      optionally inside a process group created once every worker is
 *      blocked.
 *   8. Final data / programmation are read and everything is printed.
 *
 * Returns 0 on normal completion.  The Debugger and Failtest paths
 * terminate through exit() instead of returning.
 */
int
main(int argc, char *argv[])
{
  pm_data_t	stopdata, enddata, endgdata, faildata;
  pm_prog_t	prog, startprog, stopprog, endprog, endgprog, failprog;
  int		rc, i, j;
  void		*rval;	/* pthread_join() stores a void * here, not an int */
  pthread_attr_t	attr;

  /* decode flags and set globals */
  process_args(argc, argv);

  if (Failtest) {
    /* init failprog to count nothing */
    failprog.mode.w = 0;
    for (i = 0; i < MAX_COUNTERS; i++)
      failprog.events[i] = COUNT_NOTHING;

    if (Group) {
      /* try to set the programmation before calling pm_initialize */
      if ((rc = pm_set_program_mygroup(&failprog)) > 0)
        PMERROR("pm_set_program_mygroup", rc);
    }
    else {
      /* try to set the programmation before calling pm_initialize */
      if ((rc = pm_set_program_mythread(&failprog)) > 0)
        PMERROR("pm_set_program_mythread", rc);
    }
  }

#define PM_DEBUG	0x40000000 /* cut-n-pasted from pminternal.h */

  Pmginfo.event_groups = NULL;

  if ((rc = pm_initialize(
                 PM_CAVEAT|PM_VERIFIED|PM_UNVERIFIED|PM_DEBUG|PM_GET_GROUPS,
                 &Pminfo, &Pmginfo, PM_CURRENT)) > 0)
    PMERROR("pm_init", rc);

  /* Force event-group mode on the processors listed here, even when
   * it was not requested through -G (software group : POWER3) */
  if (!EventGroup) {
    if ((!strcmp(Pminfo.proc_name, "POWER7")) ||
        (!strcmp(Pminfo.proc_name, "POWER6")) ||
        (!strcmp(Pminfo.proc_name, "POWER5-II")) ||
        (!strcmp(Pminfo.proc_name, "POWER5")) ||
        (!strcmp(Pminfo.proc_name, "POWER4")) ||
        (!strcmp(Pminfo.proc_name, "PowerPC970")) ||
        (!strcmp(Pminfo.proc_name, "POWER4-II")) ||
        (!strcmp(Pminfo.proc_name, "PowerPC970MP"))) {
      EventGroup = 1;
    }
  }

  /* debugger test : fork and exec test program
   * and run debugger in parent process */
  if (Debugger) {

#ifdef __64BIT__
    printf ("Trying to use debugger in 64bit mode!\n");
    exit(-1);
#else /* ! __64BIT__ */
    char  arg2[10] = "0";
    char  *arg1    = "100000000";
    pid_t child;

    child = fork();

    if (child == -1) {
      /* no child to debug: do not fall into the parent branch */
      perror("could not fork");
      exit(1);
    }

    if (child == 0) {
      /* child */
      if (ptrace(PT_TRACE_ME, 0, 0, 0, 0) < 0) {
        perror("could not trace child");
        exit(1);
      }

      /* bounded formatting: arg2 holds at most 9 characters + NUL */
      if (Threads)
        snprintf(arg2, sizeof(arg2), "%d", Threads);

      /* the variadic list must end with a null *pointer* */
      if (execl("/usr/pmapi/samples/ipc4", "ipc4", arg1, arg2,
                (char *) NULL) < 0) {
        perror("could not exec child");
        exit(1);
      }
    }
    else {
      /* parent, just debug child */
      debugger();

      exit(0);
    }
#endif /* ! __64BIT__ */
  }

  /* if necessary, init data for multi-threaded tests */
  if (Threads) {
    GoWorkers    = 0;
    Go2Workers   = 0;
    ReadyWorkers = 0;
    LastWorker   = NOTID;

    if ((rc = pthread_attr_init(&attr)) > 0)
      PEXIT2("pthread_attr_init failed : rc = %d", rc);

    /* workers are joined below, so they must not be detached */
    if ((rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_UNDETACHED)) > 0)
      PEXIT2("pthread_attr_setdetachstate failed : rc = %d", rc);

    if ((rc = pthread_mutex_init(&WorkLock, NULL)) != 0)
      PEXIT2("pthread_mutex_init failed : rc = %d", rc);

    if ((rc = pthread_cond_init(&CondWork, NULL)) != 0)
      PEXIT2("pthread_cond_init CondWork failed : rc = %d", rc);

    if ((rc = pthread_cond_init(&CondMain, NULL)) != 0)
      PEXIT2("pthread_cond_init CondMain failed : rc = %d", rc);
  }

  /* init counters to not count anything */
  for (i = 0; i < Pminfo.maxpmcs; i++)
    prog.events[i] = COUNT_NOTHING;

  /* prepare programmation */
  prog.mode.w       = 0;	/* start with clean mode */
  prog.mode.b.user  = 1;	/* count only user mode */
  prog.mode.b.count = 1;	/* start counting immediately */
  prog.events[1]    = 1;	/* count event 1 (cycles) for pmc2 */

  /* Is event group specified */
  prog.mode.b.is_group = EventGroup;

  printf("Processor type : %s\n", Pminfo.proc_name);

  /* select the event (or event group number, stored in events[0])
   * according to the processor name reported by pm_initialize */
  if (!strcmp(Pminfo.proc_name, "POWER3") ||
      (!strcmp(Pminfo.proc_name, "POWER3-II"))) {
    if (Floatcount) {
      if (EventGroup) {
        prog.events[0] = 2;
      } else {
        prog.events[0] = 0;  /* count event 0  (cycles)       in pmc1 */
        prog.events[1] = 35; /* count event 35 (FPU1 instr's) in pmc2 */
        prog.events[4] = 5;  /* count event 5  (FPU0 instr's) in pmc5 */
      }
    } else {
      if (EventGroup) {
        prog.events[0] = 3;
      } else {
        prog.events[0] = 1;  /* count event 1 (instructions) in pmc1 */
      }
    }

  } else if ((!strcmp(Pminfo.proc_name, "PowerPC604")) ||
             (!strcmp(Pminfo.proc_name, "PowerPC604e"))) {
    if (Floatcount)
      prog.events[0] = 15; /* count event 15 (FP instructions) in pmc1 */
    else
      prog.events[0] = 2;  /* count event 2 (instructions) in pmc1 */

  } else if (!strcmp(Pminfo.proc_name, "MPC7450")) {
    if (Floatcount)
      prog.events[2] = 14; /* count event 14 (FPU instructions) in pmc3 */
    else
      prog.events[0] = 2;  /* count event 2 (instructions) in pmc1 */

  } else if (!strncmp(Pminfo.proc_name, "POWER4",6)) {
    /* prefix match: also covers "POWER4-II" */
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 15;  /* Event Group #15, "Floating Point events" */
      else
        prog.events[0] = 29;  /* Event Group #29, "Floating Point events
                                 by unit" */
    }
    else
      prog.events[0] = 33;  /* Event Group #33, "Fix Point Unit events" */

  } else if ((!strncmp(Pminfo.proc_name, "PowerPC970",10)) ||
             (!strncmp(Pminfo.proc_name, "PowerPC970MP",12))) {
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 4;   /* Event Group #4, "Floating Point events" */
      else
        prog.events[0] = 11;  /* Event Group #11, "Floating Point events
                                 by unit" */
    }
    else {
      prog.events[0] = 38;  /* Event Group #38, "Fix Point Unit events" */
    }

  } else if (!strcmp(Pminfo.proc_name, "POWER5")) {
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 79;  /* Event Group #79, "Floating Point events" */
      else
        prog.events[0] = 80;  /* Event Group #80, "Floating Point events" */
    }
    else
      prog.events[0] = 91;  /* Event Group #91, "Fix Point events" */

  } else if (!strcmp(Pminfo.proc_name, "POWER5-II")) {
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 82;  /* Event Group #82, "Floating Point events" */
      else
        prog.events[0] = 83;  /* Event Group #83, "Floating Point events" */
    }
    else
      prog.events[0] = 94;  /* Event Group #94, "Fix Point events" */

  } else if (!strcmp(Pminfo.proc_name, "POWER6")) {
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 127;  /* Event Group #127, "Floating Point events" */
      else
        prog.events[0] = 128;  /* Event Group #128, "Floating Point events" */
    }
    else
      prog.events[0] = 83;  /* Event Group #83, "Fix Point events" */

  } else if (!strcmp(Pminfo.proc_name, "POWER7")) {
    /* NOTE(review): group descriptions were copied from the POWER6
     * branch; the numbers below are the ones the code actually uses */
    if (Floatcount) {
      if (Floatcount == 1)
        prog.events[0] = 128;  /* Event Group #128, "Floating Point events" */
      else
        prog.events[0] = 132;  /* Event Group #132, "Floating Point events" */
    }
    else
      prog.events[0] = 52;  /* Event Group #52, "Fix Point events" */

  } else if ((!strcmp(Pminfo.proc_name, "RS64-II")) ||
             (!strcmp(Pminfo.proc_name, "RS64-III"))) {
    if (Floatcount)
      prog.events[0] = 112;  /* event 112 (FP instructions) in pmc1 */
    else
      prog.events[0] = 3;  /* event 3 in pmc1; NOTE(review): the earlier
                              comment said "event 2 (instructions)" --
                              confirm which event is intended */
  }

  if (EventGroup)
    printf("Group chosen : %d\n", prog.events[0]);

  if (Group) {
    /* create a group with this thread as a member */
    if ((rc = pm_set_program_mygroup(&prog)) > 0)
      PMERROR("pm_set_program_mygroup", rc);

    /* reread the programmation */
    if ((rc = pm_get_program_mygroup(&startprog)) > 0)
      PMERROR("pm_get_program_mygroup", rc);
  }
  else {
    /* create and init a pm context for this thread */
    if ((rc = pm_set_program_mythread(&prog)) > 0)
      PMERROR("pm_set_program_mythread", rc);

    /* reread the programmation */
    if ((rc = pm_get_program_mythread(&startprog)) > 0)
      PMERROR("pm_get_program_mythread", rc);

    /* fail test: ask for a group programmation although
     * only a thread programmation was set */
    if (Failtest)
      if ((rc = pm_get_program_mygroup(&failprog)) > 0)
        PMERROR("pm_get_program_mygroup", rc);
  }

  /* init thread private data for first worker */
  init_tdata(0);

  /* stop counting for this thread */
  if ((rc = pm_stop_mythread()) > 0)
    PMERROR("pm_stop_mythread", rc);

  /* try to get group data for non existent group */
  if (Failtest && !Group) {
    if ((rc = pm_get_data_mygroup(&faildata)) > 0)
      PMERROR("pm_get_data_mygroup", rc);
  }

  /* read pm data for this thread */
  if ((rc = pm_get_data_mythread(&stopdata)) > 0)
    PMERROR("pm_get_data_mythread", rc);

  /* also read pm programmation for this thread */
  if ((rc = pm_get_program_mythread(&stopprog)) > 0)
    PMERROR("pm_get_program_mythread", rc);

  if (!Threads)
    /* no threads, just execute worker code */
    worker((void *) &Tdata[0]);
  else {
    if (Process) {
      /* to create process group, we first need to delete
       * the existing pm programmation for this thread and
       * its group if one exists */
      if (Group) {
        if ((rc = pm_delete_program_mygroup()) > 0)
          PMERROR("pm_delete_program_mygroup", rc);
      }
      else {
        if ((rc = pm_delete_program_mythread()) > 0)
          PMERROR("pm_delete_program_mythread", rc);
      }
    }

    /* init private data for all the workers
     * (slot 0 was already initialized above) */
    for (i = 1; i < Threads; i++)
      init_tdata(i);

    /* create workers */
    for (i = 0; i < Threads; i++)
      if ((rc = pthread_create(&Threadid[i], &attr, worker, (void *) &Tdata[i])) > 0)
        PEXIT3("pthread_create[%d] failed : rc = %d", i, rc);

    /* before creating a process group, wait
     * for all the threads to be blocked */
    if (Process) {
      if ((rc = pthread_mutex_lock(&WorkLock)) != 0)
        PEXIT2("pthread_mutex_lock failed : rc = %d", rc);

      /* wait for last worker thread to signal
       * main thread that all workers are ready */
      while (ReadyWorkers != 1)
        if ((rc = pthread_cond_wait(&CondMain, &WorkLock)) != 0)
          PEXIT2("pthread_cond_wait failed : rc = %d", rc);

      prog.mode.b.process = 1; /* process group */
      prog.mode.b.count   = 0; /* count is off */

      /* create a process group with this thread and all the workers */
      if ((rc = pm_set_program_mygroup(&prog)) > 0)
        PMERROR("pm_set_program_mygroup(process)", rc);

      /* start the counting for the group, note that
       * this doesn't start the counting for any
       * thread yet, because when threads are in a
       * group, their counting state is an AND between
       * the group state and the thread state which
       * was initialized in this case as not counting.
       * when the worker will hit pm_start_mythread,
       * the counting will effectively begin */
      if ((rc = pm_start_mygroup()) > 0)
        PMERROR("pm_start_mygroup(process)", rc);

      /* release all the blocked workers */
      GoWorkers = 1;

      if ((rc = pthread_cond_broadcast(&CondWork)) != 0)
        PEXIT2("pthread_cond_broadcast failed : rc = %d", rc);

      if ((rc = pthread_mutex_unlock(&WorkLock)) != 0)
        PEXIT2("pthread_mutex_unlock failed : rc = %d", rc);
    }

    /* wait for workers to finish; the worker's return value is
     * received in rval and otherwise ignored */
    for (i = 0; i < Threads; i++)
      if ((rc = pthread_join(Threadid[i], &rval)) > 0)
        PEXIT3("pthread_join[%d] failed : rc = %d", i, rc);
  }

  /* get group data and programmation if necessary */
  if (Group || Process) {
    if (Reprogram) {
      /* stop counting for this thread
       * because it was restarted when
       * group was reprogrammed */
      if ((rc = pm_stop_mythread()) > 0)
        PMERROR("pm_stop_mythread", rc);
    }

    if ((rc = pm_get_data_mygroup(&endgdata)) > 0)
      PMERROR("pm_get_data_mygroup", rc);

    if ((rc = pm_get_program_mygroup(&endgprog)) > 0)
      PMERROR("pm_get_program_mygroup", rc);
  }

  /* read pm data for this thread */
  if ((rc = pm_get_data_mythread(&enddata)) > 0)
    PMERROR("pm_get_data_mythread", rc);

  /* read pm programmation for this thread */
  if ((rc = pm_get_program_mythread(&endprog)) > 0)
    PMERROR("pm_get_program_mythread", rc);

  /* try several things supposed to fail */
  if (Failtest) {
    if (!Group) {
      /* fails when !Group && !Process */
      if ((rc = pm_delete_program_mygroup()) > 0)
        PMERROR("pm_delete_program_mygroup", rc);

      /* fails when Process */
      if ((rc = pm_delete_program_mythread()) > 0)
        PMERROR("pm_delete_program_mythread", rc);

      /* get thread programmation after it has been deleted */
      if ((rc = pm_get_program_mythread(&failprog)) > 0)
        PMERROR("pm_get_program_mythread", rc);
      else
        print_prog("after delete", &failprog);
    }
    else {
      if ((rc = pm_delete_program_mygroup()) > 0)
        PMERROR("pm_delete_program_mygroup", rc);

      /* get group programmation after it has been deleted */
      if ((rc = pm_get_program_mygroup(&failprog)) > 0)
        PMERROR("pm_get_program_mygroup", rc);
      else
        print_prog("after delete", &failprog);
    }

    /* no results for failing test, except
     * for error messages of course */
    exit(0);
  }

  /* print reread programmation at
   * the beginning of main thread */
  print_prog("start", &startprog);

  /* print programmation and data after main
   * thread stopped its counting.
   * NOTE(review): this thread data is printed with GROUP while the
   * "end" thread data below uses NOGROUP -- confirm this is intended */
  print_prog("stop", &stopprog);
  print_data("stop", &stopdata, GROUP);

  if (!Threads) {
    /* print pm data after first and second busy loops
     * and thread programmation on entry and after first
     * loop. if a group was created, also print data and
     * programmation for it at the end of worker */
    printf("in worker :\n");

    print_prog("   enter", &(Tdata[0].prog));
    print_prog("   [1]",   &(Tdata[0].prog1));
    print_data("   [1]",   &(Tdata[0].data1), NOGROUP);
    print_time("   [1]",   &(Tdata[0].tstamp1));
    print_data("   [2]",   &(Tdata[0].data2), NOGROUP);
    print_time("   [2]",   &(Tdata[0].tstamp2));

    if (Group) {
      print_prog("   group[2]", &(Tdata[0].gprog));
      print_data("   group[2]", &(Tdata[0].gdata), GROUP);
      print_time("   group[2]", &(Tdata[0].gtstamp));
    }
  }
  else {
    /* print same information as above in the
     * multi-threaded cases, up to 8 workers.
     * group information is printed as
     * collected by last worker to finish */
    if (Threads <= 8) {
      for (i = 0; i < Threads; i++) {
        printf("in worker thread %d :\n", i+1);

        print_prog("   enter", &(Tdata[i].prog));
        print_prog("   [1]",   &(Tdata[i].prog1));
        print_data("   [1]",   &(Tdata[i].data1), NOGROUP);
        print_time("   [1]",   &(Tdata[i].tstamp1));
        print_data("   [2]",   &(Tdata[i].data2), NOGROUP);
        print_time("   [2]",   &(Tdata[i].tstamp2));

        /* print group info only once, as collected by last worker */
        if ((Group || Process) && (Tdata[i].tid == LastWorker)) {
          print_prog("   group[2]", &(Tdata[i].gprog));
          print_data("   group[2]", &(Tdata[i].gdata), GROUP);
          print_time("   group[2]", &(Tdata[i].gtstamp));
        }
      }
    }

    /* if more than 8 threads or a group was created,
     * calculate and print sum of threads pm data */
    if (Group || Process || Threads > 8) {
      unsigned long long sum[MAX_COUNTERS];

      for (j = 0; j < Pminfo.maxpmcs; j++)
        sum[j] = 0;

      /* accumulate the final ([2]) counts of every worker */
      for (i = 0; i < Threads; i++)
        for (j = 0; j < Pminfo.maxpmcs; j++)
          sum[j] += Tdata[i].data2.accu[j];

      /* the printed total also includes the main thread's end counts */
      printf("end sum : counts = ");
      for (j = 0; j < Pminfo.maxpmcs-1; j++)
        printf("%llu,", sum[j] + enddata.accu[j]);
      printf("%llu\n", sum[Pminfo.maxpmcs-1] + enddata.accu[Pminfo.maxpmcs-1]);
    }
  }

  /* if necessary, print group data and
   * programmation saved at the end of test */
  if (Group || Process) {
    print_prog("end group", &endgprog);
    print_data("end group", &endgdata, GROUP);
  }

  /* print thread data and programmation
   * captured at the end of test */
  print_prog("end", &endprog);
  print_data("end", &enddata, NOGROUP);

  /* explicit status: do not rely on falling off the end of main */
  return 0;

} /* main */
