/**********************************************************************/
/*                                                                    */
/*   Licensed Materials - Property of IBM.                            */
/*   IBM XL C/C++ for Linux, V16.1.1 (RC 3)                           */
/*   IBM XL Fortran for Linux, V16.1.1 (RC 3)                         */
/*   Copyright IBM Corp. 2012, 2013.                                  */
/*   US Government Users Restricted Rights -                          */
/*   Use, duplication or disclosure restricted by                     */
/*   GSA ADP Schedule Contract with IBM Corp.                         */
/*                                                                    */
/**********************************************************************/

/*****************************************************************************
 * IBM Light OpenMP C++ Implementation 
 *****************************************************************************/

#ifndef _OMP_H_
#define _OMP_H_ 1

#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/*****************************************************************************
 * Defines
 *****************************************************************************/

/* Marks this header as belonging to the LOMP runtime; the value is the
 * runtime's version number. */
#define _LOMP 1.1

/* Header-private linkage helpers (both are #undef'd at the end of the file):
 *   _OMP_EXTERN  prefix for declarations of runtime entry points
 *                (adds C linkage when compiled as C++);
 *   _OMP_INLINE  prefix for the header-local stub definitions. */
#if !defined(__cplusplus)
#  define _OMP_EXTERN extern
#  define _OMP_INLINE static __inline
#else
#  define _OMP_EXTERN extern "C"
#  define _OMP_INLINE static inline
#endif


/*****************************************************************************
 * OpenMP types
 *****************************************************************************/

/* Lock storage. The integer types are arbitrary; they are chosen only so
 * that the objects have the right length (32 bits for a simple lock,
 * 64 bits for a nestable lock). */
typedef uint32_t omp_lock_t;
typedef uint64_t omp_nest_lock_t;

/* Loop schedule kinds exchanged through omp_set_schedule() and
 * omp_get_schedule(). */
typedef enum omp_sched_t {
  /* kinds that take a chunkSize > 0 */
  omp_sched_static   = 1,
  omp_sched_dynamic  = 2,
  omp_sched_guided   = 3,
  /* no chunkSize */
  omp_sched_auto     = 4,
  /* deprecated IBM extension, use guided */
  ibm_sched_affinity = 1000
} omp_sched_t;

/* Thread-binding policies, as returned by omp_get_proc_bind(). */
typedef enum omp_proc_bind_t {
  omp_proc_bind_false  = 0,
  omp_proc_bind_true   = 1,
  omp_proc_bind_master = 2,
  omp_proc_bind_close  = 3,
  omp_proc_bind_spread = 4
} omp_proc_bind_t;

/* Hints accepted by omp_init_lock_with_hint() and
 * omp_init_nest_lock_with_hint(). Every non-zero hint occupies its own bit. */
typedef enum omp_lock_hint_t {
  omp_lock_hint_none           = 0x0,
  omp_lock_hint_uncontended    = 0x1,
  omp_lock_hint_contended      = 0x2,
  omp_lock_hint_nonspeculative = 0x4,
  omp_lock_hint_speculative    = 0x8
} omp_lock_hint_t;

/* Pause kinds accepted by omp_pause_resource() and omp_pause_resource_all(). */
typedef enum omp_pause_resource_t {
  omp_pause_soft = 1,
  omp_pause_hard = 2
} omp_pause_resource_t;

/*****************************************************************************
 * Has OpenMP
 *****************************************************************************/

#if defined(_OPENMP)

/*
 * Runtime entry points. They are only declared here; no definition appears
 * in this header. Semantics are those of the OpenMP API runtime library
 * routines of the same names.
 */

/* Timing routines. */
_OMP_EXTERN double omp_get_wtick(void); 
_OMP_EXTERN double omp_get_wtime(void); 

/* Thread count, thread identity and region queries. */
_OMP_EXTERN void omp_set_num_threads(int num); 
_OMP_EXTERN int  omp_get_num_threads(void);  
_OMP_EXTERN int  omp_get_max_threads(void); 
_OMP_EXTERN int  omp_get_thread_limit(void); 
_OMP_EXTERN int  omp_get_thread_num(void); 
_OMP_EXTERN int  omp_get_num_procs(void); 
_OMP_EXTERN int  omp_in_parallel(void); 
_OMP_EXTERN int  omp_in_final(void); 

/* Dynamic-adjustment setting. */
_OMP_EXTERN void omp_set_dynamic(int flag); 
_OMP_EXTERN int  omp_get_dynamic(void);  

/* Nested-parallelism setting. */
_OMP_EXTERN void omp_set_nested(int flag);
_OMP_EXTERN int  omp_get_nested(void);

/* Limit on the number of active nesting levels. */
_OMP_EXTERN void omp_set_max_active_levels(int level);
_OMP_EXTERN int  omp_get_max_active_levels(void);

/* Nesting-level introspection. */
_OMP_EXTERN int  omp_get_level(void); 
_OMP_EXTERN int  omp_get_active_level(void); 
_OMP_EXTERN int  omp_get_ancestor_thread_num(int level); 
_OMP_EXTERN int  omp_get_team_size(int level); 

/*
 * Simple and nestable lock routines.
 * The whole group is skipped when LOMP_API is defined.
 * NOTE(review): presumably LOMP_API is set by code that supplies its own
 * lock declarations -- confirm against the runtime sources.
 */
#if ! defined(LOMP_API)
/* Initialization, with or without a hint. */
_OMP_EXTERN void omp_init_lock(omp_lock_t *lock);
_OMP_EXTERN void omp_init_lock_with_hint(omp_lock_t *lock, omp_lock_hint_t hint);
_OMP_EXTERN void omp_init_nest_lock(omp_nest_lock_t *lock);
_OMP_EXTERN void omp_init_nest_lock_with_hint(omp_nest_lock_t *lock, omp_lock_hint_t hint);

/* Destruction. */
_OMP_EXTERN void omp_destroy_lock(omp_lock_t *lock);
_OMP_EXTERN void omp_destroy_nest_lock(omp_nest_lock_t *lock);

/* Acquire. */
_OMP_EXTERN void omp_set_lock(omp_lock_t *lock);
_OMP_EXTERN void omp_set_nest_lock(omp_nest_lock_t *lock);

/* Release. */
_OMP_EXTERN void omp_unset_lock(omp_lock_t *lock);
_OMP_EXTERN void omp_unset_nest_lock(omp_nest_lock_t *lock);

/* Test variants: these return an int where the set variants return void. */
_OMP_EXTERN int  omp_test_lock(omp_lock_t *lock);
_OMP_EXTERN int  omp_test_nest_lock(omp_nest_lock_t *lock);
#endif

/* Run-time schedule: the getter reports through its two out-parameters. */
_OMP_EXTERN void omp_get_schedule(omp_sched_t * kind, int * modifier); 
_OMP_EXTERN void omp_set_schedule(omp_sched_t kind, int modifier); 

/* Binding policy and cancellation queries. */
_OMP_EXTERN omp_proc_bind_t omp_get_proc_bind(void); 
_OMP_EXTERN int  omp_get_cancellation(void); 

/* Device selection and device/team queries. */
_OMP_EXTERN void omp_set_default_device(int deviceId); 
_OMP_EXTERN int  omp_get_default_device(void); 
_OMP_EXTERN int  omp_get_num_devices(void); 
_OMP_EXTERN int  omp_get_num_teams(void); 
_OMP_EXTERN int  omp_get_team_num(void); 
_OMP_EXTERN int  omp_is_initial_device(void);

/*
 * Device memory routines (declared here, defined outside this header).
 * omp_get_initial_device is declared with an explicit (void) so that C
 * compilers see a real prototype and reject calls that pass arguments.
 */
_OMP_EXTERN int   omp_get_initial_device(void);

/* Allocation and release of memory on the device numbered deviceId. */
_OMP_EXTERN void *omp_target_alloc(size_t size, int deviceId);
_OMP_EXTERN void  omp_target_free(void *devicePtr, int deviceId);

/* Host/device pointer association. */
_OMP_EXTERN int   omp_target_is_present(void *hostAddr, int deviceId);
_OMP_EXTERN int   omp_target_associate_ptr(void *hostAddr, void *deviceAddr,
  size_t size, size_t deviceOffset, int deviceId);
_OMP_EXTERN int   omp_target_disassociate_ptr(void *hostAddr, int deviceId);

/* Linear copy of `length` bytes; offsets are applied to both addresses. */
_OMP_EXTERN int   omp_target_memcpy(void *dstAddr, void *srcAddr, size_t length,
  size_t dstOffset, size_t srcOffset, int dstDeviceId, int srcDeviceId);

/* Rectangular copy: volume, offsets and dimensions are arrays with one
 * entry per dimension (numDim entries each). */
_OMP_EXTERN int   omp_target_memcpy_rect(void *dstAddr, void *srcAddr,
  size_t elementSize, int numDim, const size_t *volume, const size_t *dstOffset,
  const size_t *srcOffset, const size_t *dstDim, const size_t *srcDim,
  int dstDeviceId, int srcDeviceId);

/*
 * Place queries. The parameterless routines are declared with (void) so
 * that C compilers see real prototypes, matching the rest of this header.
 */
_OMP_EXTERN int omp_get_num_places(void);
_OMP_EXTERN int omp_get_place_num_procs(int place_num);
_OMP_EXTERN void omp_get_place_proc_ids(int place_num, int *ids);
_OMP_EXTERN int omp_get_place_num(void);
_OMP_EXTERN int omp_get_partition_num_places(void);
_OMP_EXTERN void omp_get_partition_place_nums(int *place_nums);

/* Task priority limit. */
_OMP_EXTERN int omp_get_max_task_priority(void);

/* Resource pausing, for one device or for all of them. */
_OMP_EXTERN int omp_pause_resource(omp_pause_resource_t kind, int device_num);
_OMP_EXTERN int omp_pause_resource_all(omp_pause_resource_t kind);

/* Affinity display and capture; the size_t results come from the routines
 * that write into a caller-supplied buffer. */
_OMP_EXTERN void omp_display_affinity(char const *format);
_OMP_EXTERN size_t omp_capture_affinity(char *buffer, size_t size, char const *format);
_OMP_EXTERN void omp_set_affinity_format(char const *format);
_OMP_EXTERN size_t omp_get_affinity_format(char *buffer, size_t size);


/* non standard */
_OMP_EXTERN void omp_set_proc_bind(omp_proc_bind_t kind);

/* ibm specific
 * Declared with (void): the matching stubs in the non-OpenMP branch of this
 * header take no arguments, and a real prototype lets C compilers reject
 * calls that pass any. */
_OMP_EXTERN int _xl_thread_binding(void);
_OMP_EXTERN int _xl_thread_place(void);
_OMP_EXTERN int _xlsmp_get_place_num(void);
_OMP_EXTERN unsigned int *_xlsmp_omp_places(void);

#else 


/*****************************************************************************
 * Does not have OpenMP
 *****************************************************************************/

/* Timer stubs: without OpenMP both the tick and the time read as 0.0. */
_OMP_INLINE double omp_get_wtick(void)
{
  return 0.0;
}

_OMP_INLINE double omp_get_wtime(void)
{
  return 0.0;
}

/* Schedule stub: the requested schedule is accepted and discarded. */
_OMP_INLINE void omp_set_schedule(omp_sched_t kind, int modifier)
{
  (void) kind;
  (void) modifier;
}

/*
 * Schedule query stub.
 *
 * Stores omp_sched_static in *kind and 0 in *modifier so that callers do
 * not read uninitialised values after the call; the OpenMP build of this
 * routine likewise reports its result through these two out-parameters.
 * A NULL pointer is tolerated and simply skipped.
 *
 * Returns omp_sched_static. The return value exists only for backward
 * compatibility: the OpenMP declaration of this routine returns void, so
 * portable callers must not use it.
 */
_OMP_INLINE int omp_get_schedule(omp_sched_t * kind, int * modifier)
{
  if (kind) {
    *kind = omp_sched_static;
  }
  if (modifier) {
    *modifier = 0;
  }
  return omp_sched_static;
}

/* Thread stubs: a build without OpenMP reports one thread, numbered 0, on
 * one processor, and never inside a parallel or final region. */
_OMP_INLINE void omp_set_num_threads(int num)
{
}

_OMP_INLINE int omp_get_num_threads(void)
{
  return 1;
}

_OMP_INLINE int omp_get_max_threads(void)
{
  return 1;
}

_OMP_INLINE int omp_get_thread_limit(void)
{
  return 1;
}

_OMP_INLINE int omp_get_thread_num(void)
{
  return 0;
}

_OMP_INLINE int omp_get_num_procs(void)
{
  return 1;
}

_OMP_INLINE int omp_in_parallel(void)
{
  return 0;
}

_OMP_INLINE int omp_in_final(void)
{
  return 0;
}
/* Dynamic-adjustment and nesting stubs: setters are no-ops, getters
 * report "off". */
_OMP_INLINE void omp_set_dynamic(int flag)                    { }
_OMP_INLINE int omp_get_dynamic(void)                         { return 0; }
_OMP_INLINE void omp_set_nested(int flag)                     { }
_OMP_INLINE int omp_get_nested(void)                          { return 0; }
_OMP_INLINE void omp_set_max_active_levels(int level)         { }
/* Takes no argument, exactly like the OpenMP declaration of this routine;
 * a stray parameter here would make standard calls fail to compile in
 * builds without OpenMP. */
_OMP_INLINE int omp_get_max_active_levels(void)               { return 1; }
/* Nesting-level stubs: level 0, thread 0, team of one. The level argument
 * is ignored. */
_OMP_INLINE int omp_get_level(void)
{
  return 0;
}

_OMP_INLINE int omp_get_active_level(void)
{
  return 0;
}

_OMP_INLINE int omp_get_ancestor_thread_num(int level)
{
  return 0;
}

_OMP_INLINE int omp_get_team_size(int level)
{
  return 1;
}
							      
/* Simple-lock stubs: the lock object is never touched, and testing a lock
 * always reports success (1). */
_OMP_INLINE void omp_init_lock(omp_lock_t  *lock)
{
}

_OMP_INLINE void omp_init_lock_with_hint(omp_lock_t  *lock,
   omp_lock_hint_t hint)
{
}

_OMP_INLINE void omp_destroy_lock(omp_lock_t *lock)
{
}

_OMP_INLINE void omp_set_lock(omp_lock_t *lock)
{
}

_OMP_INLINE void omp_unset_lock(omp_lock_t *lock)
{
}

_OMP_INLINE int omp_test_lock(omp_lock_t *lock)
{
  return 1;
}
							      
/* Nestable-lock stubs: the lock object itself is used as a nesting counter.
 * Initialization zeroes it, set/test increment it, unset decrements it. */
_OMP_INLINE void omp_init_nest_lock(omp_nest_lock_t *lock)
{
  *lock = 0;
}

_OMP_INLINE void omp_init_nest_lock_with_hint(
  omp_nest_lock_t *lock, omp_lock_hint_t hint)
{
  *lock = 0;
}

_OMP_INLINE void omp_destroy_nest_lock(omp_nest_lock_t *lock)
{
}

_OMP_INLINE void omp_set_nest_lock(omp_nest_lock_t *lock)
{
  *lock += 1;
}

_OMP_INLINE void omp_unset_nest_lock(omp_nest_lock_t *lock)
{
  *lock -= 1;
}

/* Returns the counter value after the increment. */
_OMP_INLINE int omp_test_nest_lock(omp_nest_lock_t *lock)
{
  *lock += 1;
  return *lock;
}

/* Binding, cancellation and device stubs: no binding, no cancellation, no
 * devices, a single team numbered 0, and execution always on the initial
 * device. */
_OMP_INLINE omp_proc_bind_t omp_get_proc_bind(void)
{
  return omp_proc_bind_false;
}

_OMP_INLINE int omp_get_cancellation(void)
{
  return 0;
}

_OMP_INLINE void omp_set_default_device(int deviceId)
{
}

_OMP_INLINE int omp_get_default_device(void)
{
  return 0;
}

_OMP_INLINE int omp_get_num_devices(void)
{
  return 0;
}

_OMP_INLINE int omp_get_num_teams(void)
{
  return 1;
}

_OMP_INLINE int omp_get_team_num(void)
{
  return 0;
}

_OMP_INLINE int omp_is_initial_device(void)
{
  return 1;
}

/* Device number that the stubs below treat as the host (initial) device.
 * Declared with (void) so C compilers see a real prototype. */
_OMP_INLINE int omp_get_initial_device(void)                  { return -10; }

/* Allocates `size` bytes with malloc() when deviceId is the initial device
 * (-10, see omp_get_initial_device); returns a null pointer for any other
 * device. */
_OMP_INLINE void *omp_target_alloc(size_t size, int deviceId) {
  if (deviceId != -10) return 0;
  return malloc(size);
}

/* Releases memory obtained from omp_target_alloc(); requests for any device
 * other than the initial device (-10) are ignored. */
_OMP_INLINE void omp_target_free(void *devicePtr, int deviceId) {
  if (deviceId != -10) return;
  free(devicePtr);
}
/* Presence stub: every address is reported as present (1), whatever the
 * device. */
_OMP_INLINE int omp_target_is_present(void *hostAddr, int deviceId)
{
  return 1;
}

/* Association stub: always fails with the error value 1. */
_OMP_INLINE int omp_target_associate_ptr(void *hostAddr, void *deviceAddr,
  size_t size, size_t deviceOffset, int deviceId)
{
  return 1; /* error value */
}

/* Disassociation stub: always fails with the error value 1. */
_OMP_INLINE int omp_target_disassociate_ptr(void *hostAddr, int deviceId)
{
  return 1; /* error value */
}
/*
 * Host-only linear copy.
 * Copies `length` bytes from srcAddr+srcOffset to dstAddr+dstOffset with
 * memcpy(). Returns 0 on success, or 1 when either device is not the
 * initial device (-10) or either address is null.
 */
_OMP_INLINE int omp_target_memcpy(void *dstAddr, void *srcAddr, size_t length,
  size_t dstOffset, size_t srcOffset, int dstDeviceId, int srcDeviceId)
{
  char *to;
  const char *from;

  if (dstDeviceId != -10) return 1; /* error value */
  if (srcDeviceId != -10) return 1; /* error value */
  if (!dstAddr) return 1;           /* error value */
  if (!srcAddr) return 1;           /* error value */

  to = (char *) dstAddr + dstOffset;
  from = (const char *) srcAddr + srcOffset;
  memcpy(to, from, length);
  return 0;
}
/*
 * Host-only rectangular copy.
 *
 * Copies a numDim-dimensional sub-volume of elements of elementSize bytes
 * from srcAddr to dstAddr. volume, dstOffset, srcOffset, dstDim and srcDim
 * each hold numDim entries; index 0 is the outermost dimension.
 *
 * The routine peels off the outermost dimension and recurses on every
 * slice; the innermost dimension is contiguous and is handed to
 * omp_target_memcpy().
 *
 * Returns:
 *   0x7FFFFFFF  when both addresses are null (query for the maximum number
 *               of supported dimensions);
 *   1           on invalid arguments, or when omp_target_memcpy() rejects
 *               the copy;
 *   0           on success.
 */
_OMP_INLINE int omp_target_memcpy_rect(void *dstAddr, void *srcAddr, 
  size_t elementSize, int numDim, const size_t *volume, 
  const size_t *dstOffset, const size_t *srcOffset, 
  const size_t *dstDim, const size_t *srcDim,
  int dstDeviceId, int srcDeviceId) {
  int i, rc;
  size_t dstSliceSize, srcSliceSize, dstOff, srcOff, s;

  /* request max dim supported, infinite */ 
  if (!dstAddr && !srcAddr) return 0x7FFFFFFF;
  /* Exactly one null buffer: fail before doing arithmetic on it. */
  if (!dstAddr || !srcAddr) return 1;
  if (!volume || !dstOffset || !srcOffset || !dstDim || !srcDim || numDim<1) return 1;

  if (numDim == 1) {
    /* Innermost dimension: one contiguous run of volume[0] elements. */
    return omp_target_memcpy(dstAddr, srcAddr, elementSize*volume[0],
      elementSize*dstOffset[0], elementSize*srcOffset[0], dstDeviceId, srcDeviceId);
  }

  /* Bytes in one slice of the outermost dimension, per side. */
  dstSliceSize = elementSize;
  srcSliceSize = elementSize;
  for(i=1; i<numDim; i++) {
    dstSliceSize *= dstDim[i];
    srcSliceSize *= srcDim[i];
  }

  /* Each side's starting offset is scaled by that side's own slice size;
   * source and destination may have different dimensions. */
  dstOff = dstOffset[0] * dstSliceSize;
  srcOff = srcOffset[0] * srcSliceSize;

  for(s=0; s<volume[0]; s++) {
    rc = omp_target_memcpy_rect(
      (char *)dstAddr + dstOff + dstSliceSize*s,
      (char *)srcAddr + srcOff + srcSliceSize*s,
      elementSize, numDim-1, 
      volume +1,
      dstOffset +1, srcOffset +1, 
      dstDim +1, srcDim +1,
      dstDeviceId, srcDeviceId);
    if (rc) return rc;
  }
  return 0;
}
/* Place stubs: no places are reported and the output arrays are left
 * untouched. Parameterless routines are defined with (void) so that C
 * compilers see a real prototype. */
_OMP_INLINE int omp_get_num_places(void)                      { return 0; }
_OMP_INLINE int omp_get_place_num_procs(int place_num)        { return 0; }
_OMP_INLINE void omp_get_place_proc_ids(int place_num, int *ids) { }
_OMP_INLINE int omp_get_place_num(void)                       { return 0; }
_OMP_INLINE int omp_get_partition_num_places(void)            { return 0; }
_OMP_INLINE void omp_get_partition_place_nums(int *place_nums) { }

/* Task-priority stub: the maximum priority is 0. */
_OMP_INLINE int omp_get_max_task_priority(void)               { return 0; }
/* Pause stubs: nothing is paused; both routines return 0. */
_OMP_INLINE int omp_pause_resource(omp_pause_resource_t kind, int device_num)
{
  return 0;
}

_OMP_INLINE int omp_pause_resource_all(omp_pause_resource_t kind)
{
  return 0;
}

/* Affinity stubs: nothing is displayed or stored, the buffers are left
 * untouched, and the buffer-filling routines return 0. */
_OMP_INLINE void omp_display_affinity(char const *format)
{
}

_OMP_INLINE size_t omp_capture_affinity(char *buffer,
  size_t size, char const *format)
{
  return 0;
}

_OMP_INLINE void omp_set_affinity_format(char const *format)
{
}

_OMP_INLINE size_t omp_get_affinity_format(char *buffer,
  size_t size)
{
  return 0;
}

/* non standard
 * Stubs: the setter is a no-op, omp_get_places leaves str untouched, and
 * omp_get_thread_place reports -1. Parameterless routines are defined with
 * (void) so that C compilers see a real prototype. */
_OMP_INLINE void omp_set_proc_bind(omp_proc_bind_t kind)      { }
_OMP_INLINE void omp_get_places(char *str, int len)         { }
_OMP_INLINE int omp_get_thread_place(void)                    { return -1; }

/* ibm specific
 * Stubs: every query reports -1, or a null pointer for the places table. */
_OMP_INLINE int _xl_thread_binding(void)                      { return -1; }
_OMP_INLINE int _xl_thread_place(void)                        { return -1; }
_OMP_INLINE int _xlsmp_get_place_num(void)                    { return -1; }
_OMP_INLINE unsigned int *_xlsmp_omp_places(void)             { return (unsigned int *) 0;}

#endif

/*****************************************************************************
 * OMPT API for tool
 *****************************************************************************/

/* OMPT states, as returned through ompt_get_state_t.
 * Values are grouped by range: work (0x00x), barrier waits (0x01x),
 * task waits (0x02x), mutual-exclusion waits (0x04x), target waits (0x08x)
 * and miscellaneous (0x10x). */
typedef enum omp_state_t {
  /* work */
  omp_state_work_serial = 0x000,
  omp_state_work_parallel = 0x001,
  omp_state_work_reduction = 0x002,

  /* waiting at a barrier */
  omp_state_wait_barrier = 0x010,
  omp_state_wait_barrier_implicit_parallel = 0x011,
  omp_state_wait_barrier_implicit_workshare = 0x012,
  omp_state_wait_barrier_implicit = 0x013,
  omp_state_wait_barrier_explicit = 0x014,

  /* waiting on tasks */
  omp_state_wait_taskwait = 0x020,
  omp_state_wait_taskgroup = 0x021,

  /* waiting for mutual exclusion */
  omp_state_wait_mutex = 0x040,
  omp_state_wait_lock = 0x041,
  omp_state_wait_critical = 0x042,
  omp_state_wait_atomic = 0x043,
  omp_state_wait_ordered = 0x044,

  /* waiting on a target operation */
  omp_state_wait_target = 0x080,
  omp_state_wait_target_map = 0x081,
  omp_state_wait_target_update = 0x082,

  /* miscellaneous */
  omp_state_idle = 0x100,
  omp_state_overhead = 0x101,
  omp_state_undefined = 0x102
} omp_state_t;

/* OMPT events: identifiers passed as the `which` argument of
 * ompt_set_callback_t and ompt_get_callback_t. */
typedef enum ompt_callbacks_t {
  ompt_callback_undefined = 0,
  ompt_callback_thread_begin = 1,
  ompt_callback_thread_end = 2,
  ompt_callback_parallel_begin = 3,
  ompt_callback_parallel_end = 4,
  ompt_callback_task_create = 5,
  ompt_callback_task_schedule = 6,
  ompt_callback_implicit_task = 7,
  ompt_callback_target = 8,
  ompt_callback_target_data_op = 9,
  ompt_callback_target_submit = 10,
  ompt_callback_control_tool = 11,
  ompt_callback_device_initialize = 12,
  ompt_callback_idle = 13,
  ompt_callback_sync_region_wait = 14,
  ompt_callback_mutex_released = 15,
  ompt_callback_task_dependences = 16,
  ompt_callback_task_dependence = 17,
  ompt_callback_work = 18,
  ompt_callback_master = 19,
  ompt_callback_target_map = 20,
  ompt_callback_sync_region = 21,
  ompt_callback_lock_init = 22,
  ompt_callback_lock_destroy = 23,
  ompt_callback_mutex_acquire = 24,
  ompt_callback_mutex_acquired = 25,
  ompt_callback_nest_lock = 26,
  ompt_callback_flush = 27,
  ompt_callback_cancel = 28
} ompt_callbacks_t;

/* OMPT types */

/* Pair of frame pointers describing a task's frame. */
typedef struct ompt_frame_s {
  void *exit_frame;
  void *enter_frame;
} ompt_frame_t;

/* Generic callback pointer, as taken by ompt_set_callback_t. */
typedef void (*ompt_callback_t)(void);

/* 64-bit identifier; ompt_id_none is its "no id" value. */
typedef uint64_t ompt_id_t;
#define ompt_id_none 0

/* 64-bit slot readable either as an integer or as a pointer;
 * ompt_data_none is the zero-initialized value. */
typedef union ompt_data_u {
  uint64_t value;
  void *ptr;
} ompt_data_t;
static const ompt_data_t ompt_data_none = { 0 };

/* 64-bit wait identifier; ompt_wait_id_none is its "none" value. */
typedef uint64_t ompt_wait_id_t;
#define ompt_wait_id_none 0

/* Opaque device handle (only ever used through a pointer). */
typedef void ompt_device_t;

/* 64-bit device time stamp; ompt_time_none is its "none" value. */
typedef uint64_t ompt_device_time_t;
#define ompt_time_none 0

/* Opaque trace buffer and a 64-bit cursor into it. */
typedef void ompt_buffer_t;
typedef uint64_t ompt_buffer_cursor_t;

/* One task dependence: an address plus flags. */
typedef struct ompt_task_dependence_s {
  void *variable_addr;
  unsigned int dependence_flags;
} ompt_task_dependence_t;
/* Thread kinds, as passed to ompt_callback_thread_begin_t. */
typedef enum ompt_thread_type_e {
  ompt_thread_initial = 1,
  ompt_thread_worker = 2,
  ompt_thread_other = 3,
  ompt_thread_unknown = 4
} ompt_thread_type_t;

/* Marks whether a callback reports the beginning or the end of a scope. */
typedef enum ompt_scope_endpoint_e {
  ompt_scope_begin = 1,
  ompt_scope_end = 2
} ompt_scope_endpoint_t;
/* Synchronization region kinds, as passed to ompt_callback_sync_region_t. */
typedef enum ompt_sync_region_kind_e {
  ompt_sync_region_barrier = 1,
  ompt_sync_region_taskwait = 2,
  ompt_sync_region_taskgroup = 3
} ompt_sync_region_kind_t;

/* Target data operations, as passed to ompt_callback_target_data_op_t. */
typedef enum ompt_target_data_op_e {
  ompt_target_data_alloc = 1,
  ompt_target_data_transfer_to_dev = 2,
  ompt_target_data_transfer_from_dev = 3,
  ompt_target_data_delete = 4
} ompt_target_data_op_t;
/* Worksharing construct kinds, as passed to ompt_callback_work_t. */
typedef enum ompt_work_type_e {
  ompt_work_loop = 1,
  ompt_work_sections = 2,
  ompt_work_single_executor = 3,
  ompt_work_single_other = 4,
  ompt_work_workshare = 5,
  ompt_work_distribute = 6,
  ompt_work_taskloop = 7
} ompt_work_type_t;
/* Mutual-exclusion kinds, as passed to the lock and mutex callbacks
 * (ompt_callback_lock_init_t, ompt_callback_mutex_t, ...). */
typedef enum ompt_mutex_kind_e {
  ompt_mutex_kind_unknown = 0,
  ompt_mutex = 0x10,
  ompt_mutex_lock = 0x11,
  ompt_mutex_nest_lock = 0x12,
  ompt_mutex_critical = 0x13,
  ompt_mutex_atomic = 0x14,
  ompt_mutex_ordered = 0x20
} ompt_mutex_kind_t;
/* Native monitoring flags; every flag occupies its own bit.
 * NOTE(review): presumably combined into the `flags` argument of
 * ompt_set_trace_native_t -- confirm. */
typedef enum ompt_native_mon_flags_e {
  ompt_native_data_motion_explicit = 0x01,
  ompt_native_data_motion_implicit = 0x02,
  ompt_native_kernel_invocation = 0x04,
  ompt_native_kernel_execution = 0x08,
  ompt_native_driver = 0x10,
  ompt_native_runtime = 0x20,
  ompt_native_overhead = 0x40,
  ompt_native_idleness = 0x80
} ompt_native_mon_flags_t;
/* Task kinds, as passed to ompt_callback_task_create_t and reported by
 * ompt_get_task_info_t. */
typedef enum ompt_task_type_e {
  ompt_task_initial = 1,
  ompt_task_implicit = 2,
  ompt_task_explicit = 3,
  ompt_task_target = 4
} ompt_task_type_t;

/* Status of the prior task, as passed to ompt_callback_task_schedule_t. */
typedef enum ompt_task_status_e {
  ompt_task_complete = 1,
  ompt_task_yield = 2,
  ompt_task_cancel = 3,
  ompt_task_others = 4
} ompt_task_status_t;
/* Target construct kinds, as passed to ompt_callback_target_t. */
typedef enum ompt_target_type_e {
  ompt_target = 1,
  ompt_target_enter_data = 2,
  ompt_target_exit_data = 3,
  ompt_target_update = 4
} ompt_target_type_t;

/* Invoker kinds, as passed to the parallel begin/end callbacks. */
typedef enum ompt_invoker_e {
  ompt_invoker_program = 1,
  ompt_invoker_runtime = 2
} ompt_invoker_t;
/* Target mapping flags; every flag occupies its own bit.
 * NOTE(review): presumably the values stored in the mapping_flags array of
 * ompt_callback_target_map_t -- confirm. */
typedef enum ompt_target_map_flag_e {
  ompt_target_map_flag_to = 0x01,
  ompt_target_map_flag_from = 0x02,
  ompt_target_map_flag_alloc = 0x04,
  ompt_target_map_flag_release = 0x08,
  ompt_target_map_flag_delete = 0x10,
  ompt_target_map_flag_implicit = 0x20
} ompt_target_map_flag_t;
/* Task dependence types.
 * NOTE(review): presumably the values stored in
 * ompt_task_dependence_t.dependence_flags -- confirm. */
typedef enum ompt_task_dependence_flag_e {
  ompt_task_dependence_type_out = 1,
  ompt_task_dependence_type_in = 2,
  ompt_task_dependence_type_inout = 3
} ompt_task_dependence_flag_t;
/* Cancellation flags; every flag occupies its own bit.
 * NOTE(review): presumably combined into the `flags` argument of
 * ompt_callback_cancel_t -- confirm. */
typedef enum ompt_cancel_flag_e {
  ompt_cancel_parallel = 1 << 0,
  ompt_cancel_sections = 1 << 1,
  ompt_cancel_do = 1 << 2,
  ompt_cancel_taskgroup = 1 << 3,
  ompt_cancel_activated = 1 << 4,
  ompt_cancel_detected = 1 << 5
} ompt_cancel_flag_t;

/* 64-bit hardware identifier; ompt_hwid_none is its "none" value. */
typedef uint64_t ompt_hwid_t;
#define ompt_hwid_none 0


/* OMPT init and finalization */

/* Forward declaration: the initialize/finalize signatures below take a
 * pointer to the structure that is only defined after them. */
struct ompt_fns_t;
/* Generic function pointer returned by a lookup. */
typedef void (*ompt_interface_fn_t)(void);
/* Lookup: maps an interface function name to a generic function pointer. */
typedef ompt_interface_fn_t (*ompt_function_lookup_t) (
  const char *interface_function_name
);
/* Initializer: receives the lookup function and the tool's function table. */
typedef int (*ompt_initialize_t) (
  ompt_function_lookup_t lookup,
  struct ompt_fns_t *fns
);
/* Finalizer: receives the tool's function table. */
typedef void (*ompt_finalize_t) (
  struct ompt_fns_t *fns
);
/* Function table holding the initializer and the finalizer. */
typedef struct ompt_fns_t {
  ompt_initialize_t initialize;
  ompt_finalize_t finalize;
} ompt_fns_t;
/* Returns a pointer to a function table.
 * NOTE(review): in the OMPT interface this symbol is normally provided by
 * the tool rather than by the runtime -- confirm. */
_OMP_EXTERN ompt_fns_t *ompt_start_tool(
  unsigned int omp_version,
  const char *runtime_version
);

/* callback
 * Typed signatures for tool callbacks. Registration goes through
 * ompt_set_callback_t, which takes the generic ompt_callback_t.
 * NOTE(review): presumably each callback is cast to/from the matching
 * typedef below -- confirm. */

/* Thread begin. */
typedef void (*ompt_callback_thread_begin_t) (
  ompt_thread_type_t thread_type,
  ompt_data_t *thread_data
);
/* Thread end. */
typedef void (*ompt_callback_thread_end_t) (
  ompt_data_t *thread_data
);
/* Idle; endpoint tells begin from end. */
typedef void (*ompt_callback_idle_t) (
  ompt_scope_endpoint_t endpoint
);
/* Parallel region begin. */
typedef void (*ompt_callback_parallel_begin_t) (
  ompt_data_t *parent_task_data,
  const ompt_frame_t *parent_frame,
  ompt_data_t *parallel_data,
  unsigned int requested_team_size,
  ompt_invoker_t invoker,
  const void *codeptr_ra
);
/* Parallel region end. */
typedef void (*ompt_callback_parallel_end_t) (
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  ompt_invoker_t invoker,
  const void *codeptr_ra
);
/* Master region; endpoint tells begin from end. */
typedef void (*ompt_callback_master_t) (
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  const void *codeptr_ra
);
/* Task creation. */
typedef void (*ompt_callback_task_create_t) (
  ompt_data_t *parent_task_data,
  const ompt_frame_t *parent_frame,
  ompt_data_t *new_task_data,
  ompt_task_type_t type,
  int has_dependences,
  const void *codeptr_ra
);
/* Dependences of one task: an array of ndeps entries. */
typedef void (*ompt_callback_task_dependences_t) (
  ompt_data_t *task_data,
  const ompt_task_dependence_t *deps,
  int ndeps
);
/* A single dependence between a source task and a sink task. */
typedef void (*ompt_callback_task_dependence_t) (
  ompt_data_t *src_task_data,
  ompt_data_t *sink_task_data
);
/* Task scheduling point: prior task with its status, and the next task. */
typedef void (*ompt_callback_task_schedule_t) (
  ompt_data_t *prior_task_data,
  ompt_task_status_t prior_task_status,
  ompt_data_t *next_task_data
);
/* Implicit task; endpoint tells begin from end. */
typedef void (*ompt_callback_implicit_task_t) (
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  unsigned int team_size,
  unsigned int thread_num
);
/* Synchronization region (kind selects barrier/taskwait/taskgroup). */
typedef void (*ompt_callback_sync_region_t) (
  ompt_sync_region_kind_t kind,
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  const void *codeptr_ra
);
/* Lock initialization. */
typedef void (*ompt_callback_lock_init_t) (
  ompt_mutex_kind_t kind,
  unsigned int hint,
  unsigned int impl,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra
);
/* Lock destruction. */
typedef void (*ompt_callback_lock_destroy_t) (
  ompt_mutex_kind_t kind,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra
);
/* Mutex acquire; same parameter list as ompt_callback_lock_init_t. */
typedef void (*ompt_callback_mutex_acquire_t) (
  ompt_mutex_kind_t kind,
  unsigned int hint,
  unsigned int impl,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra
);
/* Generic mutex event.
 * NOTE(review): presumably shared by the ompt_callback_mutex_acquired and
 * ompt_callback_mutex_released events -- confirm. */
typedef void (*ompt_callback_mutex_t) (
  ompt_mutex_kind_t kind,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra
);
/* Nest lock; endpoint tells begin from end. */
typedef void (*ompt_callback_nest_lock_t) (
  ompt_scope_endpoint_t endpoint,
  ompt_wait_id_t wait_id,
  const void *codeptr_ra
);
/* Worksharing construct (wstype selects the construct). */
typedef void (*ompt_callback_work_t) (
  ompt_work_type_t wstype,
  ompt_scope_endpoint_t endpoint,
  ompt_data_t *parallel_data,
  ompt_data_t *task_data,
  uint64_t count,
  const void *codeptr_ra
);
/* Flush. */
typedef void (*ompt_callback_flush_t) (
  ompt_data_t *thread_data,
  const void *codeptr_ra
);
/* Target construct (kind selects the construct). */
typedef void (*ompt_callback_target_t) (
  ompt_target_type_t kind,
  ompt_scope_endpoint_t endpoint,
  int device_id,
  ompt_data_t *task_data,
  ompt_id_t target_id,
  const void *codeptr_ra
);
/* Target data operation (optype selects alloc/transfer/delete). */
typedef void (*ompt_callback_target_data_op_t) (
  ompt_id_t target_id,
  ompt_id_t host_op_id,
  ompt_target_data_op_t optype,
  void *host_addr,
  void *device_addr,
  size_t bytes
);
/* Target map: four parallel arrays.
 * NOTE(review): presumably nitems entries each -- confirm. */
typedef void (*ompt_callback_target_map_t) (
  ompt_id_t target_id,
  unsigned int nitems,
  void **host_addr,
  void **device_addr,
  size_t *bytes,
  unsigned int *mapping_flags
);
/* Target submit. */
typedef void (*ompt_callback_target_submit_t) (
  ompt_id_t target_id,
  ompt_id_t host_op_id
);
/* Tool control: the only callback in this group that returns a value. */
typedef int (*ompt_callback_control_tool_t) (
  uint64_t command,
  uint64_t modifier,
  void *arg
);
/* Cancellation. */
typedef void (*ompt_callback_cancel_t) (
  ompt_data_t *task_data,
  int flags,
  const void *codeptr_ra
  );
/* Device initialization; note that lookup is passed by pointer here. */
typedef void (*ompt_callback_device_initialize_t) (
  int device_id,
  const char *type,
  ompt_device_t *device,
  ompt_function_lookup_t *lookup,
  const char *documentation
);

/* OMPT entry point for tools */

/* State enumeration: takes the current state and writes the next state and
 * its name through the two out-parameters. */
typedef int (*ompt_enumerate_states_t)(
  int current_state, 
  int *next_state, 
  const char **next_state_name
);
/* Mutex implementation enumeration: same shape as the state enumeration. */
typedef int (*ompt_enumerate_mutex_impls_t)(
  int current_impl, 
  int *next_impl, 
  const char **next_impl_name
);
/* Value denoting an unknown mutex implementation. */
#define ompt_mutex_impl_unknown 0
/* Result codes for setting a callback.
 * NOTE(review): presumably the values returned by ompt_set_callback_t, whose
 * declared return type is int -- confirm. */
typedef enum ompt_set_result_e {
  ompt_set_error = 0,
  ompt_set_none = 1,
  ompt_set_sometimes = 2,
  ompt_set_sometimes_paired = 3,
  ompt_set_always = 4
} ompt_set_result_t;
/* Registers `callback` for the event `which`. */
typedef int (*ompt_set_callback_t)(
  ompt_callbacks_t which,
  ompt_callback_t callback
);
/* Retrieves the callback for the event `which` through the out-parameter. */
typedef int (*ompt_get_callback_t)(
  ompt_callbacks_t which,
  ompt_callback_t *callback
);
/* Returns a pointer to an ompt_data_t for the thread. */
typedef ompt_data_t *(*ompt_get_thread_data_t)(void);
/* Place queries; the array-filling variants take the array size first. */
typedef int (*ompt_get_num_places_t)(void);
typedef int (*ompt_get_place_proc_ids_t)(
    int place_num,
    int ids_size,
    int *ids
  );
typedef int (*ompt_get_place_num_t)(void);
typedef int (*ompt_get_partition_place_nums_t)(
    int place_nums_size,
    int *place_nums
  );
typedef int (*ompt_get_proc_id_t)(void);
/* Returns a state; wait_id is an out-parameter. */
typedef omp_state_t (*ompt_get_state_t)(
  ompt_wait_id_t *wait_id
);
/* Parallel region information for a given ancestor level; results are
 * written through the out-parameters. */
typedef int (*ompt_get_parallel_info_t)(
  int ancestor_level,
  ompt_data_t **parallel_data,
  int *team_size
);
/* Task information for a given ancestor level; results are written through
 * the out-parameters. */
typedef int (*ompt_get_task_info_t)(
  int ancestor_level,
  ompt_task_type_t *type,
  ompt_data_t **task_data,
  ompt_frame_t **task_frame,
  ompt_data_t **parallel_data,
  int *thread_num
);
/* Returns a 64-bit id. Declared with (void), like the other parameterless
 * entry points in this header, so that C compilers see a real prototype. */
typedef uint64_t (*ompt_get_unique_id_t)(void);
/* Target information; results are written through the out-parameters. */
typedef int (*ompt_get_target_info_t)(
  int *device_id,
  ompt_id_t *target_id,
  ompt_id_t *host_op_id
);
/* Device count. */
typedef int (*ompt_get_num_devices_t)(void);
/* Reads a device time stamp. */
typedef ompt_device_time_t (*ompt_get_device_time_t)(
  ompt_device_t *device
);
/* Converts a device time stamp to a double. */
typedef double (*ompt_translate_time_t)(
  ompt_device_t *device,
  ompt_device_time_t time
);
/* Trace control for OMPT records on a device. */
typedef int (*ompt_set_trace_ompt_t)(
  ompt_device_t *device,
  unsigned int enable,
  unsigned int etype
);
/* Trace control for native records on a device. */
typedef int (*ompt_set_trace_native_t)(
  ompt_device_t *device,
  int enable,
  int flags
);
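/*
 * The trace-buffer entry points below are commented out. Note that they
 * reference types this header does not declare
 * (ompt_callback_buffer_request_t, ompt_callback_buffer_complete_t,
 * ompt_callback_get_target_info_t, ompt_record_type_t, ompt_record_ompt_t,
 * ompt_record_native_abstract_t).
 *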
typedef int (*ompt_start_trace_t)(
  ompt_device_t *device,
  ompt_callback_buffer_request_t request,
  ompt_callback_buffer_complete_t complete,
  ompt_callback_get_target_info_t get_info
);
typedef int (*ompt_pause_trace_t)(
  ompt_device_t *device,
  int begin_pause
);
typedef int (*ompt_stop_trace_t)(
  ompt_device_t *device
);
typedef int (*ompt_advance_buffer_cursor_t)(
  ompt_buffer_t *buffer,
  size_t size, 
  ompt_buffer_cursor_t current, 
  ompt_buffer_cursor_t *next
); 
typedef ompt_record_type_t (*ompt_buffer_get_record_type_t)(
  ompt_buffer_t *buffer, 
  ompt_buffer_cursor_t current
);
typedef ompt_record_ompt_t *(*ompt_buffer_get_record_ompt_t)(
  ompt_buffer_t *buffer, 
  ompt_buffer_cursor_t current
);
typedef void *(*ompt_buffer_get_record_native_t)(
  ompt_buffer_t *buffer, 
  ompt_buffer_cursor_t current,
  ompt_id_t *host_op_id
);
typedef ompt_record_native_abstract_t *
(*ompt_buffer_get_record_native_abstract_t)(
  void *native_record
);
*/



/*****************************************************************************
 * OMPT API for user
 *****************************************************************************/
/* Result codes of omp_control_tool(): negative values are failures, zero is
 * success, and the positive value means the command was ignored. */
typedef enum omp_control_tool_result_t {
  omp_control_tool_notool     = -2,
  omp_control_tool_nocallback = -1,
  omp_control_tool_success    = 0,
  omp_control_tool_ignored    = 1
} omp_control_tool_result_t;

#if !defined(_OPENMP)
/*
 * Tools without OpenMP: the stub ignores its arguments and always reports
 * omp_control_tool_notool.
 */
_OMP_INLINE int omp_control_tool(int command, int modifier, void *arg)
{
  return (int) omp_control_tool_notool;
}
#else
/* With OpenMP the routine is only declared here. */
_OMP_EXTERN int omp_control_tool(int command, int modifier, void *arg);
#endif

/* OMPT API End */

/*****************************************************************************
 * Defines
 *****************************************************************************/

/* Remove the header-private helper macros defined near the top of this
 * file so that they are not visible to code that includes this header. */
#undef _OMP_EXTERN 
#undef _OMP_INLINE 

#endif
