///-*-C++-*-//////////////////////////////////////////////////////////////////
//
// Hoard: A Fast, Scalable, and Memory-Efficient Allocator
//        for Shared-Memory Multiprocessors
// Contact author: Emery Berger, http://www.cs.utexas.edu/users/emery
//
// Copyright (c) 1998-2000, The University of Texas at Austin.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Library General Public License as
// published by the Free Software Foundation, http://www.fsf.org.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Library General Public License for more details.
//
//////////////////////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////////////////////
//
// Note: This file was modified by Crystal Decisions in June 2002.
//
//////////////////////////////////////////////////////////////////////////////


#include <assert.h>

#include "arch-specific.h"

#if defined(CRYSTAL_HOARD) && defined(__SVR4)
  // The Solaris version of the Crystal Hoard will call malloc instead of mmap.
  // This is a bit of an experiment.  mmap gets very slow when managing many
  // small allocations, which we need to do if we want to release blocks.
  #include <dlfcn.h>
#endif

// How many iterations we spin waiting for a lock.
enum { SPIN_LIMIT = 100 };

// Crystal will define this in arch-specific.h
#if !defined(CRYSTAL_HOARD)
// The values of a user-level lock.
enum { UNLOCKED = 0, LOCKED = 1 };
#endif

extern "C" {

#if defined(__BEOS__)

#include <OS.h>
#include <unistd.h>

void hoardCreateThread (hoardThreadType &t,
			void *( *function)( void *),
			void *arg)
{
  t = spawn_thread((int32 (*)(void*))function, "some thread",
		   B_NORMAL_PRIORITY, arg);
  if (t >= B_OK) resume_thread(t);
  else debugger("spawn_thread() failed!");
}


void hoardJoinThread (hoardThreadType &t)
{
  status_t dummy;
  wait_for_thread(t, &dummy);
}


void hoardSetConcurrency (int)
{
}


int hoardGetThreadID (void)
{
  return find_thread(0);
}


void hoardLockInit (hoardLockType &lock)
{
  lock.ben = 0;
  lock.sem = create_sem(0, "a hoard lock");
}


void hoardLock (hoardLockType &lock)
{
  if((atomic_add(&(lock.ben), 1)) >= 1) acquire_sem(lock.sem);
}


void hoardUnlock (hoardLockType &lock)
{
  if((atomic_add(&(lock.ben), -1)) > 1) release_sem(lock.sem);
}

int hoardGetPageSize (void)
{
  return B_PAGE_SIZE;
}

int hoardGetNumProcessors (void)
{
  system_info si;
  status_t result = get_system_info(&si);
  assert (result == B_OK);
  return si.cpu_count;
}


void * hoardSbrk (long size)
{
  return sbrk(size + hoardHeap::ALIGNMENT - 1);
}


void hoardUnsbrk (void * ptr, long size)
{
  // NOT CURRENTLY IMPLEMENTED!
}

void hoardYield (void)
{
}

unsigned long hoardInterlockedExchange (unsigned long * oldval,
					unsigned long newval)
{
  // This *should* be made atomic.  It's never used in the BeOS
  // version, so this is included strictly for completeness.
  unsigned long o = *oldval;
  *oldval = newval;
  return o;
}


#elif !defined(WIN32)   // UNIX


#if USE_SPROC
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <ulocks.h>
#endif

void hoardCreateThread (hoardThreadType& t,
			void *(*function) (void *),
			void * arg)
{
#if USE_SPROC
  typedef void (*sprocFunction) (void *);
  t = sproc ((sprocFunction) function, PR_SADDR, arg);
#else
  pthread_attr_t attr;
  pthread_attr_init (&attr);
#if defined(_AIX)
  // Bound (kernel-level) threads.
  pthread_attr_setscope (&attr, PTHREAD_SCOPE_SYSTEM);
#endif
  pthread_create (&t, &attr, function, arg);
#endif
}

void hoardJoinThread (hoardThreadType& t)
{
#if USE_SPROC
  waitpid (t, 0, 0);
#else
  pthread_join (t, NULL);
#endif
}

#if defined(__linux)
// This extern declaration is required for Linux.
extern "C" void pthread_setconcurrency (int n);
#endif

void hoardSetConcurrency (int n)
{
#if USE_SPROC
  usconfig (CONF_INITUSERS, n);
#elif defined(__SVR4) // Solaris
  thr_setconcurrency (n);
#else
  pthread_setconcurrency (n);
#endif
}


#if defined(__SVR4) // Solaris

// Solaris's two-level threads model gives us an edge here;
// we can hash on the LWP's id. This helps us in two ways:
// (1) there are likely to be far fewer LWP's than threads,
// (2) if there's a one-to-one correspondence between LWP's
//     and the number of processors (usually the case), then
//     the number of heaps used will be the same as the number
//     of processors (the optimal case).
// Here we rely on an undocumented call in libthread.so, which
// turns out to be MUCH cheaper than the documented _lwp_self(). Go figure.

  #if defined(CRYSTAL_HOARD)
    // Crystal will use the undocumented _lwp_self(), only if it exists
    // in the process' symbol space.  Otherwise, it will use the documented
    // version.  The allows us to LD_PRELOAD the Hoard into single-threaded apps.
    // Not much to be gained by this, except that now you don't have to worry
    // about your LD_PRELOAD so much.
    #include <sys/lwp.h>
    typedef unsigned int (*lwp_self_func)(void);
  #endif
  extern "C" unsigned int lwp_self(void);
#endif

int hoardGetThreadID (void) {
#if USE_SPROC
  // This hairiness has the same effect as calling getpid(),
  // but it's MUCH faster since it avoids making a system call
  // and just accesses the sproc-local data directly.
  int pid = (int) PRDA->sys_prda.prda_sys.t_pid;
  return pid;
#else
#if defined(__linux)
  // Consecutive thread id's in Linux are 1024 apart;
  // dividing off the 1024 gives us an appropriate thread id.
  return (int) pthread_self() >> 10; // (>> 10 = / 1024)
#endif
#if defined(__SVR4)

  #if defined(CRYSTAL_HOARD)
  // Crystal will use the undocumented lwp_self() function only if
  // it exists in the process' symbol space.
  static lwp_self_func undocumented_lwp_self = (lwp_self_func)dlsym(RTLD_DEFAULT, "lwp_self");
  if ( undocumented_lwp_self )
    return (int)undocumented_lwp_self();
  else
    return (int) _lwp_self();
  #else
  return (int) lwp_self();
  #endif
#endif
  return (int) pthread_self();
#endif
}


// If we are using either Intel or SPARC,
// we use our own lock implementation
// (spin then yield). This is much cheaper than
// the ordinary mutex, at least on Linux and Solaris.

#if USER_LOCKS && (defined(i386) || defined(sparc) || defined(__sgi) || defined(ppc))

#include <sched.h>

#if defined(__sgi)
#include <mutex.h>
#endif


// Atomically:
//   retval = *oldval;
//   *oldval = newval;
//   return retval;

#if defined(sparc) && !defined(__GNUC__)
extern "C" unsigned long InterlockedExchange (unsigned long * oldval,
					      unsigned long newval);
#else
static inline unsigned long InterlockedExchange (unsigned long * oldval,
						 unsigned long newval)
{
#if defined(sparc)
  asm volatile ("swap [%1],%0"
		:"=r" (newval)
		:"r" (oldval), "0" (newval)
		: "memory");

#endif
#if defined(i386)
  asm volatile ("xchgl %0, %1"
		: "=r" (newval)
		: "m" (*oldval), "0" (newval)
		: "memory");
#endif
#if defined(__sgi)
  newval = test_and_set (oldval, newval);
#endif
#if defined(ppc)
  int ret;
  asm volatile ("sync;"
		"0:    lwarx %0,0,%1 ;"
		"      xor. %0,%3,%0;"
		"      bne 1f;"
		"      stwcx. %2,0,%1;"
		"      bne- 0b;"
		"1:    sync"
	: "=&r"(ret)
	: "r"(oldval), "r"(newval), "r"(*oldval)
	: "cr0", "memory");
#endif
  return newval;
}
#endif

unsigned long hoardInterlockedExchange (unsigned long * oldval,
					unsigned long newval)
{
  return InterlockedExchange (oldval, newval);
}

void hoardLockInit (hoardLockType& mutex) {
  InterlockedExchange (&mutex, UNLOCKED);
}

#include <stdio.h>

void hoardLock (hoardLockType& mutex) {
  // A yielding lock (with an initial spin).
  int i;
  while (1) {
    i = 0;
    while (i < SPIN_LIMIT) {
      if (InterlockedExchange (&mutex, LOCKED) == UNLOCKED) {
        // We got the lock.
        return;
      }
      i++;
    }
    // The lock is still being held by someone else.
    // Give up our quantum.
    hoardYield();
  }
}

void hoardUnlock (hoardLockType& mutex) {
  InterlockedExchange (&mutex, UNLOCKED);
}

#else

// use non-user-level locks. 

#endif // USER_LOCKS

#if defined(__SVR4)
#include <thread.h>
#endif

void hoardYield (void)
{
#if defined(__SVR4)
  thr_yield();
#else
  sched_yield();
#endif
}

#if 1 // !(defined(__sgi))
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#endif


#if defined(CRYSTAL_HOARD) && defined(__SVR4)
void * hoardSbrk (long size)
{
  // Try to grab the next nearest "malloc".  This trick will allow other memory allocators to be
  // super-imposed with us.  If we're not sitting on any other memory allocators, this will pick
  // up the libc.so.1 malloc.  If THAT fails, we'll use mmap.
  typedef void * (*mallocFunc)(size_t size);
  static mallocFunc libc_malloc = (mallocFunc)dlsym(RTLD_NEXT, "malloc");

  if ( libc_malloc )
    return libc_malloc(size);
  else
  {
    // Use mmap to get memory from the backing store.
    static int fd = ::open ("/dev/zero", O_RDWR);
    char * ptr = (char *) mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
    if (ptr == (char *) MAP_FAILED)
      return NULL;
    else
      return ptr;
  }
}

void hoardUnsbrk (void * ptr, long size)
{
  // Try to grab the next nearest "free".  This trick will allow other memory allocators to be
  // super-imposed with us.  If we're not sitting on any other memory allocators, this will pick
  // up the libc.so.1 free.  If THAT fails, we'll use munmap.
  typedef void (*freeFunc)(void * ptr);
  static freeFunc libc_free = (freeFunc)dlsym(RTLD_NEXT, "free");

  if ( libc_free )
    libc_free(ptr);
  else
  {
    int result = munmap ((char *) ptr, size);
    assert(result == 0);
  }
}

#else  // CRYSTAL_HOARD && __SVR4

void * hoardSbrk (long size) {
#if defined(linux)
#if USER_LOCKS
  static hoardLockType sbrkLock = 0;
#else
  static hoardLockType sbrkLock = PTHREAD_MUTEX_INITIALIZER;
#endif
#endif
  // Use mmap to get memory from the backing store.
  static int fd = ::open ("/dev/zero", O_RDWR);
#if defined(linux)
  hoardLock (sbrkLock);
#endif
  char * ptr = (char *) mmap (NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
  char * retPtr;
  if (ptr == (char *) MAP_FAILED) {
    retPtr = NULL;
  } else {
    retPtr = ptr;
  }
#if defined(linux)
  hoardUnlock (sbrkLock);
#endif
  return retPtr;
}

void hoardUnsbrk (void * ptr, long size)
{
#if defined(linux)
#if USER_LOCKS
  static hoardLockType sbrkLock = 0;
#else
  static hoardLockType sbrkLock = PTHREAD_MUTEX_INITIALIZER;
#endif
#endif
#if defined(linux)
  hoardLock (sbrkLock);
#endif
  int result = munmap ((char *) ptr, size);
  assert (result == 0);
#if defined(linux)
  hoardUnlock (sbrkLock);
#endif
}

#endif // CRYSTAL_HOARD && __SVR4

int hoardGetPageSize (void)
{
  return (int) sysconf(_SC_PAGESIZE);
}


#if defined(linux)
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#endif

#if defined(__sgi)
#include <sys/types.h>
#include <sys/sysmp.h>
#include <sys/sysinfo.h>
#endif

int hoardGetNumProcessors (void)
{
#if !(defined(linux))
#if defined(__sgi)
  return (int) sysmp(MP_NAPROCS);
#else
  return (int) sysconf(_SC_NPROCESSORS_ONLN);
#endif
#else
  // Ugly workaround.  Linux's sysconf indirectly calls malloc() (at
  // least on multiprocessors).  So we just read the info from the
  // proc file ourselves and count the occurrences of the word
  // "processor".

  // We only parse the first 32K of the CPU file.  By my estimates,
  // that should be more than enough for at least 64 processors.
  enum { MAX_PROCFILE_SIZE = 32768 };
  char line[MAX_PROCFILE_SIZE];
  int numProcessors = 0;
  int fd = open ("/proc/cpuinfo", O_RDONLY);
  assert (fd);
  read(fd, line, MAX_PROCFILE_SIZE);
  char * str = line;
  while (str) {
    str = strstr(str, "processor");
    if (str) {
      numProcessors++;
      str++;
    }
  }
  close (fd);
  assert (numProcessors > 0);
  return numProcessors;
#endif
}

#endif // UNIX

}
