#include <cm/cmmd.h>
#include <cm/timers.h>

#include "fish.h"

#define RANDOM(r) 	((random()%10000)*r/10000.0) 
#define SQR(X)		((X)*(X))
#define ABS(X)		((X>=0) ? X : -(X))


#define RANGE 	40	/* range of ocean */

typedef struct {
	float px,py; 	/* position of fish */
	float vx,vy;	/* velocity of fish */
	float mass;
} AFish;	

main ()
{
	struct arg_data args;
	int fish;
	int tsteps;
	int psteps;
	int myfish;
	AFish *Fish;
	AFish *AllFish;
	int i,j;
	float t;
	float vsum, fx, fy;
	float dx,dy;
	float v,a,v2,d,d2;
	float maxv, maxa;
	float dt = 1;
	int k;

	CMMD_enable_host ();
	CMMD_receive_bc_from_host (&args, sizeof (struct arg_data));
    fish = args.fish;
    tsteps = args.tsteps;
    psteps = args.psteps;
    
    CMNA_timer_clear (0);
    CMNA_timer_start (0);

	/* initialize random number generator */
	srandom(CMMD_self_address());

	/* determine # fish each processor gets */
	myfish = fish / CMMD_partition_size();

	/* allocate space for fish */
	Fish = (AFish*) malloc(sizeof(AFish)*myfish);
	AllFish = (AFish*) malloc(sizeof(AFish)*fish);
	
	/* initialize fish position, velocity, and mass */
	for (i=0;i<myfish;++i) {
		Fish[i].px = RANDOM(100) - 50;
		Fish[i].py = RANDOM(100) - 50;
		Fish[i].vx = RANDOM(1.0);
		Fish[i].vy = RANDOM(1.0);
		Fish[i].mass = 1;
	}

	/* loop over all time steps */
	for (k=0,t=0;t<tsteps;t+=dt) {

		/* collect global info. */
		CMMD_concat_with_nodes(Fish,AllFish,sizeof(AFish)*myfish);

		/* move all fish and accumulate local info.*/
		maxv = 0;
		maxa = 0;
		vsum = 0;
		for (i=0;i<myfish;++i) {

			/* compute global force */
			fx = 0;
			fy = 0;
			for (j=0;j<fish;++j) {
				dx = AllFish[j].px - Fish[i].px;
				dy = AllFish[j].py - Fish[i].py;
				d2 = SQR(dx) + SQR(dy);
				if (dx!=0)
					fx = fx + Fish[i].mass*AllFish[j].mass *
						dx / d2;
				if (dy!=0)
					fy = fy + Fish[i].mass*AllFish[j].mass *
						dy / d2;
			}

			Fish[i].px += Fish[i].vx * dt;
			Fish[i].py += Fish[i].vy * dt;
			Fish[i].vx += fx / Fish[i].mass * dt;
			Fish[i].vy += fy / Fish[i].mass * dt;
			v2 = SQR(Fish[i].vx)+SQR(Fish[i].vy);
			v = sqrt(v2);
			a = sqrt(SQR(fx/Fish[i].mass)+SQR(fy/Fish[i].mass));
			maxv = MAX(maxv,v);
			maxa = MAX(maxa,a);
			vsum += v2;
		}
		/* send info. to host for display */
		if (++k > psteps) {
			k = 0;
			CMMD_send(CMMD_host_node(),0,Fish,sizeof(AFish)*myfish);
		}
		
		/* compute global rms velocity */
		vsum = CMMD_reduce_to_host_float(vsum, CMMD_combiner_fadd);

		/* compute next time step */
		maxv = CMMD_reduce_to_host_float(maxv, CMMD_combiner_fmax);
		maxa = CMMD_reduce_to_host_float(maxa, CMMD_combiner_fmax);
		dt = MIN(0.1*maxv/maxa,1);
	}

	/* synchronize with host to end program */
    CMNA_timer_stop (0);
    CMMD_reduce_to_host_double (CMNA_timer_busy (0), CMMD_combiner_dmax);

	return;
}