#include <Core/Core.h>
#include <nImage/Image.h>

/*
	 - new distance square "(d+x)^2" calculation (from old distance square "d^2" and old delta "d")
	 	delta will change by +-4 (blue), +-2 (red) or +-1 (green)
	 	+-1 : (d+1)^2 = d^2 + (2d + 1)		max 62->63: 125		(max distance delta for green)
	 	+-2 : (d+2)^2 = d^2 + (4d + 4)		max 60->62: 244
	 	+-4 : (d+4)^2 = d^2 + (8d + 16)		max 56->60: 464
*/

#ifdef COMPILER_MSC
#pragma pack(push, 1)
#endif
//One queued flood-fill candidate: a cell of the colour cube together with the
//palette entry that wants to claim it.
//The struct is declared packed (pack(1) under MSC, __attribute__((packed)) under GCC),
//presumably to keep the queued entries as small as possible.
struct sCubePoint3 : Moveable<sCubePoint3> {
	word			address;		//high-color 5:6:4 = address into cube space too (used to index filled[] and the conversion map)
	byte			index;			//index into palette
	int8			delta[3];		//current deltas of this point from origin, one per move axis of fmovedata, in that axis' step units
}
#ifdef COMPILER_GCC
__attribute__((packed))
#endif
;
#ifdef COMPILER_MSC
#pragma pack(pop)
#endif

//Conversion map: one palette index per cell of the quantised colour cube.
//A cell address is built from the three reduced channels, each moved to its own
//bit position (see R_SHIFT / G_SHIFT / B_SHIFT).
struct PaletteCv3 {
	enum {
		//largest step between two consecutive squared distances (blue axis, 56 -> 60);
		//it also fixes the number of buckets the fill queue needs
		MAX_DISTANCE_DELTA 	= (8*56+16),

		//bit position of each channel inside a cell address
		R_SHIFT				= 10,
		G_SHIFT				= 0,
		B_SHIFT				= 6,
		//address bits occupied by each channel
		R_MASK				= ((RASTER_MAP_R-1)<<R_SHIFT),
		G_MASK				= ((RASTER_MAP_G-1)<<G_SHIFT),
		B_MASK				= ((RASTER_MAP_B-1)<<B_SHIFT),
		//address increment for one step along each channel
		R_ADR_ADD			= (1<<R_SHIFT),
		G_ADR_ADD			= (1<<G_SHIFT),
		B_ADR_ADD			= (1<<B_SHIFT),
	};

	//the table itself, RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B bytes
	Buffer<byte> cv;

	//Allocates the table; its contents are left for the generator to fill.
	PaletteCv3()
	{
		cv.Alloc(RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B);
	}

	//Returns the cell address of colour c.
	static inline word GetIndex(const RGBA &c)
	{
		const word red   = word(c.r >> RASTER_SHIFT_R);
		const word green = word(c.g >> RASTER_SHIFT_G);
		const word blue  = word(c.b >> RASTER_SHIFT_B);
		return (red << R_SHIFT) + (green << G_SHIFT) + (blue << B_SHIFT);
	}

	//Returns the palette index stored for colour c.
	byte Get(const RGBA& c) const
	{
		const word cell = GetIndex(c);
		return cv[cell];
	}
};

//generator of data for conversion maps
//All the work happens in the constructor, which fills cv_pal.cv from the palette.
struct sPalCv3 {
	PaletteCv3& 		cv_pal;		//conversion map that receives the result
	const RGBA 			*palette;	//palette colours; each one seeds the fill
	int         		ncolors;	//number of entries read from palette
	//FIFO queue for parallel flood fill, radix-sorted by distance of points from their origin
	//the radix sort works on dynamic subset of distances, as you never need the full range during fill
	Vector<sCubePoint3> feed_me[PaletteCv3::MAX_DISTANCE_DELTA+1];
	byte				filled[RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B];	//nonzero once the cell at that address has been assigned

	//Queues the neighbour of cubpt that lies one step along axis `move`.
	//  ii          - bucket the point cubpt was taken from
	//  add2address - address step to the neighbour (a negated step arrives wrapped into a word)
	//  move        - axis index, used for both fmovedata[] and delta[]
	//  a           - signed multiplier applied to cubpt.delta[move] when computing the new bucket
	//  add2delta   - signed amount added to delta[move] of the queued point
	void AddPoint(const sCubePoint3 & cubpt, int ii, word add2address, int move, int a, int8 add2delta);

	//Builds the whole conversion map cv_pal from the first ncolors entries of palette.
	sPalCv3(const RGBA *palette, int ncolors, PaletteCv3& cv_pal);
};

//Per-axis constants for one flood-fill move direction (one row of fmovedata per colour channel).
struct sFillMovementData {
	word			mask;			//address bits that belong to this channel
	word			mask_delta;		//address change for one step along this channel
	int8			delta;			//amount by which sCubePoint3::delta[] changes per step
	int				a, b;			//bucket index grows by a*delta[] + b for one outward step
};
//Move table, indexed by axis: 0 = red, 1 = green, 2 = blue.
//For a step of size s the squared distance grows by (d+s)^2 - d^2 = (2*s)*d + s*s,
//so each row holds { mask, address step, s, 2*s, s*s }.
static const sFillMovementData fmovedata[3] =
{
	{ PaletteCv3::R_MASK, PaletteCv3::R_ADR_ADD, 2, 2*2, 2*2 },	//red:   s = 2
	{ PaletteCv3::G_MASK, PaletteCv3::G_ADR_ADD, 1, 2*1, 1*1 },	//green: s = 1
	{ PaletteCv3::B_MASK, PaletteCv3::B_ADR_ADD, 4, 2*4, 4*4 },	//blue:  s = 4
};

void sPalCv3::AddPoint(const sCubePoint3 & cubpt, int ii, word add2address, int move, int a, int8 add2delta)
{
	int newii;
	sCubePoint3	cubpt2;

	cubpt2.address = cubpt.address + add2address;
	if ( filled[cubpt2.address] ) return;
	
	newii = ii + a * cubpt.delta[move] + fmovedata[move].b;
	ASSERT( newii > ii );
	if ( newii > PaletteCv3::MAX_DISTANCE_DELTA ) {
		newii -= PaletteCv3::MAX_DISTANCE_DELTA+1;
		ASSERT( newii <= PaletteCv3::MAX_DISTANCE_DELTA );
		ASSERT( newii < ii );
	}
	cubpt2.delta[0] = cubpt.delta[0];
	cubpt2.delta[1] = cubpt.delta[1];
	cubpt2.delta[2] = cubpt.delta[2];
	cubpt2.index	= cubpt.index;
	cubpt2.delta[move] += add2delta;
	feed_me[newii].Add(cubpt2);
}


//Fills cv_pal.cv so that every cell of the colour cube holds a palette index.
//All palette colours are queued as origins and grow outward at the same time;
//buckets are visited in ring order, the bucket index being the squared distance
//from the origin modulo MAX_DISTANCE_DELTA+1.
//  palette - colours to map to, read at indices 0 .. ncolors-1
//  ncolors - number of palette entries; the index is stored in a byte
//            (NOTE(review): values above 256 would be truncated - confirm callers never pass them)
//  cv_pal  - conversion map that receives the result
//With ncolors <= 0 there is nothing to start the fill from, so the constructor
//returns immediately and leaves cv_pal.cv untouched.
sPalCv3::sPalCv3(const RGBA *palette, int ncolors, PaletteCv3& cv_pal)
:	cv_pal(cv_pal), palette(palette), ncolors(ncolors)	//same order as the member declarations
{
	ZeroArray(filled);

	//without a single origin every bucket stays empty and the loop below would never end
	if ( ncolors <= 0 ) return;

	int			ii, jj = (RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B), move;
	sCubePoint3	cubpt;

	feed_me[0].Reserve(ncolors);
	//Fill up the queue with colors from palette,
	//those will start the parallel flood fill in the color cube space
	ii = ncolors;
	cubpt.delta[0] = cubpt.delta[1] = cubpt.delta[2] = 0;
	while (ii--) {
		cubpt.index		= ii;
		cubpt.address	= cv_pal.GetIndex(palette[ii]);
		feed_me[0].Add(cubpt);
	}
	//process the queue until all points in color cube space are filled (jj == 0)
	ii = 0;
	while ( true ) {
		while ( !feed_me[ii].IsEmpty() ) {
			cubpt = feed_me[ii].Pop();
			//a cell can be queued several times; only the first arrival claims it
			if ( filled[cubpt.address] ) continue;
			cv_pal.cv[cubpt.address] = cubpt.index;
			if ( --jj == 0 ) return;
			filled[cubpt.address] = 1;
			//try all possible moves (6 possible directions);
			//a point only moves further away from its origin along each axis
			for ( move = 0; move < 3; ++move )
			{
				if ( ( cubpt.delta[move] >= 0 ) &&
					 ( (cubpt.address & fmovedata[move].mask) < fmovedata[move].mask ) )
					 AddPoint(cubpt, ii, fmovedata[move].mask_delta, move, fmovedata[move].a, fmovedata[move].delta);
				if ( ( cubpt.delta[move] <= 0 ) &&
					 ( (cubpt.address & fmovedata[move].mask) > 0 ) )
					 AddPoint(cubpt, ii, -fmovedata[move].mask_delta, move, -fmovedata[move].a, -fmovedata[move].delta);
			}
		}
		//advance to the next bucket, wrapping around the ring
		if ( ++ii > PaletteCv3::MAX_DISTANCE_DELTA ) ii = 0;
	}
}

/*	OLD version  */

//Constants of the old generator (sPalCv2).
enum {
	//channel differences are shifted right by this much before being squared
	RASTER_MAP_MAX_SHIFT = 2,
	
	//step added to / subtracted from a channel value when moving to a neighbouring cell
	RASTER_MAP_R_ADD = (1<<RASTER_SHIFT_R),
	RASTER_MAP_G_ADD = (1<<RASTER_SHIFT_G),
	RASTER_MAP_B_ADD = (1<<RASTER_SHIFT_B),
	//upper bound of the bucket index; sPalCv2 allocates RASTER_MAP_MAX_DIST+1 buckets
	RASTER_MAP_MAX_DIST = 3*((RASTER_MAP_MAX-1)*(RASTER_MAP_MAX-1))
};

//Conversion map of the old generator: one palette index per colour cube cell.
struct PaletteCv2 {
	//the table itself, RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B bytes
	Buffer<byte> cv;

	//Allocates the table; the contents are written by sPalCv2.
	PaletteCv2()
	{
		cv.Alloc(RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B);
	}

	//Returns the cell address of colour c: red from bit 10, blue from bit 6, green from bit 0.
	static inline word GetIndex(const RGBA &c)
	{
		const int red_part   = int(c.r >> RASTER_SHIFT_R) << 10;
		const int green_part = int(c.g >> RASTER_SHIFT_G);
		const int blue_part  = int(c.b >> RASTER_SHIFT_B) << 6;
		return red_part + green_part + blue_part;
	}

	//Returns the palette index stored for colour c.
	byte Get(const RGBA& c) const
	{
		return cv[GetIndex(c)];
	}
};

//Queue entry of the old generator: a colour to be claimed and the palette entry claiming it.
struct sCubePoint : Moveable<sCubePoint> {
	RGBA	mycol;	//colour of the cell; sPalCv2 turns it into a cell address with GetIndex
	byte	index;	//index into palette
};

//Old generator of conversion map data; the constructor does all the work.
struct sPalCv2 {
	PaletteCv2&  cv_pal;	//conversion map that receives the result
	const RGBA *palette;	//palette colours; each one seeds the fill
	int         ncolors;	//number of entries read from palette

	//Queues colour (r, g, b) for palette entry idx into the bucket of `feed`
	//selected by the squared distance between that colour and palette[idx].
	void AddPoint(byte r, byte g, byte b, byte idx, Vector<sCubePoint> *feed);

	//Builds the whole conversion map cv_pal from the first ncolors entries of palette.
	sPalCv2(const RGBA *palette, int ncolors, PaletteCv2& cv_pal);
};

//Queues colour (r, g, b) on behalf of palette entry idx.
//The bucket is the sum of the squared channel differences to palette[idx],
//each difference being reduced by RASTER_MAP_MAX_SHIFT bits first.
//  feed - array of buckets, indexed 0 .. RASTER_MAP_MAX_DIST
void sPalCv2::AddPoint(byte r, byte g, byte b, byte idx, Vector<sCubePoint> *feed)
{
	const RGBA& origin = palette[idx];
	const int red_gap   = (int(origin.r) - int(r)) >> RASTER_MAP_MAX_SHIFT;
	const int green_gap = (int(origin.g) - int(g)) >> RASTER_MAP_MAX_SHIFT;
	const int blue_gap  = (int(origin.b) - int(b)) >> RASTER_MAP_MAX_SHIFT;
	const int bucket = red_gap*red_gap + green_gap*green_gap + blue_gap*blue_gap;

	sCubePoint entry;
	entry.index = idx;
	entry.mycol.r = r;
	entry.mycol.g = g;
	entry.mycol.b = b;

	ASSERT(bucket <= RASTER_MAP_MAX_DIST);
	feed[bucket].Add(entry);
}

//Fills cv_pal.cv by flood-filling the colour cube from all palette colours at once.
//Buckets are processed in ascending order of their index; the first entry to reach
//a cell claims it for its palette index.
//  palette - colours to map to, read at indices 0 .. ncolors-1
//  ncolors - number of palette entries
//  cv_pal  - conversion map that receives the result
sPalCv2::sPalCv2(const RGBA *palette, int ncolors, PaletteCv2& cv_pal)
:	cv_pal(cv_pal), palette(palette), ncolors(ncolors)
{
	Vector<sCubePoint> feed_me[RASTER_MAP_MAX_DIST+1];
	byte filled[RASTER_MAP_R * RASTER_MAP_G * RASTER_MAP_B];
	ZeroArray(filled);

	//every palette colour starts in bucket 0, highest index first
	for (int seed = ncolors; seed--; ) {
		sCubePoint start;
		start.index = seed;
		start.mycol = palette[seed];
		feed_me[0].Add(start);
	}

	for (int dist = 0; dist <= RASTER_MAP_MAX_DIST; ++dist) {
		Vector<sCubePoint>& bucket = feed_me[dist];
		while ( !bucket.IsEmpty() ) {
			const sCubePoint pt = bucket.Pop();
			const int cell = cv_pal.GetIndex(pt.mycol);
			if ( filled[cell] )
				continue;
			filled[cell] = 1;
			cv_pal.cv[cell] = pt.index;

			//queue the six neighbours that stay inside 0..255;
			//the order of the calls is kept as it decides ties inside a bucket
			const int r = pt.mycol.r;
			const int g = pt.mycol.g;
			const int b = pt.mycol.b;
			if ( r + RASTER_MAP_R_ADD <= 255 )
				AddPoint(r + RASTER_MAP_R_ADD, g, b, pt.index, feed_me);
			if ( r - RASTER_MAP_R_ADD >= 0 )
				AddPoint(r - RASTER_MAP_R_ADD, g, b, pt.index, feed_me);
			if ( g + RASTER_MAP_G_ADD <= 255 )
				AddPoint(r, g + RASTER_MAP_G_ADD, b, pt.index, feed_me);
			if ( g - RASTER_MAP_G_ADD >= 0 )
				AddPoint(r, g - RASTER_MAP_G_ADD, b, pt.index, feed_me);
			if ( b + RASTER_MAP_B_ADD <= 255 )
				AddPoint(r, g, b + RASTER_MAP_B_ADD, pt.index, feed_me);
			if ( b - RASTER_MAP_B_ADD >= 0 )
				AddPoint(r, g, b - RASTER_MAP_B_ADD, pt.index, feed_me);
		}
	}
}

/*	benchmark&test code  */

//Data shared by the benchmark in CONSOLE_APP_MAIN.
RGBA	test_pal[256];		//palette handed to the generators
RGBA	test_pal2[256];		//second colour set; only read by the disabled comparison code
PaletteCv	test_cv;		//map for CreatePaletteCv (only referenced from commented-out code)
PaletteCv2	test_cv2;		//map for the old generator (only referenced from commented-out code)
PaletteCv3	test_cv3;		//map for the new generator

void CreatePaletteCv2(const RGBA *palette, int ncolors, PaletteCv2& cv_pal)
{
	delete new sPalCv2(palette, ncolors, cv_pal);
}

void CreatePaletteCv3(const RGBA *palette, int ncolors, PaletteCv3& cv_pal)
{
	delete new sPalCv3(palette, ncolors, cv_pal);
}

//Benchmark entry point: builds two synthetic palettes, then runs the new
//generator 500 times on the first one.
CONSOLE_APP_MAIN
{
	int ii;

	//both palettes are simple multiples of the index, truncated to a byte
	for (ii = 0; ii < 256; ++ii) {
		RGBA& first = test_pal[ii];
		first.r = byte(ii*7);
		first.g = byte(ii*15);
		first.b = byte(ii*23);

		RGBA& second = test_pal2[ii];
		second.r = byte(ii*3);
		second.g = byte(ii*15);
		second.b = byte(ii*47);
	}

	//timed part; swap in one of the commented calls to measure another generator
	for (ii = 0; ii < 500; ++ii) {
		CreatePaletteCv3(test_pal, 256, test_cv3);
		//CreatePaletteCv2(test_pal, 256, test_cv2);
		//CreatePaletteCv(test_pal, 256, test_cv);
	}
/*/
	int j, k, rr, gg, bb, dj, dk;

	test_pal[0].r = 255;
	test_pal[0].g = 255;
	test_pal[0].b = 255;
	test_pal[1].r = 0;
	test_pal[1].g = 0;
	test_pal[1].b = 0;

	CreatePaletteCv3(test_pal, 2, test_cv3);
	CreatePaletteCv(test_pal, 2, test_cv);
	
	for (ii=0; ii < 256; ++ii) {
		j = test_cv.Get(test_pal2[ii]);
		k = test_cv3.Get(test_pal2[ii]);
		if ( j == k ) continue;
		rr = test_pal2[ii].r-test_pal[j].r;
		gg = test_pal2[ii].g-test_pal[j].g;
		bb = test_pal2[ii].b-test_pal[j].b;
		dj = rr*rr + gg*gg + bb*bb;
		rr = test_pal2[ii].r-test_pal[k].r;
		gg = test_pal2[ii].g-test_pal[k].g;
		bb = test_pal2[ii].b-test_pal[k].b;
		dk = rr*rr + gg*gg + bb*bb;
		printf("%d\t%d\t%d\t%d\n", j, k, dj, dk);
		printf("[%d,%d,%d]\t[%d,%d,%d]\t[%d,%d,%d]\n",
			test_pal2[ii].r, test_pal2[ii].g, test_pal2[ii].b,
			test_pal[j].r, test_pal[j].g, test_pal[j].b,
			test_pal[k].r, test_pal[k].g, test_pal[k].b
			);
	}
/**/	
}
