


#pragma once

// Export/import decoration for the library's public symbols.
// Define DLLProvider when building the DLL itself so the symbols are exported;
// every other translation unit gets the dllimport variants.
#ifdef DLLProvider
#define DLL_API_C extern "C" __declspec(dllexport)
#define DLL_API __declspec(dllexport)
#else
#define DLL_API_C extern "C" __declspec(dllimport)
#define DLL_API __declspec(dllimport)
#endif

// Public entry points. Shared conventions, as implemented by the definitions later in this file:
//   pDataSrc   : interleaved source pixels, read as width * height * 3 bytes.
//   nBgValue   : when >= 0 a boundary mask is built through CFindBoundary (Initialize(nBgValue == 0));
//                a negative value leaves the mask all zero.
//   block_size : tile edge used to split the image; <= 0 disables tiling.
//   sigma, k, min_size : forwarded to the graph segmentation routines.
// All three return -1 when a pointer is null, a dimension is not positive, or min_size <= 0.
//   ImgSegBlock     : pDataDst receives width * height * 3 bytes; returns the summed component count.
//   ImgSegBlockMark : pDataDst receives width * height labels; masked pixels stay 0.
//   KMeansSegment   : pDataDst receives width * height labels; see the definition for the return value.
// Presumably typical argument values (TODO confirm with callers):
//nBgValue = 0/255; block_size = 2048; sigma = 0.0; k = 0.0; min_size = 100;
int ImgSegBlock(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned char* pDataDst, int block_size, float sigma, float k, int min_size);
int ImgSegBlockMark(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned int* pDataDst, int block_size, float sigma, float k, int min_size);
int KMeansSegment(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned int* pDataDst, int block_size, float sigma, float k, int min_size);


#include <cstdio>
#include <cstdlib>
#include <image.h>
#include <misc.h>
#include <pnmfile.h>
#include "segment-image.h"
#include <map>
#include <omp.h>

//#define DLLProvider
#include "imgseg.h"
#include "..\FindBoundary.h"
#include "arealines.h"
#include "k-means.h"
//#include "Log.h"
#include "..\src\StdStrFile.h"

// Clamp ranges referenced by the disabled auto-tuning formulas in CheckParameters.
#define SIGMA_MIN	0.1
#define SIGMA_MAX	0.5
#define K_MIN		5
#define K_MAX		100

// Smaller / larger of two values. Each argument may be evaluated twice,
// so do not pass expressions with side effects.
#ifndef MIN
#  define MIN(a,b)  ((a) > (b) ? (b) : (a))
#endif

#ifndef MAX
#  define MAX(a,b)  ((a) < (b) ? (b) : (a))
#endif

// Element (x, y) of a row-major buffer whose rows are `width` elements long.
// Every argument is parenthesised so expressions such as `a & 1` index correctly.
#define imData(pData, x, y, width) ((pData)[(y) * (width) + (x)])

// Normalises the segmentation parameters in place: any sigma or k below 1e-5
// (including negative values) is replaced by exactly 0. Other values are left alone.
// width and height are accepted for interface compatibility; only the disabled
// auto-tuning formulas kept below for reference used them.
void inline CheckParameters(int width, int height, float& sigma, float& k)
{
	(void)width;
	(void)height;

	if (sigma < 1e-5)
	{
		//sigma = MAX(SIGMA_MIN, MIN(SIGMA_MAX, MAX(width, height) / 10000.0));
		sigma = 0.0f;
	}

	if (k < 1e-5)
	{
		//k = MAX(K_MIN, MIN(K_MAX, MAX(width, height) / 50.0));
		k = 0.0f;
	}
}

void inline SaveBufferPPM(unsigned char* pDataSrc, int width, int height, const char* szFilePath = "D:\\a.ppm")
	image<rgb> *input = new image<rgb>(width, height);
	memcpy(input->data, pDataSrc, width * height * 3);
	savePPM(input, szFilePath);
	delete input;

void inline SaveBoundBufferPPM(unsigned char* pDataSrc, int width, int height, const char* szFilePath = "D:\\b.ppm")
	image<rgb> *input = new image<rgb>(width, height);
	for (size_t j = 0; j < height; ++j)
		unsigned char* pSubData = pDataSrc + j * width;
		rgb* data = input->data + j * width;
		for (size_t i = 0; i < width; ++i)
			if (pSubData[i])
				data[i].r  = data[i].g =data[i].b = 255;
				data[i].r  = data[i].g =data[i].b = 0;
	savePPM(input, szFilePath);
	delete input;

void inline SaveBufferPPM(unsigned int* pDataSrc, int width, int height, const char* szFilePath = "D:\\seg.ppm")
	// pick random colors for each component
	rgb *colors = new rgb[width*height]();
	for (int i = 0; i < width*height; i++)
		colors[i] = random_rgb();

	image<rgb> *input = new image<rgb>(width, height);
	for (size_t j = 0; j < height; ++j)
		unsigned int* pInt = pDataSrc + j * width;
		rgb* data = input->data + j * width;
		for (size_t i = 0; i < width; ++i)
			data[i] = colors[pInt[i]];
	savePPM(input, szFilePath);
	delete input;

int SegmentPPM(const char* szFilePath, float sigma, float k, int min_size, const char* szDstPath)
	if (szFilePath == nullptr || szDstPath == nullptr)
		return -1;

	printf("loading input image.\n");
	image<rgb> *input = loadPPM(szFilePath);

	int width = input->width();
	int height = input->height();
	CheckParameters(width, height, sigma, k);

	int num_ccs = 0; 
	uchar* pBound = new uchar[width * height]();
	image<rgb> *seg = segment_image(input, sigma, k, min_size, &num_ccs, pBound); 
	delete[] pBound;
	savePPM(seg, szDstPath);
	delete seg;
	printf("got %d components\n", num_ccs);
	printf("done! uff...thats hard work.\n");

	return 0;

int ImgSegmentBlock(unsigned char* pDataSrc, int width, int height, uchar* pBound, unsigned char* pDataDst, int block_size, float sigma, float k, int min_size)
	CheckParameters(width, height, sigma, k);

	int num_ccs = 0;
	if (block_size <= 0 || width <= block_size && height <= block_size)
		image<rgb> *input = new image<rgb>(width, height);
		memcpy(input->data, pDataSrc, width * height * 3);
		image<rgb> *seg = segment_image(input, sigma, k, min_size, &num_ccs, pBound); 
		memcpy(pDataDst, seg->data, width * height * 3);
		delete input;
		delete seg;
		printf("got %d components\n", num_ccs);
		printf("done! uff...thats hard work.\n");
		int nColIndex = MAX(1, (width /*- 1*/) / block_size /*+ 1*/);		//計算列方向上塊數
		int nRowIndex = MAX(1, (height /*- 1*/) / block_size /*+ 1*/);		//計算行方向塊數
		int nThreadNum = omp_get_num_procs();
		int nBISum = nColIndex * nRowIndex;
#pragma omp parallel for num_threads(nThreadNum) reduction(+:num_ccs)
		for (int nBI = 0; nBI < nBISum; nBI++)
			int nRI = nBI / nColIndex;
			int nCI = nBI % nColIndex;
			unsigned char* pRDataSrc = pDataSrc + nRI * block_size * width * 3;
			unsigned char* pRDataDst = pDataDst + nRI * block_size * width * 3;
			uchar* pRDataBound = pBound + nRI * block_size * width;

			int nCurTh = nBI % nThreadNum;
			int nBlockWidth = block_size;
			int nBlockHeight = block_size;
			if (nCI == nColIndex - 1 && width % block_size != 0)nBlockWidth = (width) % block_size + block_size;
			if (nRI == nRowIndex - 1 && height % block_size != 0)nBlockHeight = (height) % block_size + block_size;
			nBlockWidth = MIN(nBlockWidth, width);
			nBlockHeight = MIN(nBlockHeight, height);
			unsigned char*	pDataBlockSrc = new unsigned char[nBlockWidth * nBlockHeight * 3]();
			unsigned char*	pDataBlockDst = new unsigned char[nBlockWidth * nBlockHeight * 3]();
			unsigned char*	pDataBound = new uchar[nBlockWidth * nBlockHeight]();
			//memset(pDataBlockSrc, 0, nBlockWidth * nBlockHeight * 3);
			//memset(pDataBlockDst, 0, nBlockWidth * nBlockHeight * 3);
			//memset(pDataBound, 0, nBlockWidth * nBlockHeight);
			unsigned char* pCDataSrc = pRDataSrc + nCI * block_size * 3;
			unsigned char* pCDataDst = pRDataDst + nCI * block_size * 3;
			uchar* pCDataBound = pRDataBound + nCI * block_size;

			for (size_t j = 0; j < nBlockHeight; ++j)
				unsigned char* pSubDataSrc = pCDataSrc + j * width * 3;
				unsigned char* pSubBlockDataDst = pDataBlockSrc + j * nBlockWidth * 3;
				uchar* pSubDataBoundSrc = pCDataBound + j * width;
				uchar* pSubDataBoundDst = pDataBound + j * nBlockWidth;
				memcpy(pSubBlockDataDst, pSubDataSrc, nBlockWidth * 3);
				memcpy(pSubDataBoundDst, pSubDataBoundSrc, nBlockWidth);

			num_ccs += ImgSegmentBlock(pDataBlockSrc, nBlockWidth, nBlockHeight, pDataBound, pDataBlockDst, -1, sigma, k, min_size);
			for (size_t j = 0; j < nBlockHeight; ++j)
				unsigned char* pSubDataDst = pCDataDst + j * width * 3;
				unsigned char* pSubBlockDataDst = pDataBlockDst + j * nBlockWidth * 3;
				memcpy(pSubDataDst, pSubBlockDataDst, nBlockWidth * 3);

			delete[] pDataBlockSrc;
			delete[] pDataBlockDst;
			delete[] pDataBound;

	return num_ccs;

int ImgSegmentBlockMark(unsigned char* pDataSrc, int width, int height, uchar* pBound,
	unsigned int* pDataDst, int block_size, int nLastIndex, float sigma, float k, int min_size)
	CheckParameters(width, height, sigma, k);

	if (block_size <= 0 || width <= block_size && height <= block_size)
		image<rgb> *input = new image<rgb>(width, height);
		memcpy(input->data, pDataSrc, width * height * 3);

		universe* u = segment_image_u(input, sigma, k, min_size, pBound);

		int num_ccs = u->num_sets();
		memset(pDataDst, 0, width * height * sizeof(int));
		std::map<int, int> mvs;
		for (size_t j = 0; j < height; ++j)
			unsigned int* data = pDataDst + j * width;
			uchar* bound = pBound + j * width;
			for (size_t i = 0; i < width; ++i)
				if (!bound[i])
					int comp = u->find(j * width + i);
					if (mvs.find(comp) == mvs.end())
						mvs.insert(std::make_pair(comp, ++nLastIndex));
					data[i] = mvs[comp];

		printf("got %d components\n", num_ccs);
		printf("done! uff...thats hard work.\n");

		delete input;
		delete u;

		return num_ccs;
		int nColIndex = MAX(1, (width /*- 1*/) / block_size /*+ 1*/);		//計算列方向上塊數
		int nRowIndex = MAX(1, (height /*- 1*/) / block_size /*+ 1*/);		//計算行方向塊數

		int nThreadNum = omp_get_num_procs();

		int nBlockWidthB = block_size * 2;						//當前塊寬度
		int nBlockHeightB = block_size * 2;						//當前塊高度
		unsigned char** pDataBlock = new unsigned char*[nThreadNum]();
		unsigned int** pDataBlockDst = new unsigned int*[nThreadNum]();
		uchar** pDataBound = new uchar*[nThreadNum]();
		for (int i = 0; i < nThreadNum; ++i)
			pDataBlock[i] = new unsigned char[nBlockWidthB * nBlockHeightB * 3]();
			pDataBlockDst[i] = new unsigned int[nBlockWidthB * nBlockHeightB]();
			pDataBound[i] = new uchar[nBlockWidthB * nBlockHeightB]();

		int nBISum = nColIndex * nRowIndex;
		int* nCompCount = new int[nBISum]();
#pragma omp parallel for num_threads(nThreadNum)
		for (int nBI = 0; nBI < nBISum; nBI++)
			int nRI = nBI / nColIndex;
			int nCI = nBI % nColIndex;
			unsigned char* pRDataSrc = pDataSrc + nRI * block_size * width * 3;
			unsigned int* pRDataDst = pDataDst + nRI * block_size * width;
			uchar* pRDataBound = pBound + nRI * block_size * width;

			int ti = omp_get_thread_num()/* % nThreadNum*/;
			int nBlockWidth = block_size;
			int nBlockHeight = block_size;
			if (nCI == nColIndex - 1 && width % block_size != 0)nBlockWidth = (width) % block_size + block_size;
			if (nRI == nRowIndex - 1 && height % block_size != 0)nBlockHeight = (height) % block_size + block_size;
			nBlockWidth = MIN(nBlockWidth, width);
			nBlockHeight = MIN(nBlockHeight, height);

			unsigned char* pCDataSrc = pRDataSrc + nCI * block_size * 3;
			unsigned int* pCDataDst = pRDataDst + nCI * block_size;
			uchar* pCDataBound = pRDataBound + nCI * block_size;
			for (size_t j = 0; j < nBlockHeight; ++j)
				unsigned char* pSubDataSrc = pCDataSrc + j * width * 3;
				unsigned char* pSubBlockDataDst = pDataBlock[ti] + j * nBlockWidth * 3;
				uchar* pSubDataBoundSrc = pCDataBound + j * width;
				uchar* pSubDataBoundDst = pDataBound[ti] + j * nBlockWidth;
				memcpy(pSubBlockDataDst, pSubDataSrc, nBlockWidth * 3);
				memcpy(pSubDataBoundDst, pSubDataBoundSrc, nBlockWidth);

			nCompCount[nBI] = ImgSegmentBlockMark(pDataBlock[ti], nBlockWidth, nBlockHeight, pDataBound[ti], pDataBlockDst[ti], -1, nLastIndex, sigma, k, min_size);
			for (size_t j = 0; j < nBlockHeight; ++j)
				unsigned int* pSubDataDst = pCDataDst + j * width;
				unsigned int* pSubBlockDataDst = pDataBlockDst[ti] + j * nBlockWidth;
				memcpy(pSubDataDst, pSubBlockDataDst, nBlockWidth * sizeof(int));

		int nIndexOffset = 0;
		for (int nBI = 1; nBI < nBISum; nBI++)
			int nRI = nBI / nColIndex;
			int nCI = nBI % nColIndex;
			unsigned int* pRDataDst = pDataDst + nRI * block_size * width;
			uchar* pRDataBound = pBound + nRI * block_size * width;
			int nBlockWidth = block_size;
			int nBlockHeight = block_size;
			if (nCI == nColIndex - 1 && width % block_size != 0)nBlockWidth = (width) % block_size + block_size;
			if (nRI == nRowIndex - 1 && height % block_size != 0)nBlockHeight = (height) % block_size + block_size;

			unsigned int* pCDataDst = pRDataDst + nCI * block_size;
			uchar* pCDataBound = pRDataBound + nCI * block_size;
			nIndexOffset += nCompCount[nBI - 1];
			for (size_t j = 0; j < nBlockHeight; ++j)
				unsigned int* pSubDataDst = pCDataDst + j * width;
				uchar* pSubDataBoundSrc = pCDataBound + j * width;
				for (int i = 0; i < nBlockWidth; ++i)
					if (!pSubDataBoundSrc[i])
						pSubDataDst[i] += nIndexOffset;

		if (nBISum > 0)
			nLastIndex = nIndexOffset + nCompCount[nBISum - 1];

		for (int i = 0; i < nThreadNum; ++i)
			delete[] pDataBlock[i];
			delete[] pDataBlockDst[i];
			delete[] pDataBound[i];

		delete[] pDataBlock;
		delete[] pDataBlockDst;
		delete[] pDataBound;
		delete[] nCompCount;

	return nLastIndex;

int ImgSegBlock(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned char* pDataDst, int block_size, float sigma, float k, int min_size)
	if (pDataSrc == nullptr || width <= 0 || height <= 0 || pDataDst == nullptr || min_size <= 0)
		return -1;

	IBufferX* pImage = new IBufferX(pDataSrc, width, height, 3);
	CFindBoundary fb(pImage);
	uchar* pBound = new uchar[width * height]();
	if (nBgValue >= 0)
		fb.Initialize(nBgValue == 0);
		for (int y = 0; y < height; y++)
			for (int x = 0; x < width; x++)
				PtInt ptCur(x, y);
				pBound[y * width + x] = ptCur.IsInBoundInfoLine(fb.m_mBoundInfo[y]);

	delete pImage;

	int num = ImgSegmentBlock(pDataSrc, width, height, pBound, pDataDst, block_size, sigma, k, min_size);

	delete[] pBound;

	return num;

// Looks for the first line segment of `area` that starts at column 0.
// On a hit, x and y receive that segment's x_s and y and the function returns true.
// Otherwise x and y are left untouched and the function returns false.
bool IsAreaCrossVBound(AreaLines& area, int& x, int& y)
{
	const size_t nLineCount = area.size();
	for (size_t i = 0; i < nLineCount; ++i)
	{
		const CLineSeg& sg = area[i];	// no per-iteration copy

		if (sg.x_s == 0)
		{
			x = sg.x_s;
			y = sg.y;
			return true;
		}
	}

	return false;
}

// Looks for the first line segment of `area` that lies on row 0.
// On a hit, x and y receive that segment's x_s and y and the function returns true.
// Otherwise x and y are left untouched and the function returns false.
bool IsAreaCrossHBound(AreaLines& area, int& x, int& y)
{
	const size_t nLineCount = area.size();
	for (size_t i = 0; i < nLineCount; ++i)
	{
		const CLineSeg& sg = area[i];	// no per-iteration copy

		if (sg.y == 0)
		{
			x = sg.x_s;
			y = sg.y;
			return true;
		}
	}

	return false;
}

// Scans a label image row by row, tracking horizontal runs of equal label.
//   buffer : width * height labels, row-major.
//   bound  : width * height mask bytes; a non-zero byte ends the current run.
//   count  : vAreas is replaced by count + 1 freshly allocated AreaLines
//            (indices 0..count). Callers in this file delete them afterwards.
// Returns 0 in every visible path.
// NOTE(review): in this copy of the file the brace lines are missing, and the `if`
// statements at "i == width && ..." and "cur_idx != -1" have no body. Presumably each
// appended the finished run `al` to vAreas[cur_idx] (and the first one left the loop),
// but nothing visible here ever stores a run in vAreas. Confirm against the original.
// NOTE(review): the inner loop runs up to i == width inclusive, so unless the missing
// body exits the loop, h_bound[width] is read one element past the row.
size_t TransAreas(unsigned int* buffer, unsigned char* bound, int width, int height, int count, std::vector<AreaLines*>& vAreas)
	int nReal = count + 1;
	vAreas = std::vector<AreaLines*>(nReal);
	for (size_t i = 0; i < nReal; ++i)
		vAreas[i] = new AreaLines();

	for (size_t j = 0; j < height; ++j)
		// Row j of the label image and of the mask.
		unsigned int* h_buffer = buffer + width * j;
		unsigned char* h_bound = bound + width * j;

		// Run currently being tracked on this row; x_s / x_e are its first / last column.
		CLineSeg al(-1, -1, j);
		bool bStart = false;
		//bool bEnd = true;
		int cur_idx = -1;
		for (size_t i = 0; i <= width; ++i)
			// End of row reached while a run with a positive label is open.
			if (i == width && bStart && cur_idx > 0)
				//bStart = false;
				//bEnd = true;

			// Masked pixel: close and reset any open run.
			if (h_bound[i])
				if (bStart && cur_idx > 0)
					al.x_s = -1;
					cur_idx = -1;
					bStart = false;
				int idx = h_buffer[i];

				// Label changed: a new run starts at column i.
				if (cur_idx != idx)
					if (cur_idx != -1)

					al.x_s = i;
					bStart = true;
					//bEnd = false;
					cur_idx = idx;
				al.x_e = i;

	return 0;

// Revisits the seams left by tile-wise labelling: first every vertical seam between
// neighbouring tile columns, then every horizontal seam between neighbouring tile rows.
// For each seam a window covering the two adjacent tiles is copied out, every pixel whose
// label does not touch the seam is masked, and the window is labelled again with
// ImgSegmentBlockMark(block_size = -1). TransAreas() is then run on the new labels.
//   pDataSrc   : source pixels, 3 bytes per pixel, row stride width * 3.
//   pBound     : width * height mask bytes.
//   pDataDst   : width * height labels; read here and overwritten by the write-back loops.
//   nLastIndex : not referenced anywhere in the visible body.
//   block_size : tile edge used by the preceding tile-wise labelling.
// Returns -1 when block_size < 0, otherwise 0.
// NOTE(review): this copy has no brace lines and contains several "#endif // 0" lines
// whose opening "#if" is not visible, so which of the relabel / write-back sections are
// actually compiled cannot be determined from here. Confirm against the original.
// NOTE(review): block_size == 0 passes the guard below and is then used as a divisor.
int FixBlockBorder(unsigned char* pDataSrc, int width, int height, uchar* pBound, unsigned int* pDataDst, int nLastIndex,
	int block_size, float sigma, float k, int min_size)
	if (block_size < 0)
		return -1;

	int nColBorders = (width) / block_size - 1;		//number of seams along the column direction

	// Scratch buffers for the vertical-seam windows; a window is narrower than
	// 3 * block_size (2 * block_size plus a remainder below block_size).
	unsigned char* pNewBlock = new unsigned char[block_size * 3 * height * 3]();
	unsigned char* pNewBound = new unsigned char[block_size * 3 * height]();
	unsigned int* pNewDst = new unsigned int[block_size * 3 * height]();

	for (int nCBIdx = 0; nCBIdx < nColBorders; ++nCBIdx)
		// Top-left corner of the window: column nCBIdx * block_size, row 0.
		unsigned char* pCDataSrc = pDataSrc + nCBIdx * block_size * 3;
		unsigned int* pCDataDst = pDataDst + nCBIdx * block_size;
		unsigned char* pCDataBound = pBound + nCBIdx * block_size;

		int nBlockWidth = block_size * 2;
		int nBlockHeight = height;
		// The last window also takes the leftover columns.
		if (nCBIdx == nColBorders - 1 && width % block_size != 0)nBlockWidth = (width) % block_size + 2 * block_size;

		//unsigned char* pNewBlock = new unsigned char[nBlockWidth * nBlockHeight * 3]();
		//unsigned char* pNewBound = new unsigned char[nBlockWidth * nBlockHeight]();
		//unsigned int* pNewDst = new unsigned int[nBlockWidth * nBlockHeight]();
		memset(pNewBound, 0, nBlockWidth * nBlockHeight/* * sizeof(char)*/);

		// Labels found in the two columns on either side of the seam (key = label, value = row).
		std::map<int, int> miiV;
		int x = block_size - 1;	//x of the vertical seam line
		//int y = block_size - 1;	//y of the horizontal seam line

		for (size_t bl = 0; bl < nBlockHeight; ++bl)
			if (!imData(pCDataBound, x, bl, width))
				miiV.insert(std::make_pair(imData(pCDataDst, x, bl, width), bl));

			if (!imData(pCDataBound, x + 1, bl, width))
				miiV.insert(std::make_pair(imData(pCDataDst, x + 1, bl, width), bl));

		// Copy the window's pixels into the contiguous scratch block.
		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned char* pSubDataSrc = pCDataSrc + h * width * 3;
			unsigned char* pSubBlockDataDst = pNewBlock + h * nBlockWidth * 3;
			memcpy(pSubBlockDataDst, pSubDataSrc, nBlockWidth * 3);

		// Mask every pixel that was already masked or whose label does not touch the seam.
		for (size_t h = 0; h < nBlockHeight; ++h)
			uchar* pSubDataBoundSrc = pCDataBound + h * width;
			unsigned int* pSubDataDst = pCDataDst + h * width;
			uchar* pSubDataBoundDst = pNewBound + h * nBlockWidth;
			for (size_t w = 0; w < nBlockWidth; ++w)
				if (pSubDataBoundSrc[w] || miiV.find(pSubDataDst[w]) == miiV.end())
					pSubDataBoundDst[w] = 1;

		//SaveBufferPPM(pNewBlock, nBlockWidth, nBlockHeight);
		//SaveBoundBufferPPM(pNewBound, nBlockWidth, nBlockHeight);
		// Label the window again in a single pass, starting from label 1.
		int num = ImgSegmentBlockMark(pNewBlock, nBlockWidth, nBlockHeight, pNewBound, pNewDst, -1, 0, sigma, k, min_size);
		//SaveBufferPPM(pNewDst, nBlockWidth, nBlockHeight);

		std::vector<AreaLines*> vAreas;
		TransAreas(pNewDst, pNewBound, nBlockWidth, nBlockHeight, num, vAreas);

		// NOTE(review): looks like a debug section (rewrites labels and dumps a PPM) that
		// ends at the "#endif // 0" below; its opening "#if" is not visible.
		for (size_t j = 1; j < vAreas.size(); ++j)
			AreaLines* alsl = vAreas[j];
			for (size_t l = 0; l < alsl->size(); ++l)
				CLineSeg al = (*alsl)[l];
				unsigned int* pSubCData = pNewDst + al.y * nBlockWidth;
				for (size_t u = al.x_s; u <= al.x_e; ++u)
					if (pSubCData[u] != j)
						int a = 0;

					pSubCData[u] = j + 1;

		SaveBufferPPM(pNewDst, nBlockWidth, nBlockHeight);
#endif // 0

		// Working copy of the window's current labels.
		unsigned int* pDstTmp = new unsigned int[nBlockWidth * nBlockHeight];

		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned int* pSubDataDst = pCDataDst + h * width;
			unsigned int* pSubDstTmp = pDstTmp + h * nBlockWidth;
			memcpy(pSubDstTmp, pSubDataDst, nBlockWidth * sizeof(int));
		//std::map<int, int> mib;
		// Walk down the seam; for the pixel left (x) and right (x + 1) of it, repaint the
		// whole re-labelled area in pDstTmp with a single old label.
		for (size_t bl = 0; bl < nBlockHeight; ++bl)
			if (!imData(pCDataBound, x, bl, width))
				int idxl = imData(pCDataDst, x, bl, width);
				int idxl_new = imData(pNewDst, x, bl, nBlockWidth);
				if (vAreas[idxl_new]->size())
// 					if (mib.find(idxl) == mib.end())
// 					{
						std::vector<CLineSeg>* alsl = vAreas[idxl_new];
						int tmp_idx = idxl;
						int cross_x = -1, cross_y = -1;
						// If the area has a run starting at column 0, use the old label found there.
						if (IsAreaCrossVBound(*alsl, cross_x, cross_y))
							tmp_idx = imData(pCDataDst, cross_x, cross_y, width);
						for (size_t l = 0; l < alsl->size(); ++l)
							CLineSeg al = (*alsl)[l];
							unsigned int* pSubCDataDst = pDstTmp + al.y * nBlockWidth;
							for (size_t u = al.x_s; u <= al.x_e; ++u)
								pSubCDataDst[u] = tmp_idx;
// 						mib.insert(std::make_pair(idxl, idxl_new));
// 					}

				int idxr = imData(pCDataDst, x + 1, bl, width);
				int idxr_new = imData(pNewDst, x + 1, bl, nBlockWidth);
				if (/*idxr == imData(pDstTmp, x + 1, bl, nBlockWidth) && */vAreas[idxr_new]->size())
// 					if (mib.find(idxr) == mib.end())
// 					{
						std::vector<CLineSeg>* alsl = vAreas[idxr_new];
						int tmp_idx = idxr;
						int cross_x = -1, cross_y = -1;
						if (IsAreaCrossVBound(*alsl, cross_x, cross_y))
							tmp_idx = imData(pCDataDst, cross_x, cross_y, width);
						for (size_t l = 0; l < alsl->size(); ++l)
							CLineSeg al = (*alsl)[l];
							unsigned int* pSubCDataDst = pDstTmp + al.y * nBlockWidth;
							for (size_t u = al.x_s; u <= al.x_e; ++u)
								pSubCDataDst[u] = tmp_idx;
// 						mib.insert(std::make_pair(idxr, idxr_new));
// 					}

		// Write the merged labels back into the label image.
		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned int* pSubDataDst = pCDataDst + h * width;
			unsigned int* pSubDstTmp = pDstTmp + h * nBlockWidth;
			memcpy(pSubDataDst, pSubDstTmp, nBlockWidth * sizeof(int));
#endif // 0

		// Alternative write-back: copies the window's new labels straight into the label image.
		for (size_t j = 0; j < nBlockHeight; ++j)
			unsigned int* pSubDataDst = pCDataDst + j * width;
			unsigned int* pSubBlockDataDst = pNewDst + j * nBlockWidth;
			memcpy(pSubDataDst, pSubBlockDataDst, nBlockWidth * sizeof(int));
#endif // 0

		delete[] pDstTmp;
		pDstTmp = nullptr;

		// Release the AreaLines allocated by TransAreas().
		for (std::vector<AreaLines*>::iterator it = vAreas.begin(); it != vAreas.end(); ++it)
			if (*it)
				delete *it;
				*it = nullptr;

	delete[] pNewBlock;
	delete[] pNewBound;
	delete[] pNewDst;


	int nRowBorders = (height) / block_size - 1;	//number of seams along the row direction

	// Scratch buffers for the horizontal-seam windows (full image width, under 3 * block_size rows).
	pNewBlock = new unsigned char[block_size * 3 * width * 3]();
	pNewBound = new unsigned char[block_size * 3 * width]();
	pNewDst = new unsigned int[block_size * 3 * width]();

	//for (int nRBIdx = 0; nRBIdx < 3; ++nRBIdx)
	for (int nRBIdx = 0; nRBIdx < nRowBorders; ++nRBIdx)
		// Top-left corner of the window: column 0, row nRBIdx * block_size.
		unsigned char* pCDataSrc = pDataSrc + nRBIdx * block_size * width * 3;
		unsigned int* pCDataDst = pDataDst + nRBIdx * block_size * width;
		unsigned char* pCDataBound = pBound + nRBIdx * block_size * width;

		int nBlockWidth = width;
		int nBlockHeight = block_size * 2;
		//if (nRBIdx == nColBorders - 1 && width % block_size != 0)nBlockWidth = (width) % block_size + 2 * block_size;
		// The last window also takes the leftover rows.
		if (nRBIdx == nRowBorders - 1 && height % block_size != 0)nBlockHeight = (height) % block_size + 2 * block_size;

		//unsigned char* pNewBlock = new unsigned char[nBlockWidth * nBlockHeight * 3]();
		//unsigned char* pNewBound = new unsigned char[nBlockWidth * nBlockHeight]();
		//unsigned int* pNewDst = new unsigned int[nBlockWidth * nBlockHeight]();
		memset(pNewBound, 0, nBlockWidth * nBlockHeight/* * sizeof(char)*/);

		// Labels found in the two rows on either side of the seam (key = label, value = column).
		std::map<int, int> miiH;
		int y = block_size - 1;	//y of the horizontal seam line

		for (size_t bl = 0; bl < width; ++bl)
			if (!imData(pCDataBound, bl, y, width))
				miiH.insert(std::make_pair(imData(pCDataDst, bl, y, width), bl));

			if (!imData(pCDataBound, bl, y + 1, width))
				miiH.insert(std::make_pair(imData(pCDataDst, bl, y + 1, width), bl));

		// Copy the window's pixels into the contiguous scratch block.
		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned char* pSubDataSrc = pCDataSrc + h * width * 3;
			unsigned char* pSubBlockDataDst = pNewBlock + h * nBlockWidth * 3;
			memcpy(pSubBlockDataDst, pSubDataSrc, nBlockWidth * 3);

		// Mask every pixel that was already masked or whose label does not touch the seam.
		for (size_t h = 0; h < nBlockHeight; ++h)
			uchar* pSubDataBoundSrc = pCDataBound + h * width;
			unsigned int* pSubDataDst = pCDataDst + h * width;
			uchar* pSubDataBoundDst = pNewBound + h * nBlockWidth;
			for (size_t w = 0; w < nBlockWidth; ++w)
				if (pSubDataBoundSrc[w] || miiH.find(pSubDataDst[w]) == miiH.end())
					pSubDataBoundDst[w] = 1;

		//SaveBufferPPM(pNewBlock, nBlockWidth, nBlockHeight);
		//SaveBoundBufferPPM(pNewBound, nBlockWidth, nBlockHeight);
		// Label the window again in a single pass, starting from label 1.
		int num = ImgSegmentBlockMark(pNewBlock, nBlockWidth, nBlockHeight, pNewBound, pNewDst, -1, 0, sigma, k, min_size);
		//SaveBufferPPM(pNewDst, nBlockWidth, nBlockHeight);

		std::vector<AreaLines*> vAreas;
		TransAreas(pNewDst, pNewBound, nBlockWidth, nBlockHeight, num, vAreas);

		// NOTE(review): looks like a debug section ending at the "#endif // 0" below;
		// its opening "#if" is not visible.
		for (size_t j = 1; j < vAreas.size(); ++j)
			std::vector<CLineSeg>* alsl = vAreas[j];
			for (size_t l = 0; l < alsl->size(); ++l)
				CLineSeg al = (*alsl)[l];
				unsigned int* pSubCData = pNewDst + al.y * nBlockWidth;
				for (size_t u = al.x_s; u <= al.x_e; ++u)
					pSubCData[u] = j + 1;
		//SaveBufferPPM(pNewDst, nBlockWidth, nBlockHeight);
#endif // 0

		// Working copy of the window's current labels.
		unsigned int* pDstTmp = new unsigned int[nBlockWidth * nBlockHeight];

		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned int* pSubDataDst = pCDataDst + h * width;
			unsigned int* pSubDstTmp = pDstTmp + h * nBlockWidth;
			memcpy(pSubDstTmp, pSubDataDst, nBlockWidth * sizeof(int));

		//_tstring stSaveOld = _T("D:\\pOldDst") + CStdTpl::ConvertToString(nRBIdx) +_T(".ppm");
		//std::string sSaveOld = CStdStr::ws2s(stSaveOld);
		//SaveBufferPPM(pDstTmp, nBlockWidth, nBlockHeight, sSaveOld.c_str());

		//_tstring stSavePath = _T("D:\\pNewDst") + CStdTpl::ConvertToString(nRBIdx) +_T(".ppm");
		//std::string sSavePath = CStdStr::ws2s(stSavePath);
		//SaveBufferPPM(pNewDst, nBlockWidth, nBlockHeight, sSavePath.c_str());

		//_tstring stAllPath = _T("D:\\all") + CStdTpl::ConvertToString(2 * nRBIdx) +_T(".ppm");
		//std::string sAllPath = CStdStr::ws2s(stAllPath);
		//SaveBufferPPM(pDataDst, width, height, sAllPath.c_str());

		//std::map<int, int> mib;
		// Walk along the seam; for the pixel on row y and the one on row y + 1, repaint the
		// whole re-labelled area in pDstTmp with a single old label.
		for (size_t bl = 0; bl < nBlockWidth; ++bl)
			if (!imData(pCDataBound, bl, y, width))
				int idxd = imData(pCDataDst, bl, y, width);
				int idxd_new = imData(pNewDst, bl, y, nBlockWidth);
				int idxu = imData(pCDataDst, bl, y + 1, width);
				int idxu_new = imData(pNewDst, bl, y + 1, nBlockWidth);

				if (/*idxd == imData(pDstTmp, bl, y, width) && */vAreas[idxd_new]->size())
					// 					if (mib.find(idxd) == mib.end())
					// 					{
					std::vector<CLineSeg>* alsl = vAreas[idxd_new];
					int tmp_idx = idxd;
					int cross_x = -1, cross_y = -1;
					// If the area has a run on row 0, use the old label found there.
					if (IsAreaCrossHBound(*alsl, cross_x, cross_y))
						tmp_idx = imData(pCDataDst, cross_x, cross_y, width);
					for (size_t l = 0; l < alsl->size(); ++l)
						CLineSeg al = (*alsl)[l];
						unsigned int* pSubCDataDst = pDstTmp + al.y * nBlockWidth;
						for (size_t u = al.x_s; u <= al.x_e; ++u)
							pSubCDataDst[u] = tmp_idx;
					// 						mib.insert(std::make_pair(idxd, idxd_new));
					// 					}

				// NOTE(review): this condition tests vAreas[idxd_new] but the body uses
				// vAreas[idxu_new]; the vertical-seam pass tests the same index it uses.
				// Looks like a copy/paste slip - confirm.
 				if (/*idxu == imData(pDstTmp, bl, y + 1, nBlockWidth) && */vAreas[idxd_new]->size())
 					// 					if (mib.find(idxu) == mib.end())
 					// 					{
					std::vector<CLineSeg>* alsl = vAreas[idxu_new];
					int tmp_idx = idxu;
					int cross_x = -1, cross_y = -1;
					if (IsAreaCrossHBound(*alsl, cross_x, cross_y))
						tmp_idx = imData(pCDataDst, cross_x, cross_y, width);
					for (size_t l = 0; l < alsl->size(); ++l)
						CLineSeg al = (*alsl)[l];
						unsigned int* pSubCDataDst = pDstTmp + al.y * nBlockWidth;
						for (size_t u = al.x_s; u <= al.x_e; ++u)
							pSubCDataDst[u] = tmp_idx;
 					// 						mib.insert(std::make_pair(idxu, idxu_new));
 					// 					}

		// Write the merged labels back into the label image.
		for (size_t h = 0; h < nBlockHeight; ++h)
			unsigned int* pSubDataDst = pCDataDst + h * width;
			unsigned int* pSubDstTmp = pDstTmp + h * nBlockWidth;
			memcpy(pSubDataDst, pSubDstTmp, nBlockWidth * sizeof(int));

		//_tstring stAllPathAfter = _T("D:\\all") + CStdTpl::ConvertToString(2 * nRBIdx + 1) +_T(".ppm");
		//std::string sAllPathAfter = CStdStr::ws2s(stAllPathAfter);
		//SaveBufferPPM(pDataDst, width, height, sAllPathAfter.c_str());

#endif // 0

		// Alternative write-back: copies the window's new labels straight into the label image.
		for (size_t j = 0; j < nBlockHeight; ++j)
			unsigned int* pSubDataDst = pCDataDst + j * width;
			unsigned int* pSubBlockDataDst = pNewDst + j * nBlockWidth;
			memcpy(pSubDataDst, pSubBlockDataDst, nBlockWidth * sizeof(int));
#endif // 0

		delete[] pDstTmp;
		pDstTmp = nullptr;

		// Release the AreaLines allocated by TransAreas().
		for (std::vector<AreaLines*>::iterator it = vAreas.begin(); it != vAreas.end(); ++it)
			if (*it)
				delete *it;
				*it = nullptr;

	delete[] pNewBlock;
	delete[] pNewBound;
	delete[] pNewDst;

	return 0;

int ImgSegBlockMark(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned int* pDataDst, int block_size, float sigma, float k, int min_size)
	if (pDataSrc == nullptr || width <= 0 || height <= 0 || pDataDst == nullptr || min_size <= 0)
		return -1;

	IBufferX* pImage = new IBufferX(pDataSrc, width, height, 3);
	CFindBoundary fb(pImage);

	uchar* pBound = new uchar[width * height]();
	if (nBgValue >= 0)
		fb.Initialize(nBgValue == 0);
		for (int y = 0; y < height; y++)
			for (int x = 0; x < width; x++)
				PtInt ptCur(x, y);
				pBound[y * width + x] = ptCur.IsInBoundInfoLine(fb.m_mBoundInfo[y]);
	delete pImage;
	int nLastIndex = 0;

	nLastIndex = ImgSegmentBlockMark(pDataSrc, width, height, pBound, pDataDst, block_size, nLastIndex, sigma, k, min_size);

	if (block_size > 0 && (width > block_size || height > block_size))
		FixBlockBorder(pDataSrc, width, height, pBound, pDataDst, nLastIndex, block_size, sigma, k, min_size);

	delete[] pBound;

	return nLastIndex;

// Public entry point: labels the image with ImgSegmentBlockMark(), then clusters the
// resulting areas with KMeans (3 features per sample, 5 clusters) and overwrites each
// area's pixels in pDataDst with its cluster label + 1.
//   pDataSrc   : source pixels, width * height * 3 bytes.
//   nBgValue   : when >= 0, CFindBoundary is initialised with (nBgValue == 0) and each
//                pixel's mask byte is set from IsInBoundInfoLine().
//   pDataDst   : receives width * height labels.
//   block_size, sigma, k, min_size : forwarded to ImgSegmentBlockMark().
// Returns -1 on invalid arguments, 0 when ImgSegmentBlockMark() returned 0, otherwise
// cluster_num (5).
// NOTE(review): this copy has no brace lines and contains an "#endif // 0" whose opening
// "#if" is not visible (presumably just before the first loop over vAreas, which also
// accesses ->vLines unlike the rest of the function). Confirm against the original.
// NOTE(review): nothing visible increments lPixCount or nKCount, so as shown the feature
// rows are never written and size collapses to 0; the increments look lost - confirm.
int KMeansSegment(unsigned char* pDataSrc, int width, int height, int nBgValue, unsigned int* pDataDst, int block_size, float sigma, float k, int min_size)
	if (pDataSrc == nullptr || width <= 0 || height <= 0 || pDataDst == nullptr || min_size <= 0)
		return -1;

	IBufferX* pImage = new IBufferX(pDataSrc, width, height, 3);
	CFindBoundary fb(pImage);

	// Boundary mask, all zero unless nBgValue >= 0.
	uchar* pBound = new uchar[width * height]();
	if (nBgValue >= 0)
		fb.Initialize(nBgValue == 0);
		for (int y = 0; y < height; y++)
			for (int x = 0; x < width; x++)
				PtInt ptCur(x, y);
				pBound[y * width + x] = ptCur.IsInBoundInfoLine(fb.m_mBoundInfo[y]);
	delete pImage;
	int nLastIndex = 0;

	nLastIndex = ImgSegmentBlockMark(pDataSrc, width, height, pBound, pDataDst, block_size, nLastIndex, sigma, k, min_size);

	//if (block_size > 0 && (width > block_size || height > block_size))
	//	FixBlockBorder(pDataSrc, width, height, pBound, pDataDst, nLastIndex, block_size, sigma, k, min_size);

	if (nLastIndex)
		// Collect the horizontal runs of every label (indices 0..nLastIndex).
		std::vector<AreaLines*> vAreas;
		TransAreas(pDataDst, pBound, width, height, nLastIndex, vAreas);
		for (size_t j = 1; j < vAreas.size(); ++j)
			std::vector<CLineSeg>& alsl = vAreas[j]->vLines;
			for (size_t l = 0; l < alsl.size(); ++l)
				CLineSeg al = alsl[l];
				unsigned int* pSubCData = pDataDst + al.y * width;
				for (size_t u = al.x_s; u <= al.x_e; ++u)
					pSubCData[u] = j + 1;
#endif // 0

		int& nWidth = width;
		int& nHeight = height;
		int nBandNum = 3;

		int size = nLastIndex;				//sample count; may actually be smaller than nLastIndex
		const int dim = nBandNum;			//Dimension of feature
		const int cluster_num = 5;			//Cluster number

		const size_t lMemSize = size * dim;
		// NOTE(review): new double[] already counts elements, so the extra
		// "* sizeof(double)" allocates sizeof(double) times more than lMemSize doubles.
		double* data = new double[lMemSize * sizeof(double)]();
		int nKCount = 0;
		// One feature row per area: the per-channel sum over the area's pixels divided by
		// lPixCount (integer division, since both operands are long).
		for (int i = 1; i <= size; ++i)
			std::vector<CLineSeg>* alsl = vAreas[i];
			long lrgbSum[3] = {0};
			long lPixCount = 0;
			for (size_t l = 0; l < alsl->size(); ++l)
				CLineSeg al = (*alsl)[l];
				unsigned char* pSubData = pDataSrc + al.y * width * 3;
				for (size_t u = al.x_s; u <= al.x_e; ++u)
					for (size_t c = 0; c < 3; ++c)
						lrgbSum[c] += pSubData[u * 3 + c];

			if (lPixCount)
				for (size_t c = 0; c < 3; ++c)
					data[nKCount * 3 + c] = lrgbSum[c] / lPixCount;

		size = MIN(size, nKCount);

		KMeans* kmeans = new KMeans(dim, cluster_num);
		int* labels = new int[size];
		kmeans->Cluster(data, size, labels);

		// Paint every area with its cluster label. Sample i is taken to be area i + 1.
 		for (int i = 0; i < size; ++i)
 			//printf("%f, %f, %f belongs to %d cluster\n", data[i*dim + 0], data[i*dim + 1], data[i*dim + 2], labels[i]);
 			std::vector<CLineSeg>* alsl = vAreas[i + 1];
 			for (size_t l = 0; l < alsl->size(); ++l)
 				CLineSeg al = (*alsl)[l];
 				unsigned int* pSubCData = pDataDst + al.y * nWidth;
 				for (size_t u = al.x_s; u <= al.x_e; ++u)
 					pSubCData[u] = labels[i] + 1;	//labels start at 0

		delete[] data;
		delete[] labels;
		delete kmeans;
		//CStdTpl::DelPointerSafely(data, true);
		//CStdTpl::DelPointerSafely(labels, true);

		// Release the AreaLines allocated by TransAreas().
		for (std::vector<AreaLines*>::iterator it = vAreas.begin(); it != vAreas.end(); ++it)
			if (*it)
				delete *it;
				*it = nullptr;

		nLastIndex = cluster_num;

	delete[] pBound;

	return nLastIndex;


