/*
    libmaus2
    Copyright (C) 2009-2013 German Tischler
    Copyright (C) 2011-2013 Genome Research Limited

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#if ! defined(LIBMAUS2_LZ_BGZFINFLATEDEFLATEPARALLEL_HPP)
#define LIBMAUS2_LZ_BGZFINFLATEDEFLATEPARALLEL_HPP

#include <libmaus2/lz/BgzfInflateBlock.hpp>
#include <libmaus2/parallel/StdTerminatableSynchronousHeap.hpp>
#include <libmaus2/parallel/OMPNumThreadsScope.hpp>
#include <libmaus2/lz/BgzfInflateBlockIdComparator.hpp>
#include <libmaus2/lz/BgzfInflateBlockIdInfo.hpp>
#include <libmaus2/lz/BgzfInflateParallelContext.hpp>
#include <libmaus2/lz/BgzfDeflateParallelContext.hpp>
#include <libmaus2/lz/BgzfInflateDeflateParallelThread.hpp>

#include <exception>
#include <iostream>

namespace libmaus2
{
	namespace lz
	{
		/**
		 * Combined BGZF reader (inflate) and writer (deflate) backed by one set of
		 * worker threads.
		 *
		 * A single work queue (globlist) is handed to both the inflate and the
		 * deflate context, and every thread callable created in init() receives
		 * both contexts.
		 *
		 * Input is consumed via read()/readAndInfo(), output is produced via
		 * put()/write() and finalised by flush(). Callers who need to see output
		 * errors should call flush() explicitly: the destructor also flushes, but
		 * it can only report a failure on std::cerr.
		 **/
		struct BgzfInflateDeflateParallel
		{
			typedef BgzfInflateDeflateParallel this_type;
			typedef std::unique_ptr<this_type> unique_ptr_type;
			typedef std::shared_ptr<this_type> shared_ptr_type;

			private:
			//! work queue passed to both contexts
			libmaus2::parallel::StdTerminatableSynchronousHeap<BgzfThreadQueueElement,BgzfThreadQueueElementHeapComparator> globlist;
			//! state of the decompression side
			BgzfInflateParallelContext inflatecontext;
			//! state of the compression side
			BgzfDeflateParallelContext deflatecontext;

			//! worker threads, created and started by init()
			libmaus2::autoarray::AutoArray<libmaus2::parallel::StdThread::unique_ptr_type> T;

			//! set once the input side has seen EOF or a failed block
			bool inflateterminated;

			/**
			 * @return default number of worker threads: one less than the value
			 *         reported by OMPNumThreadsScope::getMaxThreads(), but at least 1
			 **/
			static uint64_t getDefaultNumThreads()
			{
				return
					std::max(std::max(libmaus2::parallel::OMPNumThreadsScope::getMaxThreads(),static_cast<uint64_t>(1))-1,
						static_cast<uint64_t>(1));
			}

			/**
			 * @return the deflate exception id, read while holding deflateexlock;
			 *         std::numeric_limits<uint64_t>::max() is treated by the callers
			 *         as "no exception recorded"
			 **/
			uint64_t getDeflateExceptionId()
			{
				libmaus2::parallel::StdMutex::scope_lock_type slock(deflatecontext.deflateexlock);
				return deflatecontext.deflateexceptionid;
			}

			/**
			 * Throw a copy of the exception stored in the deflate context
			 * (deflatepse). The copy is made while holding deflateexlock.
			 **/
			void throwDeflateException()
			{
				libmaus2::parallel::StdMutex::scope_lock_type Q(deflatecontext.deflateexlock);
				throw (*(deflatecontext.deflatepse));
			}

			/**
			 * Dispose of the output block currently being filled and wait until
			 * all output blocks are back in the free list.
			 *
			 * If the current block's pc differs from pa (presumably: data has been
			 * written to it) it is queued for compression, otherwise it is put
			 * back into the free list directly.
			 **/
			void drain()
			{
				// handle last block
				{
					libmaus2::parallel::ScopeStdMutex Q(deflatecontext.deflateqlock);
					auto const & lastblock = deflatecontext.deflateB[deflatecontext.deflatecurobject];

					if ( lastblock->pc != lastblock->pa )
					{
						deflatecontext.deflatecompqueue.push_back(deflatecontext.deflatecurobject);
						deflatecontext.deflategloblist.enque(
							BgzfThreadQueueElement(
								BgzfThreadOpBase::libmaus2_lz_bgzf_op_compress_block,
								deflatecontext.deflatecurobject,
								0 /* block id */
							)
						);
					}
					else
					{
						deflatecontext.deflatefreelist.enque(deflatecontext.deflatecurobject);
					}
				}

				// wait until all threads are idle, i.e. every block is in the free list
				while ( deflatecontext.deflatefreelist.getFillState() < deflatecontext.deflateB.size() )
				{
					// poll once per second
					// (a 1/100 s interval would be: struct timespec waittime = { 0, 10000000 };)
					struct timespec waittime = { 1,0 };
					nanosleep(&waittime,nullptr);
				}
			}

			/**
			 * Create and start one worker thread per slot of T. Each thread runs a
			 * BgzfInflateDeflateParallelThreadCallable operating on both contexts.
			 **/
			void init()
			{
				for ( uint64_t i = 0; i < T.size(); ++i )
				{
					libmaus2::parallel::StdThreadCallable::unique_ptr_type tcal(
						new BgzfInflateDeflateParallelThreadCallable(inflatecontext,deflatecontext)
					);
					libmaus2::parallel::StdThread::unique_ptr_type tTi(
						new libmaus2::parallel::StdThread(tcal)
					);
					T[i] = std::move(tTi);
					T[i]->start();
				}
			}

			public:
			/**
			 * @param rinflatein compressed input stream
			 * @param rdeflateout compressed output stream
			 * @param level compression level passed to the deflate context
			 * @param rnumthreads number of worker threads
			 * @param rnumblocks number of blocks passed to each of the two contexts
			 **/
			BgzfInflateDeflateParallel(
				std::istream & rinflatein,
				std::ostream & rdeflateout,
				int const level,
				uint64_t const rnumthreads,
				uint64_t const rnumblocks)
			:
				globlist(2),
				inflatecontext(globlist,rinflatein,rnumblocks),
				deflatecontext(globlist,rdeflateout,rnumblocks,level,BgzfDeflateParallelContext::getDefaultDeflateGetCur()),
				T(rnumthreads),
				inflateterminated(false)
			{
				init();
			}

			/**
			 * @param rinflatein compressed input stream
			 * @param rcopyostr additional output stream handed to the inflate context
			 * @param rdeflateout compressed output stream
			 * @param level compression level passed to the deflate context
			 * @param rnumthreads number of worker threads
			 * @param rnumblocks number of blocks passed to each of the two contexts
			 **/
			BgzfInflateDeflateParallel(
				std::istream & rinflatein,
				std::ostream & rcopyostr,
				std::ostream & rdeflateout,
				int const level,
				uint64_t const rnumthreads,
				uint64_t const rnumblocks)
			:
				globlist(2),
				inflatecontext(globlist,rinflatein,rnumblocks,rcopyostr),
				deflatecontext(globlist,rdeflateout,rnumblocks,level,BgzfDeflateParallelContext::getDefaultDeflateGetCur()),
				T(rnumthreads),
				inflateterminated(false)
			{
				init();
			}

			/**
			 * Same as the five argument constructor with 4*rnumthreads blocks.
			 *
			 * @param rinflatein compressed input stream
			 * @param rdeflateout compressed output stream
			 * @param level compression level passed to the deflate context
			 * @param rnumthreads number of worker threads
			 **/
			BgzfInflateDeflateParallel(
				std::istream & rinflatein,
				std::ostream & rdeflateout,
				int const level,
				uint64_t const rnumthreads = getDefaultNumThreads()
			)
			:
				BgzfInflateDeflateParallel(rinflatein,rdeflateout,level,rnumthreads,4*rnumthreads)
			{
			}

			/**
			 * Same as the six argument constructor with 4*rnumthreads blocks.
			 *
			 * @param rinflatein compressed input stream
			 * @param rcopyostr additional output stream handed to the inflate context
			 * @param rdeflateout compressed output stream
			 * @param level compression level passed to the deflate context
			 * @param rnumthreads number of worker threads
			 **/
			BgzfInflateDeflateParallel(
				std::istream & rinflatein,
				std::ostream & rcopyostr,
				std::ostream & rdeflateout,
				int const level,
				uint64_t const rnumthreads = getDefaultNumThreads()
			)
			:
				BgzfInflateDeflateParallel(rinflatein,rcopyostr,rdeflateout,level,rnumthreads,4*rnumthreads)
			{
			}

			/**
			 * Flushes the output side. flush() may throw; an exception must not
			 * leave a destructor (this would call std::terminate, in particular
			 * while the stack is already being unwound because write() threw), so
			 * failures are reported on std::cerr instead of being propagated.
			 **/
			~BgzfInflateDeflateParallel()
			{
				try
				{
					flush();
				}
				catch(std::exception const & ex)
				{
					std::cerr << ex.what() << std::endl;
				}
				catch(...)
				{
					std::cerr << "BgzfInflateDeflateParallel::~BgzfInflateDeflateParallel(): unknown exception in flush()" << std::endl;
				}
			}

			/**
			 * Append a callback to the deflate context's list of block output callbacks.
			 *
			 * @param cb callback pointer (stored as is)
			 **/
			void registerBlockOutputCallback(::libmaus2::lz::BgzfDeflateOutputCallback * cb)
			{
				deflatecontext.blockoutputcallbacks.push_back(cb);
			}

			/**
			 * @return number of uncompressed bytes delivered by the most recent
			 *         readAndInfo()/read() call (0 if it delivered no data)
			 **/
			uint64_t gcount() const
			{
				return inflatecontext.inflategcnt;
			}

			/**
			 * Fetch the next decompressed block and copy its data to data.
			 *
			 * @param data output buffer
			 * @param n size of the output buffer in bytes, needs to be at least
			 *        BgzfInflateBlock::getBgzfMaxBlockSize()
			 * @return block info of the block obtained; after the input side has
			 *         terminated an info object constructed as (0,0,true,0) is returned
			 * @throws libmaus2::exception::LibMausException if n is too small
			 * @throws the exception stored in the block if decompressing it failed
			 **/
			BgzfInflateInfo readAndInfo(char * const data, uint64_t const n)
			{
				inflatecontext.inflategcnt = 0;

				if ( n < libmaus2::lz::BgzfInflateBlock::getBgzfMaxBlockSize() )
				{
					libmaus2::exception::LibMausException se;
					se.getStream() << "BgzfInflateDeflateParallel::readAndInfo(): buffer provided is too small: " << n << " < " << libmaus2::lz::BgzfInflateBlock::getBgzfMaxBlockSize() << std::endl;
					se.finish();
					throw se;
				}

				if ( inflateterminated )
					return BgzfInflateInfo(0,0,true,0 /* crc */);

				/* get object id */
				BgzfThreadQueueElement const btqe = inflatecontext.inflatedecompressedlist.deque();
				uint64_t const objectid = btqe.objectid;
				auto const & block = inflatecontext.inflateB[objectid];

				/* we have an exception, terminate readers and throw it at caller */
				if ( block->failed() )
				{
					libmaus2::parallel::ScopeStdMutex Q(inflatecontext.inflateqlock);
					inflatecontext.inflategloblist.terminate();
					inflateterminated = true;
					throw block->getException();
				}

				/* we have what we want: blocks are handed out in order of their id */
				assert ( block->blockid == inflatecontext.inflateeb );

				BgzfInflateInfo const info = block->blockinfo;

				/* empty block (EOF) */
				if ( (! info.uncompressed) && info.streameof )
				{
					libmaus2::parallel::ScopeStdMutex Q(inflatecontext.inflateqlock);
					inflatecontext.inflategloblist.terminate();
					inflateterminated = true;
					return info;
				}

				/* block contains data */
				uint64_t const blockid = block->blockid;
				assert ( blockid == inflatecontext.inflateeb );
				inflatecontext.inflateeb += 1;

				std::copy(block->data.begin(), block->data.begin()+info.uncompressed, reinterpret_cast<uint8_t *>(data));

				libmaus2::parallel::ScopeStdMutex Q(inflatecontext.inflateqlock);
				// the block object is free again
				inflatecontext.inflatefreelist.push_back(objectid);
				// read next block
				inflatecontext.inflategloblist.enque(
					BgzfThreadQueueElement(
						libmaus2::lz::BgzfThreadOpBase::libmaus2_lz_bgzf_op_read_block,
						objectid,
						0
					)
				);

				inflatecontext.inflategcnt = info.uncompressed;

				// tell the list of decompressed blocks which block id is expected next
				inflatecontext.inflatedecompressedlist.setReadyFor(
					BgzfThreadQueueElement(
						libmaus2::lz::BgzfThreadOpBase::libmaus2_lz_bgzf_op_none,
						0,
						blockid+1
					)
				);

				return info;
			}

			/**
			 * Like readAndInfo(), but return the number of uncompressed bytes only.
			 *
			 * @param data output buffer
			 * @param n size of the output buffer in bytes
			 * @return number of bytes stored in data
			 **/
			uint64_t read(char * const data, uint64_t const n)
			{
				return readAndInfo(data,n).uncompressed;
			}

			/**
			 * Write a single byte to the output side.
			 *
			 * @param c byte to be written
			 **/
			void put(uint8_t const c)
			{
				write(reinterpret_cast<char const *>(&c),1);
			}

			/**
			 * Append n bytes to the output side. Whenever the current output
			 * block becomes full it is queued for compression and a block is
			 * taken from the free list to continue with.
			 *
			 * @param c input data
			 * @param n number of bytes to be written
			 * @throws a copy of the stored deflate exception if one has been
			 *         recorded at the time a block becomes full
			 **/
			void write(char const * c, uint64_t n)
			{
				while ( n )
				{
					// deflatecurobject may change at the end of an iteration, so fetch the block anew each time
					auto const & cur = deflatecontext.deflateB[deflatecontext.deflatecurobject];

					uint64_t const freespace = cur->pe - cur->pc;
					uint64_t const towrite = std::min(n,freespace);
					std::copy(reinterpret_cast<uint8_t const *>(c),reinterpret_cast<uint8_t const *>(c)+towrite,cur->pc);

					c += towrite;
					cur->pc += towrite;
					n -= towrite;

					// if block is now full
					if ( cur->pc == cur->pe )
					{
						// check for exceptions on output
						if ( getDeflateExceptionId() != std::numeric_limits<uint64_t>::max() )
						{
							drain();
							throwDeflateException();
						}

						// push data object id into deflate queue
						{
							libmaus2::parallel::ScopeStdMutex Q(deflatecontext.deflateqlock);
							deflatecontext.deflatecompqueue.push_back(deflatecontext.deflatecurobject);
						}

						// register task in global todo list
						deflatecontext.deflategloblist.enque(
							BgzfThreadQueueElement(
								BgzfThreadOpBase::libmaus2_lz_bgzf_op_compress_block,
								deflatecontext.deflatecurobject,
								0 /* block id */
							)
						);

						// get next object
						deflatecontext.deflatecurobject = deflatecontext.deflatefreelist.deque();
						// set block id of next object
						deflatecontext.deflateB[deflatecontext.deflatecurobject]->blockid = deflatecontext.deflateoutid++;
					}
				}
			}

			/**
			 * Finish the output side: drain the pending blocks, terminate the
			 * deflate work queue and write the BGZF EOF block. Calls after a
			 * successful flush do nothing.
			 *
			 * @throws a copy of the stored deflate exception if one has been recorded
			 **/
			void flush()
			{
				if ( ! deflatecontext.deflateoutflushed )
				{
					drain();

					deflatecontext.deflategloblist.terminate();

					if ( getDeflateExceptionId() != std::numeric_limits<uint64_t>::max() )
						throwDeflateException();

					// append the precomputed EOF block
					std::string const eofblock = libmaus2::lz::BgzfDeflateBase::getEOFBlock();
					BgzfDeflateZStreamBaseFlushInfo BDZSBFI(0 /* uncomp size */,eofblock.size());

					deflatecontext.streamWrite(nullptr,reinterpret_cast<uint8_t const *>(eofblock.c_str()),BDZSBFI);

					#if 0
					// write default compressed block with size 0 (EOF marker)
					libmaus2::lz::BgzfDeflateBase eofBase;
					BgzfDeflateZStreamBaseFlushInfo const eofflushsize = eofBase.flush(true /* full flush */);
					assert ( ! eofflushsize.movesize );
					deflatecontext.streamWrite(eofBase.inbuf.begin(),eofBase.outbuf.begin(),eofflushsize);
					#endif

					deflatecontext.deflateoutflushed = true;
				}
			}

		};
	}
}
#endif
