34#include <visp3/core/vpConfig.h>
35#include <visp3/core/vpIoTools.h>
37#if defined(VISP_HAVE_MINIZ) && defined(VISP_HAVE_WORKING_REGEX)
43#include "basisu_miniz.h"
45using namespace buminiz;
51#define UNUSED(x) ((void)(x))
61#if defined(ENABLE_VISP_NAMESPACE)
/*!
 * Reverse, in place, the byte order of every element held in \e data_holder
 * (endianness conversion of a loaded array).
 *
 * \param data_holder : Raw bytes of the array; modified in place. A null pointer is ignored.
 * \param shape : Array dimensions. An empty shape is treated as a single (scalar) element.
 * \param word_size : Size in bytes of one element. Ignored when \e data_type is 'U'.
 * \param data_type : Type code of the elements:
 * - 'c' : complex value, the two halves of each word (real and imaginary parts) are reversed
 *   separately;
 * - 'U' : string data, the buffer is processed as consecutive 4-byte code units whatever
 *   \e shape and \e word_size are;
 * - anything else : each word is reversed as a whole.
 *
 * Only elements (or 4-byte code units) that lie entirely inside the buffer are touched, so a
 * shape / word size that disagrees with the buffer length cannot cause an out-of-range access.
 */
void reverse_data(std::shared_ptr<std::vector<char> > &data_holder, const std::vector<size_t> &shape,
  size_t word_size, char data_type)
{
  if (!data_holder) {
    return;
  }
  std::vector<char> &bytes = *data_holder;

  // Reverse the bytes located in [first, last).
  const auto reverse_bytes = [&bytes](size_t first, size_t last) {
    std::reverse(bytes.begin() + static_cast<std::ptrdiff_t>(first),
                 bytes.begin() + static_cast<std::ptrdiff_t>(last));
  };

  if (data_type == 'U') {
    // Strings are swapped per 4-byte code unit; a trailing partial unit is left untouched
    // instead of being read past the end of the buffer.
    const size_t utf32_size = 4;
    for (size_t pos = 0; pos + utf32_size <= bytes.size(); pos += utf32_size) {
      reverse_bytes(pos, pos + utf32_size);
    }
    return;
  }

  if (word_size == 0) {
    // Nothing to swap, and avoids a division by zero below.
    return;
  }

  // Number of elements: product of the dimensions, 1 for a scalar (empty shape).
  size_t total_size = 1;
  for (size_t k = 0; k < shape.size(); ++k) {
    total_size *= shape[k];
  }
  // Never process more elements than the buffer really contains.
  const size_t available = bytes.size() / word_size;
  if (total_size > available) {
    total_size = available;
  }

  if (data_type == 'c') {
    const size_t half_word_size = word_size / 2;
    for (size_t n = 0; n < total_size; ++n) {
      const size_t start = n * word_size;
      reverse_bytes(start, start + half_word_size);             // real part
      reverse_bytes(start + half_word_size, start + word_size); // imaginary part
    }
  }
  else {
    for (size_t n = 0; n < total_size; ++n) {
      reverse_bytes(n * word_size, (n + 1) * word_size);
    }
  }
}
112uint16_t swap16bits_if(uint16_t val,
bool swap)
121uint32_t swap32bits_if(uint32_t val,
bool swap)
135 AutoCloser() : fp(nullptr) { }
148 return (((
reinterpret_cast<char *
>(&x))[0]) ?
'<' :
'>');
153 if (t ==
typeid(
float)) {
return 'f'; }
154 if (t ==
typeid(
double)) {
return 'f'; }
155 if (t ==
typeid(
long double)) {
return 'f'; }
157 if (t ==
typeid(
int)) {
return 'i'; }
158 if (t ==
typeid(
char)) {
return 'i'; }
159 if (t ==
typeid(
short)) {
return 'i'; }
160 if (t ==
typeid(
long)) {
return 'i'; }
161 if (t ==
typeid(
long long)) {
return 'i'; }
163 if (t ==
typeid(
unsigned char)) {
return 'u'; }
164 if (t ==
typeid(
unsigned short)) {
return 'u'; }
165 if (t ==
typeid(
unsigned long)) {
return 'u'; }
166 if (t ==
typeid(
unsigned long long)) {
return 'u'; }
167 if (t ==
typeid(
unsigned int)) {
return 'u'; }
169 if (t ==
typeid(
bool)) {
return 'b'; }
171 if (t ==
typeid(std::complex<float>)) {
return 'c'; }
172 if (t ==
typeid(std::complex<double>)) {
return 'c'; }
173 if (t ==
typeid(std::complex<long double>)) {
return 'c'; }
175 if (t ==
typeid(std::string)) {
return 'U'; }
181 bool &fortran_order,
bool &little_endian,
char &data_type)
183 uint16_t header_len = *
reinterpret_cast<uint16_t *
>(buffer+8);
184 std::string header(
reinterpret_cast<char *
>(buffer+9), header_len);
187 size_t loc1 = header.find(
"fortran_order")+16;
188 fortran_order = (header.substr(loc1, 4) ==
"True" ?
true :
false);
191 loc1 = header.find(
"(");
192 size_t loc2 = header.find(
")");
194 std::regex num_regex(
"[0-9][0-9]*");
198 std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
199 while (std::regex_search(str_shape, sm, num_regex)) {
201 shape.push_back(std::stoll(sm[0].str()));
202 str_shape = sm.suffix().str();
208 loc1 = header.find(
"descr")+9;
209 little_endian = ((header[loc1] ==
'<') || (header[loc1] ==
'|') ? true :
false);
210 data_type = header[loc1+1];
212 std::string str_ws = header.substr(loc1+2);
213 loc2 = str_ws.find(
"'");
214 word_size = atoll(str_ws.substr(0, loc2).c_str());
218 bool &fortran_order,
bool &little_endian,
char &data_type)
221 size_t res = fread(buffer,
sizeof(
char), 11, fp);
223 std::ostringstream oss;
224 oss <<
"parse_npy_header: failed fread, res=" << res;
225 throw std::runtime_error(oss.str());
227 std::string header = fgets(buffer, 256, fp);
228 assert(header[header.size()-1] ==
'\n');
233 loc1 = header.find(
"fortran_order");
234 if (loc1 == std::string::npos) {
235 throw std::runtime_error(
"parse_npy_header: failed to find header keyword: 'fortran_order'");
238 fortran_order = (header.substr(loc1, 4) ==
"True" ?
true :
false);
241 loc1 = header.find(
"(");
242 loc2 = header.find(
")");
243 if ((loc1 == std::string::npos) || (loc2 == std::string::npos)) {
244 throw std::runtime_error(
"parse_npy_header: failed to find header keyword: '(' or ')'");
247 std::regex num_regex(
"[0-9][0-9]*");
251 std::string str_shape = header.substr(loc1+1, loc2-loc1-1);
252 while (std::regex_search(str_shape, sm, num_regex)) {
254 shape.push_back(std::stoll(sm[0].str()));
255 str_shape = sm.suffix().str();
261 loc1 = header.find(
"descr");
262 if (loc1 == std::string::npos) {
263 throw std::runtime_error(
"parse_npy_header: failed to find header keyword: 'descr'");
266 little_endian = ((header[loc1] ==
'<') || (header[loc1] ==
'|') ? true :
false);
267 data_type = header[loc1+1];
269 std::string str_ws = header.substr(loc1+2);
270 loc2 = str_ws.find(
"'");
271 word_size = atoll(str_ws.substr(0, loc2).c_str());
272 if (data_type ==
'U') {
279 std::vector<char> footer(22);
280 fseek(fp, -22, SEEK_END);
281 size_t res = fread(&footer[0],
sizeof(
char), 22, fp);
283 std::ostringstream oss;
284 oss <<
"parse_zip_footer: failed fread, res=" << res;
285 throw std::runtime_error(oss.str());
288 uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
289#ifdef VISP_BIG_ENDIAN
298 disk_no = *(uint16_t *)&footer[4];
299 disk_start = *(uint16_t *)&footer[6];
300 nrecs_on_disk = *(uint16_t *)&footer[8];
301 nrecs = *(uint16_t *)&footer[10];
302 global_header_size = *(uint32_t *)&footer[12];
303 global_header_offset = *(uint32_t *)&footer[16];
304 comment_len = *(uint16_t *)&footer[20];
307 UNUSED(disk_no); assert(disk_no == 0);
308 UNUSED(disk_start); assert(disk_start == 0);
309 UNUSED(nrecs_on_disk); assert(nrecs_on_disk == nrecs);
310 UNUSED(comment_len); assert(comment_len == 0);
315 std::vector<size_t> shape;
317 bool fortran_order, little_endian;
318 char data_type =
'i';
322 size_t nread = fread(arr.data<
char>(), 1, arr.num_bytes(), fp);
323 if (nread != arr.num_bytes()) {
324 std::ostringstream oss;
325 oss <<
"load_the_npy_file: failed fread, nread=" << nread <<
" ; num_bytes=" << arr.num_bytes();
326 throw std::runtime_error(oss.str());
329#ifdef VISP_LITTLE_ENDIAN
330 if (!little_endian) {
331 reverse_data(arr.data_holder, arr.shape, arr.word_size, data_type);
335 reverse_data(arr.data_holder, arr.shape, arr.word_size, data_type);
343 std::vector<unsigned char> buffer_compr(compr_bytes);
344 std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
345 size_t nread = fread(&buffer_compr[0], 1, compr_bytes, fp);
346 if (nread != compr_bytes) {
347 std::ostringstream oss;
348 oss <<
"load_the_npz_array: failed fread, nread=" << nread <<
" ; compr_bytes=" << compr_bytes;
349 throw std::runtime_error(oss.str());
354 d_stream.zalloc = Z_NULL;
355 d_stream.zfree = Z_NULL;
356 d_stream.opaque = Z_NULL;
357 d_stream.avail_in = 0;
358 d_stream.next_in = Z_NULL;
359 int err = inflateInit2(&d_stream, -MAX_WBITS);
362 std::ostringstream oss;
363 oss <<
"load_the_npz_array: zlib inflateInit2 failed ; err=" << err;
364 throw std::runtime_error(oss.str());
367 d_stream.avail_in = compr_bytes;
368 d_stream.next_in = &buffer_compr[0];
369 d_stream.avail_out = uncompr_bytes;
370 d_stream.next_out = &buffer_uncompr[0];
372 err = inflate(&d_stream, Z_FINISH);
374 std::ostringstream oss;
375 oss <<
"load_the_npz_array: zlib inflate failed ; err=" << err;
376 throw std::runtime_error(oss.str());
378 err = inflateEnd(&d_stream);
380 std::ostringstream oss;
381 oss <<
"load_the_npz_array: zlib inflateEnd failed ; err=" << err;
382 throw std::runtime_error(oss.str());
385 std::vector<size_t> shape;
388 bool little_endian =
true;
389 char data_type =
'i';
394 size_t offset = uncompr_bytes - array.num_bytes();
395 memcpy(array.data<
unsigned char>(), &buffer_uncompr[0]+offset, array.num_bytes());
397#ifdef VISP_LITTLE_ENDIAN
398 if (!little_endian) {
399 reverse_data(array.data_holder, array.shape, array.word_size, data_type);
403 reverse_data(array.data_holder, array.shape, array.word_size, data_type);
427 closer.fp = fopen(fname.c_str(),
"rb");
430 throw std::runtime_error(
"npz_load: Error! Unable to open file " + fname +
"!");
435 const unsigned int index_2 = 2;
436 const unsigned int index_3 = 3;
437 const unsigned int index_26 = 26;
438 const unsigned int index_28 = 28;
439 const unsigned int val_8 = 8;
440 const unsigned int val_18 = 18;
441 const unsigned int val_22 = 22;
442 const unsigned int val_30 = 30;
444 bool host_is_LE =
true;
445#ifndef VISP_LITTLE_ENDIAN
449 const bool header_file_is_LE =
true;
450 bool same_endianness = (host_is_LE == header_file_is_LE);
452 std::vector<char> local_header(val_30);
453 size_t headerres = fread(&local_header[0],
sizeof(
char), val_30, closer.fp);
454 if (headerres != 30) {
455 throw std::runtime_error(
"npz_load: failed fread 1");
459 if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
464 uint16_t name_len = swap16bits_if(*(uint16_t *)&local_header[index_26], !same_endianness);
465 std::string varname(name_len,
' ');
466 size_t vname_res = fread(&varname[0],
sizeof(
char), name_len, closer.fp);
467 if (vname_res != name_len) {
468 throw std::runtime_error(
"npz_load: failed fread 2");
472 varname.erase(varname.end()-4, varname.end());
475 uint16_t extra_field_len = swap16bits_if(*(uint16_t *)&local_header[index_28], !same_endianness);
476 if (extra_field_len > 0) {
477 std::vector<char> buff(extra_field_len);
478 size_t efield_res = fread(&buff[0],
sizeof(
char), extra_field_len, closer.fp);
479 if (efield_res != extra_field_len) {
480 throw std::runtime_error(
"npz_load: failed fread 3");
484 uint16_t compr_method = swap16bits_if(*
reinterpret_cast<uint16_t *
>(&local_header[0] + val_8), !same_endianness);
485 uint32_t compr_bytes = swap32bits_if(*
reinterpret_cast<uint32_t *
>(&local_header[0] + val_18), !same_endianness);
486 uint32_t uncompr_bytes = swap32bits_if(*
reinterpret_cast<uint32_t *
>(&local_header[0] + val_22), !same_endianness);
488 if (compr_method == 0) {
489 arrays[varname] = load_the_npy_file(closer.fp);
492 arrays[varname] = load_the_npz_array(closer.fp, compr_bytes, uncompr_bytes);
518 closer.fp = fopen(fname.c_str(),
"rb");
521 throw std::runtime_error(
"npz_load: Unable to open file " + fname +
"!");
525 const unsigned int index_2 = 2;
526 const unsigned int index_3 = 3;
527 const unsigned int index_26 = 26;
528 const unsigned int index_28 = 28;
529 const unsigned int val_8 = 8;
530 const unsigned int val_18 = 18;
531 const unsigned int val_22 = 22;
532 const unsigned int val_30 = 30;
534 bool host_is_LE =
true;
535#ifndef VISP_LITTLE_ENDIAN
539 const bool header_file_is_LE =
true;
540 bool same_endianness = (host_is_LE == header_file_is_LE);
542 std::vector<char> local_header(val_30);
543 size_t header_res = fread(&local_header[0],
sizeof(
char), val_30, closer.fp);
544 if (header_res != 30) {
545 throw std::runtime_error(
"npz_load 2: failed fread");
549 if ((local_header[index_2] != 0x03) || (local_header[index_3] != 0x04)) {
554 uint16_t name_len = swap16bits_if(*(uint16_t *)&local_header[index_26], !same_endianness);
555 std::string vname(name_len,
' ');
556 size_t vname_res = fread(&vname[0],
sizeof(
char), name_len, closer.fp);
557 if (vname_res != name_len) {
558 throw std::runtime_error(
"npz_load 2: failed fread");
560 vname.erase(vname.end()-4, vname.end());
563 uint16_t extra_field_len = swap16bits_if(*(uint16_t *)&local_header[index_28], !same_endianness);
564 fseek(closer.fp, extra_field_len, SEEK_CUR);
566 uint16_t compr_method = swap16bits_if(*
reinterpret_cast<uint16_t *
>(&local_header[0] + val_8), !same_endianness);
567 uint32_t compr_bytes = swap32bits_if(*
reinterpret_cast<uint32_t *
>(&local_header[0] + val_18), !same_endianness);
568 uint32_t uncompr_bytes = swap32bits_if(*
reinterpret_cast<uint32_t *
>(&local_header[0] + val_22), !same_endianness);
570 if (vname == varname) {
571 NpyArray array = (compr_method == 0) ? load_the_npy_file(closer.fp) : load_the_npz_array(closer.fp, compr_bytes, uncompr_bytes);
576 uint32_t size = swap32bits_if(*(uint32_t *)&local_header[22], !same_endianness);
577 fseek(closer.fp, size, SEEK_CUR);
583 throw std::runtime_error(
"npz_load 2: Variable name " + varname +
" not found in " + fname);
601 closer.fp = fopen(fname.c_str(),
"rb");
604 throw std::runtime_error(
"npy_load: Unable to open file " + fname +
"!");
607 NpyArray arr = load_the_npy_file(closer.fp);
658 size_t max_length = *std::max_element(lengths.begin(), lengths.end());
660 std::vector<char> dict;
661 dict +=
"{'descr': '";
663 dict +=
map_type(
typeid(std::string));
664 dict += std::to_string(max_length);
665 if (shape.size() > 0) {
666 dict +=
"', 'fortran_order': False, 'shape': (";
667 dict += std::to_string(shape[0]);
668 for (
size_t i = 1; i < shape.size(); ++i) {
670 dict += std::to_string(shape[i]);
672 if (shape.size() == 1) dict +=
",";
676 dict +=
"', 'fortran_order': False, 'shape': (";
680 int remainder = 16 - (10 + dict.size()) % 16;
681 dict.insert(dict.end(), remainder,
' ');
684 std::vector<char> header;
685 header +=
static_cast<char>(0x93);
687 header +=
static_cast<char>(0x01);
688 header +=
static_cast<char>(0x00);
689#ifdef VISP_BIG_ENDIAN
692 header +=
static_cast<uint16_t
>(dict.size());
694 header.insert(header.end(), dict.begin(), dict.end());
729 std::vector<char> utf32Vector;
730 utf32Vector.resize(4*max_size, 0);
732 for (
size_t i = 0, idx = 0; i < utf8.length(); i++, idx += 4) {
733#ifdef VISP_BIG_ENDIAN
734 utf32Vector[idx+3] = utf8[i];
736 utf32Vector[idx] = utf8[i];
/*!
 * Write a collection of strings as one entry of a zip (.npz) archive.
 *
 * Steps visible in this listing: the archive is opened with fopen(), the length of every string
 * is collected, the strings are gathered into a single byte buffer (data_str_utf32_LE), a CRC-32
 * is computed over the NPY header followed by that buffer, and a zip local file header, a
 * central-directory ("global") header and an end-of-central-directory footer are built and then
 * written with fwrite().
 *
 * NOTE(review): this listing lacks several source lines of the function (among them the
 * declarations of fp, nrecs and npy_header, the big-endian branches and most closing braces).
 * The comments below only describe the statements that are shown.
 *
 * \param zipname : Path of the archive passed to fopen().
 * \param fname : Entry name appended to the local and central-directory headers (taken by value).
 * \param data_vec : Strings to store. An emptiness check is the first statement shown.
 * \param shape : Array shape. It is not used by any statement shown here; presumably it is
 * forwarded to the NPY header builder - TODO confirm.
 * \param mode : When equal to "a" the archive is opened in "r+b" mode. A "wb" open is shown as
 * well; the condition guarding it is not visible here.
 *
 * \exception std::runtime_error : When the existing central directory cannot be read completely.
 */
758void visp::cnpy::npz_save(
const std::string &zipname, std::string fname,
const std::vector<std::string> &data_vec,
const std::vector<size_t> &shape,
const std::string &mode)
// Emptiness check on the input strings (the body of this branch is not shown in the listing).
760 if (data_vec.empty()) {
// Start offset and raw bytes of the central directory ("global header"): initially zero / empty.
773 size_t global_header_offset = 0;
774 std::vector<char> global_header;
// Append mode: try to open an existing archive for reading and writing.
776 if (mode ==
"a") fp = fopen(zipname.c_str(),
"r+b");
// NOTE(review): global_header_size is declared without an initial value; it is presumably filled
// in by a footer-parsing call that this listing does not show - TODO confirm.
783 size_t global_header_size;
// Jump to the existing central directory and load it in memory.
785 fseek(fp,
static_cast<long>(global_header_offset), SEEK_SET);
786 global_header.resize(global_header_size);
787 size_t res = fread(&global_header[0],
sizeof(
char), global_header_size, fp);
788 if (res != global_header_size) {
789 throw std::runtime_error(
"npz_save: header read error while adding to existing zip");
// Seek back to the start of the old central directory: what is written next starts there.
791 fseek(fp,
static_cast<long>(global_header_offset), SEEK_SET);
// Open the archive in "wb" mode (the guarding condition is not shown in the listing).
794 fp = fopen(zipname.c_str(),
"wb");
// Collect the length of every input string.
// NOTE(review): `auto data_str` copies each string on every iteration.
797 std::vector<size_t> lengths;
798 lengths.reserve(data_vec.size());
799 for (
auto data_str : data_vec) {
800 lengths.push_back(data_str.length());
// Longest string; data_vec is non-empty here as long as the emptiness check above leaves the
// function (its body is not shown), so max_element() returns a valid iterator.
804 size_t max_length = *std::max_element(lengths.begin(), lengths.end());
// Concatenate the per-string buffers (substr_utf32; its construction is not shown, presumably
// utf8_to_utf32_vec_pad) into one byte buffer.
// NOTE(review): the reserve() below accounts for max_length*4 bytes only, while the loop appends
// one chunk per string - TODO confirm whether lengths.size() should be part of the reservation.
806 std::vector<char> data_str_utf32_LE;
807 data_str_utf32_LE.reserve(max_length*4);
808 for (
size_t i = 0; i < lengths.size(); i++) {
810 data_str_utf32_LE.insert(data_str_utf32_LE.end(), substr_utf32.begin(), substr_utf32.end());
// Payload size of the zip entry: string bytes plus the NPY header.
814 size_t nels = data_str_utf32_LE.size();
815 size_t nbytes = nels*
sizeof(char) + npy_header.size();
// CRC-32 over the NPY header, continued over the string bytes.
818 uint32_t crc = vp_mz_crc32(0L, (uint8_t *)&npy_header[0], npy_header.size());
820 crc = vp_mz_crc32(crc, (uint8_t *)&data_str_utf32_LE[0], nels*
sizeof(uint8_t));
// Build the zip local file header. The `+=` on std::vector<char> is presumably an append helper
// declared elsewhere. The field order matches the offsets read back by npz_load(): compression
// method at 8, compressed size at 18, uncompressed size at 22, name length at 26, extra field
// length at 28.
824 std::vector<char> local_header;
825 local_header +=
"PK";
// NOTE(review): the big-endian branch and the matching #else are not shown in this listing; the
// appends below are presumably the little-endian branch.
826#ifdef VISP_BIG_ENDIAN
// Second half of the signature (bytes 0x03 0x04 once stored little-endian).
839 local_header +=
static_cast<uint16_t
>(0x0403);
// Version needed to extract.
840 local_header +=
static_cast<uint16_t
>(20);
// General purpose bit flag.
841 local_header +=
static_cast<uint16_t
>(0);
// Compression method: 0, i.e. the entry is stored uncompressed.
842 local_header +=
static_cast<uint16_t
>(0);
// Last modification time.
843 local_header +=
static_cast<uint16_t
>(0);
// Last modification date.
844 local_header +=
static_cast<uint16_t
>(0);
// CRC-32 of the payload.
845 local_header +=
static_cast<uint32_t
>(crc);
// Compressed size; identical to the uncompressed size below.
846 local_header +=
static_cast<uint32_t
>(nbytes);
// Uncompressed size.
847 local_header +=
static_cast<uint32_t
>(nbytes);
// Length of the entry name.
848 local_header +=
static_cast<uint16_t
>(fname.size());
// Length of the extra field: none.
849 local_header +=
static_cast<uint16_t
>(0);
// The entry name follows the 30 fixed bytes.
851 local_header += fname;
// Append the central-directory record of the new entry after the records loaded above (if any).
854 global_header +=
"PK";
// NOTE(review): only part of the big-endian branch is shown in this listing.
855#ifdef VISP_BIG_ENDIAN
// Bytes 4..29 of the local header are reused as-is (version needed ... extra field length).
858 global_header.insert(global_header.end(), local_header.begin()+4, local_header.begin()+30);
// File comment length.
859 global_header +=
static_cast<uint16_t
>(0);
// Disk number where the entry starts.
860 global_header +=
static_cast<uint16_t
>(0);
// Internal file attributes.
861 global_header +=
static_cast<uint16_t
>(0);
// External file attributes.
862 global_header +=
static_cast<uint32_t
>(0);
// Presumably the little-endian branch (the #else line is not shown): second half of the
// central-directory signature.
865 global_header +=
static_cast<uint16_t
>(0x0201);
// Version made by.
866 global_header +=
static_cast<uint16_t
>(20);
// Bytes 4..29 of the local header are reused as-is.
867 global_header.insert(global_header.end(), local_header.begin()+4, local_header.begin()+30);
// File comment length.
868 global_header +=
static_cast<uint16_t
>(0);
// Disk number where the entry starts.
869 global_header +=
static_cast<uint16_t
>(0);
// Internal file attributes.
870 global_header +=
static_cast<uint16_t
>(0);
// External file attributes.
871 global_header +=
static_cast<uint32_t
>(0);
// Offset of the entry's local header: the position where the previous central directory started
// (0 for a new archive).
872 global_header +=
static_cast<uint32_t
>(global_header_offset);
// Entry name.
874 global_header += fname;
// End-of-central-directory record. The field order matches what parse_zip_footer() reads: disk
// numbers at 4 and 6, record counts at 8 and 10, central-directory size at 12 and offset at 16,
// comment length at 20.
877 std::vector<char> footer;
// NOTE(review): only part of the big-endian branch is shown in this listing.
879#ifdef VISP_BIG_ENDIAN
881 footer +=
static_cast<uint16_t
>(0);
882 footer +=
static_cast<uint16_t
>(0);
// Byte-swapped offset of the new central directory.
886 footer +=
vpEndian::swap32bits(
static_cast<uint32_t
>(global_header_offset + nbytes + local_header.size()));
// Presumably the little-endian branch (the #else line is not shown): second half of the
// end-of-central-directory signature.
888 footer +=
static_cast<uint16_t
>(0x0605);
// Number of this disk.
889 footer +=
static_cast<uint16_t
>(0);
// Disk where the central directory starts.
890 footer +=
static_cast<uint16_t
>(0);
// Number of records on this disk: the existing ones plus the new entry.
891 footer +=
static_cast<uint16_t
>(nrecs+1);
// Total number of records.
892 footer +=
static_cast<uint16_t
>(nrecs+1);
// Size in bytes of the central directory.
893 footer +=
static_cast<uint32_t
>(global_header.size());
// Offset of the central directory: it is written right after the new entry.
894 footer +=
static_cast<uint32_t
>(global_header_offset + nbytes + local_header.size());
// Zip comment length.
896 footer +=
static_cast<uint16_t
>(0);
// Write everything in file order: local header, NPY header, string bytes, central directory,
// footer.
// NOTE(review): the values returned by fwrite() are not checked.
899 fwrite(&local_header[0],
sizeof(
char), local_header.size(), fp);
900 fwrite(&npy_header[0],
sizeof(
char), npy_header.size(), fp);
901 fwrite(&data_str_utf32_LE[0],
sizeof(
char), nels, fp);
902 fwrite(&global_header[0],
sizeof(
char), global_header.size(), fp);
903 fwrite(&footer[0],
sizeof(
char), footer.size(), fp);
919void visp::cnpy::npz_save(
const std::string &zipname,
const std::string &fname,
const std::string &data_str,
const std::string &mode)
921 std::vector<std::string> data_vec;
922 data_vec.push_back(data_str);
923 std::vector<size_t> shape { 1 };
924 npz_save(zipname, fname, data_vec, shape, mode);
Error that can be emitted by ViSP classes.
@ badValue
Used to indicate that a value is not in the allowed range.
@ dimensionError
Bad dimension.
Helpers to convert NPY/NPZ format to/from ViSP format.
VISP_EXPORT npz_t npz_load(const std::string &fname)
VISP_EXPORT char map_type(const std::type_info &t)
std::map< std::string, NpyArray > npz_t
VISP_EXPORT NpyArray npy_load(const std::string &fname)
std::vector< char > utf8_to_utf32_vec_pad(const std::string &utf8, const std::size_t &max_size)
VISP_EXPORT void parse_zip_footer(FILE *fp, uint16_t &nrecs, size_t &global_header_size, size_t &global_header_offset)
VISP_EXPORT void npz_save(const std::string &zipname, std::string fname, const std::vector< std::string > &data_vec, const std::vector< size_t > &shape, const std::string &mode="w")
std::vector< char > create_npy_header_string(const std::vector< size_t > &shape, const std::vector< size_t > &lengths)
VISP_EXPORT char BigEndianTest()
VISP_EXPORT void parse_npy_header(FILE *fp, size_t &word_size, std::vector< size_t > &shape, bool &fortran_order, bool &little_endian, char &data_type)
VISP_EXPORT uint32_t swap32bits(uint32_t val)
VISP_EXPORT uint16_t swap16bits(uint16_t val)