libzypp 17.25.7
MediaBlockList.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include <sys/types.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17
18#include <vector>
19#include <iostream>
20#include <fstream>
21
23#include <zypp/base/Logger.h>
24#include <zypp/base/String.h>
25
26using namespace zypp::base;
27
28namespace zypp {
29 namespace media {
30
32{
33 filesize = size;
34 haveblocks = false;
35 chksumlen = 0;
36 chksumpad = 0;
37 rsumlen = 0;
38 rsumpad = 0;
39}
40
41size_t
42MediaBlockList::addBlock(off_t off, size_t size)
43{
44 haveblocks = true;
45 blocks.push_back(MediaBlock( off, size ));
46 return blocks.size() - 1;
47}
48
49void
50MediaBlockList::setFileChecksum(std::string ctype, int cl, unsigned char *c)
51{
52 if (!cl)
53 return;
54 fsumtype = ctype;
55 fsum.resize(cl);
56 memcpy(&fsum[0], c, cl);
57}
58
59const std::vector<unsigned char> &MediaBlockList::getFileChecksum()
60{
61 return fsum;
62}
63
64bool
66{
67 return digest.create(fsumtype);
68}
69
70bool
72{
73 if (!haveFileChecksum())
74 return true;
75 std::vector<unsigned char>dig = digest.digestVector();
76 if (dig.empty() || dig.size() < fsum.size())
77 return false;
78 return memcmp(&dig[0], &fsum[0], fsum.size()) ? false : true;
79}
80
81void
82MediaBlockList::setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad)
83{
84 if (!csl)
85 return;
86 if (!chksumlen)
87 {
88 if (blkno)
89 return;
90 chksumlen = csl;
91 chksumtype = cstype;
92 chksumpad = cspad;
93 }
94 if (csl != chksumlen || cstype != chksumtype || cspad != chksumpad || blkno != chksums.size() / chksumlen)
95 return;
96 chksums.resize(chksums.size() + csl);
97 memcpy(&chksums[csl * blkno], cs, csl);
98}
99
100void
101MediaBlockList::setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad)
102{
103 if (!rsl)
104 return;
105 if (!rsumlen)
106 {
107 if (blkno)
108 return;
109 rsumlen = rsl;
110 rsumpad = rspad;
111 }
112 if (rsl != rsumlen || rspad != rsumpad || blkno != rsums.size())
113 return;
114 rsums.push_back(rs);
115}
116
117bool
119{
120 return digest.create(chksumtype);
121}
122
123bool
124MediaBlockList::verifyDigest(size_t blkno, Digest &digest) const
125{
126 if (!haveChecksum(blkno))
127 return true;
128 size_t size = blocks[blkno].size;
129 if (!size)
130 return true;
131 if (chksumpad > size)
132 {
133 char pad[chksumpad - size];
134 memset(pad, 0, chksumpad - size);
135 digest.update(pad, chksumpad - size);
136 }
137 std::vector<unsigned char>dig = digest.digestVector();
138 if (dig.empty() || dig.size() < size_t(chksumlen))
139 return false;
140 return memcmp(&dig[0], &chksums[chksumlen * blkno], chksumlen) ? false : true;
141}
142
143unsigned int
144MediaBlockList::updateRsum(unsigned int rs, const char* bytes, size_t len) const
145{
146 if (!len)
147 return rs;
148 unsigned short s, m;
149 s = (rs >> 16) & 65535;
150 m = rs & 65535;
151 for (; len > 0 ; len--)
152 {
153 unsigned short c = (unsigned char)*bytes++;
154 s += c;
155 m += s;
156 }
157 return (s & 65535) << 16 | (m & 65535);
158}
159
160bool
161MediaBlockList::verifyRsum(size_t blkno, unsigned int rs) const
162{
163 if (!haveRsum(blkno))
164 return true;
165 size_t size = blocks[blkno].size;
166 if (!size)
167 return true;
168 if (rsumpad > size)
169 {
170 unsigned short s, m;
171 s = (rs >> 16) & 65535;
172 m = rs & 65535;
173 m += s * (rsumpad - size);
174 rs = (s & 65535) << 16 | (m & 65535);
175 }
176 switch(rsumlen)
177 {
178 case 3:
179 rs &= 0xffffff;
180 case 2:
181 rs &= 0xffff;
182 case 1:
183 rs &= 0xff;
184 default:
185 break;
186 }
187 return rs == rsums[blkno];
188}
189
190bool
191MediaBlockList::checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
192{
193 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
194 return false;
195 unsigned int rs = updateRsum(0, (const char *)buf, blocks[blkno].size);
196 return verifyRsum(blkno, rs);
197}
198
199bool
200MediaBlockList::checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
201{
202 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
203 return false;
204 Digest dig;
205 if (!createDigest(dig))
206 return false;
207 dig.update((const char *)buf, blocks[blkno].size);
208 return verifyDigest(blkno, dig);
209}
210
211std::vector<unsigned char> MediaBlockList::getChecksum(size_t blkno)
212{
213 if ( !haveChecksum(blkno) )
214 return {};
215
216 std::vector<unsigned char> buf ( chksumlen, '\0' );
217 memcpy( buf.data(), chksums.data()+(chksumlen * blkno), chksumlen );
218 return buf;
219}
220
221// specialized version of checkChecksum that can deal with a "rotated" buffer
222bool
223MediaBlockList::checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
224{
225 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
226 return false;
227 if (start == bufl)
228 start = 0;
229 Digest dig;
230 if (!createDigest(dig))
231 return false;
232 size_t size = blocks[blkno].size;
233 size_t len = bufl - start > size ? size : bufl - start;
234 dig.update((const char *)buf + start, len);
235 if (size > len)
236 dig.update((const char *)buf, size - len);
237 return verifyDigest(blkno, dig);
238}
239
240// write block to the file. can also deal with "rotated" buffers
241void
242MediaBlockList::writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, std::vector<bool> &found) const
243{
244 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
245 return;
246 off_t off = blocks[blkno].off;
247 size_t size = blocks[blkno].size;
248 if (fseeko(fp, off, SEEK_SET))
249 return;
250 if (start == bufl)
251 start = 0;
252 size_t len = bufl - start > size ? size : bufl - start;
253 if (fwrite(buf + start, len, 1, fp) != 1)
254 return;
255 if (size > len && fwrite(buf, size - len, 1, fp) != 1)
256 return;
257 found[blkno] = true;
258 found[blocks.size()] = true;
259}
260
/**
 * Fill \a bp with the next \a blksize bytes of input.
 *
 * The first \a pushback bytes are taken from \a pushbackp (moved to the
 * start of \a bp unless they are already there), the rest is read from
 * \a fp. If the file ends early the remainder of \a bp is zero filled.
 *
 * \return the number of real (non-fill) bytes placed into \a bp.
 */
static size_t
fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
{
  size_t remaining = blksize;

  if (pushback)
    {
      if (pushbackp != bp)
        memmove(bp, pushbackp, pushback);
      bp += pushback;
      remaining -= pushback;
    }

  for (; remaining; --remaining)
    {
      const int ch = getc(fp);
      if (ch == EOF)
        break;
      *bp++ = ch;
    }

  if (remaining)
    memset(bp, 0, remaining);
  return blksize - remaining;
}
286
287
288void
289MediaBlockList::reuseBlocks(FILE *wfp, std::string filename)
290{
291 FILE *fp;
292
293 if (!chksumlen || (fp = fopen(filename.c_str(), "r")) == 0)
294 return;
295 size_t nblks = blocks.size();
296 std::vector<bool> found;
297 found.resize(nblks + 1);
298 if (rsumlen && !rsums.empty())
299 {
300 size_t blksize = blocks[0].size;
301 if (nblks == 1 && rsumpad && rsumpad > blksize)
302 blksize = rsumpad;
303 // create hash of checksums
304 unsigned int hm = rsums.size() * 2;
305 while (hm & (hm - 1))
306 hm &= hm - 1;
307 hm = hm * 2 - 1;
308 if (hm < 16383)
309 hm = 16383;
310 unsigned int *ht = new unsigned int[hm + 1];
311 memset(ht, 0, (hm + 1) * sizeof(unsigned int));
312 for (unsigned int i = 0; i < rsums.size(); i++)
313 {
314 if (blocks[i].size != blksize && (i != nblks - 1 || rsumpad != blksize))
315 continue;
316 unsigned int r = rsums[i];
317 unsigned int h = r & hm;
318 unsigned int hh = 7;
319 while (ht[h])
320 h = (h + hh++) & hm;
321 ht[h] = i + 1;
322 }
323
324 unsigned char *buf = new unsigned char[blksize];
325 unsigned char *buf2 = new unsigned char[blksize];
326 size_t pushback = 0;
327 unsigned char *pushbackp = 0;
328 int bshift = 0;
329 if ((blksize & (blksize - 1)) == 0)
330 for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
331 ;
332 unsigned short a, b;
333 a = b = 0;
334 memset(buf, 0, blksize);
335 bool eof = 0;
336 bool init = 1;
337 int sql = nblks > 1 && chksumlen < 16 ? 2 : 1;
338 while (!eof)
339 {
340 for (size_t i = 0; i < blksize; i++)
341 {
342 int c;
343 if (eof)
344 c = 0;
345 else
346 {
347 if (pushback)
348 {
349 c = *pushbackp++;
350 pushback--;
351 }
352 else
353 c = getc(fp);
354 if (c == EOF)
355 {
356 eof = true;
357 c = 0;
358 if (!i || sql == 2)
359 break;
360 }
361 }
362 int oc = buf[i];
363 buf[i] = c;
364 a += c - oc;
365 if (bshift)
366 b += a - (oc << bshift);
367 else
368 b += a - oc * blksize;
369 if (init)
370 {
371 if (size_t(i) != blksize - 1)
372 continue;
373 init = 0;
374 }
375 unsigned int r;
376 if (rsumlen == 1)
377 r = ((unsigned int)b & 255);
378 else if (rsumlen == 2)
379 r = ((unsigned int)b & 65535);
380 else if (rsumlen == 3)
381 r = ((unsigned int)a & 255) << 16 | ((unsigned int)b & 65535);
382 else
383 r = ((unsigned int)a & 65535) << 16 | ((unsigned int)b & 65535);
384 unsigned int h = r & hm;
385 unsigned int hh = 7;
386 for (; ht[h]; h = (h + hh++) & hm)
387 {
388 size_t blkno = ht[h] - 1;
389 if (rsums[blkno] != r)
390 continue;
391 if (found[blkno])
392 continue;
393 if (sql == 2)
394 {
395 if (eof || blkno + 1 >= nblks)
396 continue;
397 pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
398 pushbackp = buf2;
399 if (!pushback)
400 continue;
401 if (!checkRsum(blkno + 1, buf2, blksize))
402 continue;
403 }
404 if (!checkChecksumRotated(blkno, buf, blksize, i + 1))
405 continue;
406 if (sql == 2 && !checkChecksum(blkno + 1, buf2, blksize))
407 continue;
408 writeBlock(blkno, wfp, buf, blksize, i + 1, found);
409 if (sql == 2)
410 {
411 writeBlock(blkno + 1, wfp, buf2, blksize, 0, found);
412 pushback = 0;
413 blkno++;
414 }
415 while (!eof)
416 {
417 blkno++;
418 pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
419 pushbackp = buf2;
420 if (!pushback)
421 break;
422 if (!checkRsum(blkno, buf2, blksize))
423 break;
424 if (!checkChecksum(blkno, buf2, blksize))
425 break;
426 writeBlock(blkno, wfp, buf2, blksize, 0, found);
427 pushback = 0;
428 }
429 init = false;
430 memset(buf, 0, blksize);
431 a = b = 0;
432 i = size_t(-1); // start with 0 on next iteration
433 break;
434 }
435 }
436 }
437 delete[] buf2;
438 delete[] buf;
439 delete[] ht;
440 }
441 else if (chksumlen >= 16)
442 {
443 // dummy variant, just check the checksums
444 size_t bufl = 4096;
445 off_t off = 0;
446 unsigned char *buf = new unsigned char[bufl];
447 for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
448 {
449 if (off > blocks[blkno].off)
450 continue;
451 size_t blksize = blocks[blkno].size;
452 if (blksize > bufl)
453 {
454 delete[] buf;
455 bufl = blksize;
456 buf = new unsigned char[bufl];
457 }
458 size_t skip = blocks[blkno].off - off;
459 while (skip)
460 {
461 size_t l = skip > bufl ? bufl : skip;
462 if (fread(buf, l, 1, fp) != 1)
463 break;
464 skip -= l;
465 off += l;
466 }
467 if (fread(buf, blksize, 1, fp) != 1)
468 break;
469 if (checkChecksum(blkno, buf, blksize))
470 writeBlock(blkno, wfp, buf, blksize, 0, found);
471 off += blksize;
472 }
473 }
474 if (!found[nblks])
475 return;
476 // now throw out all of the blocks we found
477 std::vector<MediaBlock> nblocks;
478 std::vector<unsigned char> nchksums;
479 std::vector<unsigned int> nrsums;
480
481 for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
482 {
483 if (!found[blkno])
484 {
485 // still need it
486 nblocks.push_back(blocks[blkno]);
487 if (chksumlen && (blkno + 1) * chksumlen <= chksums.size())
488 {
489 nchksums.resize(nblocks.size() * chksumlen);
490 memcpy(&nchksums[(nblocks.size() - 1) * chksumlen], &chksums[blkno * chksumlen], chksumlen);
491 }
492 if (rsumlen && (blkno + 1) <= rsums.size())
493 nrsums.push_back(rsums[blkno]);
494 }
495 }
496 blocks = nblocks;
497 chksums = nchksums;
498 rsums = nrsums;
499}
500
501std::string
503{
504 std::string s;
505 size_t i, j;
506
507 if (filesize != off_t(-1))
508 {
509 long long size = filesize;
510 s = zypp::str::form("[ BlockList, file size %lld\n", size);
511 }
512 else
513 s = "[ BlockList, filesize unknown\n";
514 if (!haveblocks)
515 s += " No block information\n";
516 if (chksumpad)
517 s += zypp::str::form(" Checksum pad %zd\n", chksumpad);
518 if (rsumpad)
519 s += zypp::str::form(" Rsum pad %zd\n", rsumpad);
520 for (i = 0; i < blocks.size(); ++i)
521 {
522 long long off=blocks[i].off;
523 long long size=blocks[i].size;
524 s += zypp::str::form(" (%8lld, %8lld)", off, size);
525 if (chksumlen && chksums.size() >= (i + 1) * chksumlen)
526 {
527 s += " " + chksumtype + ":";
528 for (j = 0; j < size_t(chksumlen); j++)
529 s += zypp::str::form("%02hhx", chksums[i * chksumlen + j]);
530 }
531 if (rsumlen && rsums.size() > i)
532 {
533 s += " RSUM:";
534 s += zypp::str::form("%0*x", 2 * rsumlen, rsums[i]);
535 }
536 s += "\n";
537 }
538 s += "]";
539 return s;
540}
541
542 } // namespace media
543} // namespace zypp
544
Compute Message Digests (MD5, SHA1 etc)
Definition: Digest.h:46
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm
Definition: Digest.cc:226
std::vector< unsigned char > digestVector()
get vector of unsigned char representation of the digest
Definition: Digest.cc:208
bool create(const std::string &name)
initialize creation of a new message digest
Definition: Digest.cc:157
std::vector< unsigned int > rsums
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
bool haveChecksum(size_t blkno) const
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
bool verifyRsum(size_t blkno, unsigned int rs) const
void reuseBlocks(FILE *wfp, std::string filename)
scan a file for blocks from our blocklist.
void writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, std::vector< bool > &found) const
bool createDigest(Digest &digest) const
std::string asString() const
return block list as string
bool checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
std::vector< unsigned char > chksums
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
unsigned int updateRsum(unsigned int rs, const char *bytes, size_t len) const
std::vector< unsigned char > getChecksum(size_t blkno)
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
std::vector< unsigned char > fsum
bool verifyDigest(size_t blkno, Digest &digest) const
const std::vector< unsigned char > & getFileChecksum()
bool checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
std::vector< MediaBlock > blocks
bool createFileDigest(Digest &digest) const
bool verifyFileDigest(Digest &digest) const
bool haveRsum(size_t blkno) const
bool checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
MediaBlockList(off_t filesize=off_t(-1))
static size_t fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
std::string form(const char *format,...) __attribute__((format(printf
Printf style construction of std::string.
Definition: String.cc:36
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:2
a single block from the blocklist, consisting of an offset and a size