libzypp 17.25.7
MetaLinkParser.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
14#include <zypp/base/Logger.h>
15
16#include <sys/types.h>
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
20
21#include <vector>
22#include <algorithm>
23#include <iostream>
24#include <fstream>
25
26#include <libxml2/libxml/SAX2.h>
27
28using namespace zypp::base;
29
30namespace zypp {
31 namespace media {
32
33enum state {
52};
53
55 enum state from;
56 std::string ename;
57 enum state to;
59};
60
/*
 * Transition table of the metalink SAX state machine.
 *
 * Each row lists, in order: the state the parser is currently in, the name
 * of a child element, the state entered when that element starts, and a
 * flag that startElement() copies into pd->docontent (characterData() only
 * buffers text while that flag is non-zero).
 * NOTE(review): the fourth member is declared on a line not visible here;
 * it is presumed to be `docontent` -- confirm against struct stateswitch.
 *
 * Rows that share the same source state must stay adjacent: the lookup
 * index built in the ml_parsedata constructor remembers only the first row
 * per source state, and startElement() scans forward from there while the
 * source state still matches.
 *
 * The final { NUMSTATES } row is the terminator both of those loops rely on.
 */
61static struct stateswitch stateswitches[] = {
62 { STATE_START, "metalink", STATE_METALINK, 0 },
63 { STATE_METALINK, "files", STATE_FILES, 0 },
64 { STATE_METALINK, "file", STATE_M4FILE, 0 },
65 { STATE_FILES, "file", STATE_FILE, 0 },
66 { STATE_FILE, "size", STATE_SIZE, 1 },
67 { STATE_FILE, "verification", STATE_VERIFICATION, 0 },
68 { STATE_FILE, "resources", STATE_RESOURCES, 0 },
69 { STATE_VERIFICATION, "hash", STATE_HASH, 1 },
70 { STATE_VERIFICATION, "pieces", STATE_PIECES, 0 },
71 { STATE_PIECES, "hash", STATE_PHASH, 1 },
72 { STATE_RESOURCES, "url", STATE_URL, 1 },
73 { STATE_M4FILE, "size", STATE_M4SIZE, 1 },
74 { STATE_M4FILE, "hash", STATE_M4HASH, 1},
75 { STATE_M4FILE, "url", STATE_M4URL, 1},
76 { STATE_M4FILE, "pieces", STATE_M4PIECES, 0},
77 { STATE_M4PIECES, "hash", STATE_M4PHASH, 1 },
78 { NUMSTATES }
79};
80
81struct ml_url {
83 : priority( 0 )
84 {}
86 std::string url;
87};
88
89static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
90static void XMLCALL endElement(void *userData, const xmlChar *name);
91static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
92
95 : parser( nullptr )
96 , depth( 0 )
98 , statedepth( 0 )
99 , content( reinterpret_cast<char *>(malloc(256)) )
100 , lcontent( 0 )
101 , acontent( 256 )
102 , docontent( 0 )
103 , called( 0 )
104 , gotfile( 0 )
105 , size( -1 )
106 , nurls( 0 )
107 , blksize( 0 )
108 , npiece( 0 )
109 , piecel( 0 )
110 , nsha1( 0 )
111 , nzsync( 0 )
112 , chksuml( 0 )
113 {
114 struct stateswitch *sw;
115 int i;
116 memset( swtab, 0, sizeof(swtab) );
117 memset( sbtab, 0, sizeof(sbtab) );
118 for (i = 0, sw = stateswitches; sw->from != NUMSTATES; i++, sw++)
119 {
120 if (!swtab[sw->from])
121 swtab[sw->from] = sw;
122 sbtab[sw->to] = sw->from;
123 }
124
125 xmlSAXHandler sax;
126 memset(&sax, 0, sizeof(sax));
127 sax.startElement = startElement;
128 sax.endElement = endElement;
129 sax.characters = characterData;
130
131 // xmlCreatePushParserCtxt internally creates a copy of the xmlSAXHandler, so having it as a local variable is safe
132 parser = xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL);
133 }
134
136 {
137 if (parser) {
138 xmlFreeParserCtxt(parser);
139 parser = nullptr;
140 }
141 free(content);
142 }
143
144 xmlParserCtxtPtr parser;
145 int depth;
148 char *content;
154
157 off_t size;
158 std::vector<struct ml_url> urls;
159 int nurls;
160 size_t blksize;
161
162 std::vector<unsigned char> piece;
165
166 std::vector<unsigned char> sha1;
167 int nsha1;
168 std::vector<unsigned char> zsync;
170
171 std::vector<unsigned char> chksum;
173};
174
175static const char *
176find_attr(const char *txt, const xmlChar **atts)
177{
178 if(!atts) {
179 return 0;
180 }
181
182 for (; *atts; atts += 2)
183 {
184 if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
185 return reinterpret_cast<const char*>(atts[1]);
186 }
187 return 0;
188}
189
190static void XMLCALL
191startElement(void *userData, const xmlChar *name, const xmlChar **atts)
192{
193 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
194 struct stateswitch *sw;
195 if (pd->depth != pd->statedepth)
196 {
197 pd->depth++;
198 return;
199 }
200 pd->depth++;
201 if (!pd->swtab[pd->state])
202 return;
203 for (sw = pd->swtab[pd->state]; sw->from == pd->state; sw++) /* find name in statetable */
204 if (sw->ename == reinterpret_cast<const char *>(name))
205 break;
206 if (sw->from != pd->state)
207 return;
208 if ((sw->to == STATE_FILE || sw->to == STATE_M4FILE) && pd->gotfile++)
209 return; /* ignore all but the first file */
210 //printf("start depth %d name %s\n", pd->depth, name);
211 pd->state = sw->to;
212 pd->docontent = sw->docontent;
213 pd->statedepth = pd->depth;
214 pd->lcontent = 0;
215 *pd->content = 0;
216 switch(pd->state)
217 {
218 case STATE_URL:
219 case STATE_M4URL:
220 {
221 const char *priority = find_attr("priority", atts);
222 const char *preference = find_attr("preference", atts);
223 int prio;
224 pd->urls.push_back(ml_url());
225 if (priority)
226 prio = atoi(priority);
227 else if (preference)
228 prio = 101 - atoi(preference);
229 else
230 prio = 999999;
231 pd->urls.back().priority = prio;
232 break;
233 }
234 case STATE_PIECES:
235 case STATE_M4PIECES:
236 {
237 const char *type = find_attr("type", atts);
238 const char *length = find_attr("length", atts);
239 size_t blksize;
240
241 if (!type || !length)
242 {
243 pd->state = pd->sbtab[pd->state];
244 pd->statedepth--;
245 break;
246 }
247 blksize = strtoul(length, 0, 10);
248 if (!blksize || (pd->blksize && pd->blksize != blksize))
249 {
250 pd->state = pd->sbtab[pd->state];
251 pd->statedepth--;
252 break;
253 }
254 pd->blksize = blksize;
255 pd->npiece = 0;
256 pd->piece.clear();
257 if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
258 pd->piecel = 20;
259 else if (!strcmp(type, "zsync"))
260 pd->piecel = 4;
261 else
262 {
263 pd->state = pd->sbtab[pd->state];
264 pd->statedepth--;
265 break;
266 }
267 break;
268 }
269 case STATE_HASH:
270 case STATE_M4HASH:
271 {
272 const char *type = find_attr("type", atts);
273 if (!type)
274 type = "?";
275 if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
276 pd->chksuml = 20;
277 else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
278 pd->chksuml = 32;
279 else
280 {
281 pd->state = pd->sbtab[pd->state];
282 pd->statedepth--;
283 pd->docontent = 0;
284 }
285 break;
286 }
287 case STATE_PHASH:
288 case STATE_M4PHASH:
289 {
290 const char *piece = find_attr("piece", atts);
291 if (pd->state == STATE_PHASH && (!piece || atoi(piece) != pd->npiece))
292 {
293 pd->state = pd->sbtab[pd->state];
294 pd->statedepth--;
295 }
296 break;
297 }
298 default:
299 break;
300 }
301}
302
/**
 * Decode a hexadecimal string into raw bytes.
 *
 * Two hex digits (upper or lower case) are consumed per output byte, most
 * significant nibble first.
 *
 * \param buf     destination for \a buflen decoded bytes
 * \param str     NUL terminated hex string
 * \param buflen  number of bytes to decode
 * \return \a buflen when all bytes were decoded, 0 as soon as a character
 *         that is not a hex digit (the terminating NUL included) is met.
 *         Bytes decoded before the failure stay in \a buf.
 */
static int
hexstr2bytes(unsigned char *buf, const char *str, int buflen)
{
  // Value of a single hex digit, or -1 if the character is not one.
  const auto nibble = [](char ch) -> int {
    if (ch >= '0' && ch <= '9')
      return ch - '0';
    if (ch >= 'a' && ch <= 'f')
      return ch - ('a' - 10);
    if (ch >= 'A' && ch <= 'F')
      return ch - ('A' - 10);
    return -1;
  };

  for (int idx = 0; idx < buflen; ++idx, str += 2)
    {
      const int hi = nibble(str[0]);
      if (hi < 0)
	return 0;	// also stops at the NUL before str[1] is looked at
      buf[idx] = hi;

      const int lo = nibble(str[1]);
      if (lo < 0)
	return 0;
      buf[idx] = (buf[idx] << 4) | lo;
    }
  return buflen;
}
327
328static void XMLCALL
329endElement(void *userData, const xmlChar *name)
330{
331 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
332 // printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
333 if (pd->depth != pd->statedepth)
334 {
335 pd->depth--;
336 return;
337 }
338 pd->depth--;
339 pd->statedepth--;
340 switch (pd->state)
341 {
342 case STATE_SIZE:
343 case STATE_M4SIZE:
344 pd->size = (off_t)strtoull(pd->content, 0, 10);
345 break;
346 case STATE_HASH:
347 case STATE_M4HASH:
348 pd->chksum.clear();
349 pd->chksum.resize(pd->chksuml, 0);
350 if (strlen(pd->content) != size_t(pd->chksuml) * 2 || !hexstr2bytes(&pd->chksum[0], pd->content, pd->chksuml))
351 {
352 pd->chksum.clear();
353 pd->chksuml = 0;
354 }
355 break;
356 case STATE_PHASH:
357 case STATE_M4PHASH:
358 if (strlen(pd->content) != size_t(pd->piecel) * 2)
359 break;
360 pd->piece.resize(pd->piecel * (pd->npiece + 1), 0);
361 if (!hexstr2bytes(&pd->piece[pd->piecel * pd->npiece], pd->content, pd->piecel))
362 {
363 pd->piece.resize(pd->piecel * pd->npiece, 0);
364 break;
365 }
366 pd->npiece++;
367 break;
368 case STATE_PIECES:
369 case STATE_M4PIECES:
370 if (pd->piecel == 4)
371 {
372 pd->zsync = pd->piece;
373 pd->nzsync = pd->npiece;
374 }
375 else
376 {
377 pd->sha1 = pd->piece;
378 pd->nsha1 = pd->npiece;
379 }
380 pd->piecel = pd->npiece = 0;
381 pd->piece.clear();
382 break;
383 case STATE_URL:
384 case STATE_M4URL:
385 if (*pd->content)
386 {
387 pd->urls[pd->nurls].url = std::string(pd->content);
388 pd->nurls++;
389 }
390 break;
391 default:
392 break;
393 }
394 pd->state = pd->sbtab[pd->state];
395 pd->docontent = 0;
396}
397
398static void XMLCALL
399characterData(void *userData, const xmlChar *s, int len)
400{
401 struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
402 int l;
403 char *c;
404 if (!pd->docontent)
405 return;
406 l = pd->lcontent + len + 1;
407 if (l > pd->acontent)
408 {
409 pd->content = reinterpret_cast<char *>(realloc(pd->content, l + 256));
410 pd->acontent = l + 256;
411 }
412 c = pd->content + pd->lcontent;
413 pd->lcontent += len;
414 while (len-- > 0)
415 *c++ = *s++;
416 *c = 0;
417}
418
419
421 : pd( new ml_parsedata )
422{}
423
425{
426 delete pd;
427}
428
429void
431{
432 parse(InputStream(filename));
433}
434
435void
437{
438 char buf[4096];
439 if (!is.stream())
440 ZYPP_THROW(Exception("MetaLinkParser: no such file"));
441 while (is.stream().good())
442 {
443 is.stream().read(buf, sizeof(buf));
444 parseBytes(buf, is.stream().gcount());
445 }
446 parseEnd();
447}
448
449void
450MetaLinkParser::parseBytes(const char *buf, size_t len)
451{
452 if (!len)
453 return;
454
455 if (xmlParseChunk(pd->parser, buf, len, 0)) {
456 ZYPP_THROW(Exception("Parse Error"));
457 }
458}
459
460static bool urlcmp(const ml_url &a, const ml_url &b)
461{
462 return a.priority < b.priority;
463}
464
465void
467{
468 if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
469 ZYPP_THROW(Exception("Parse Error"));
470 }
471 if (pd->nurls)
472 stable_sort(pd->urls.begin(), pd->urls.end(), urlcmp);
473}
474
475std::vector<Url>
477{
478 std::vector<Url> urls;
479 int i;
480 for (i = 0; i < pd->nurls; ++i)
481 urls.push_back(Url(pd->urls[i].url));
482 return urls;
483}
484
487{
488 size_t i;
490 if (pd->chksuml == 20)
491 bl.setFileChecksum("SHA1", pd->chksuml, &pd->chksum[0]);
492 else if (pd->chksuml == 32)
493 bl.setFileChecksum("SHA256", pd->chksuml, &pd->chksum[0]);
494 if (pd->size != off_t(-1) && pd->blksize)
495 {
496 size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
497 off_t off = 0;
498 size_t size = pd->blksize;
499 for (i = 0; i < nb; i++)
500 {
501 if (i == nb - 1)
502 {
503 size = pd->size % pd->blksize;
504 if (!size)
505 size = pd->blksize;
506 }
507 size_t blkno = bl.addBlock(off, size);
508 if (int(i) < pd->nsha1)
509 {
510 bl.setChecksum(blkno, "SHA1", 20, &pd->sha1[20 * i]);
511 if (int(i) < pd->nzsync)
512 {
513 unsigned char *p = &pd->zsync[4 * i];
514 bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
515 }
516 }
517 off += pd->blksize;
518 }
519 }
520 return bl;
521}
522
523 } // namespace media
524} // namespace zypp
525
#define nullptr
Definition: Easy.h:55
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:392
#define c2h(c)
Base class for Exception.
Definition: Exception.h:146
Helper to create and pass std::istream.
Definition: InputStream.h:57
std::istream & stream() const
The std::istream.
Definition: InputStream.h:93
Url manipulation class.
Definition: Url.h:92
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void parseEnd()
tells the parser that all chunks are now processed
MediaBlockList getBlockList()
return the block list from the parsed metalink data
std::vector< Url > getUrls()
return the download urls from the parsed metalink data
struct ml_parsedata * pd
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
String related utilities and Regular expression matching.
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
static struct stateswitch stateswitches[]
static const char * find_attr(const char *txt, const xmlChar **atts)
static void XMLCALL endElement(void *userData, const xmlChar *name)
static int hexstr2bytes(unsigned char *buf, const char *str, int buflen)
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
static bool urlcmp(const ml_url &a, const ml_url &b)
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:2
struct stateswitch * swtab[NUMSTATES]
enum state sbtab[NUMSTATES]
std::vector< unsigned char > zsync
std::vector< unsigned char > piece
std::vector< unsigned char > chksum
std::vector< struct ml_url > urls
std::vector< unsigned char > sha1