libzypp  17.31.31
metalinkparser.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
#include "metalinkparser.h"
#include <zypp-core/base/Logger.h>
#include <zypp-core/ByteArray.h>
#include <zypp-core/AutoDispose.h>

#include <algorithm>
#include <limits>
#include <stack>
#include <vector>

#include <libxml2/libxml/SAX2.h>
23 
24 using namespace zypp::base;
25 
26 namespace zypp::env
27 {
29  inline bool ZYPP_METALINK_DEBUG()
30  {
31  static bool val = [](){
32  const char * env = getenv("ZYPP_METALINK_DEBUG");
33  return( env && zypp::str::strToBool( env, true ) );
34  }();
35  return val;
36  }
37 }
38 
39 namespace zypp::media {
40  enum ParserState {
59  };
60 
61  struct transition {
62  std::string elementName; //< Name of the element for the transition to trigger
63  ParserState transitionTo; //< The state we go into when the element name in \a elementName is encountered
64  int docontent; //< Store the content of the element in the \a content member
65  };
66 
72  const std::unordered_map<ParserState, std::vector<transition> > & transitions () {
73  static std::unordered_map<ParserState, std::vector<transition> > map {
74  { STATE_START, {
75  { "metalink", STATE_METALINK, 0},
76  }
77  },
78  { STATE_METALINK, {
79  { "files", STATE_FILES, 0 },
80  { "file", STATE_M4FILE, 0 },
81  }
82  },
83  { STATE_FILES, {
84  { "file", STATE_FILE, 0},
85  }
86  },
87  { STATE_FILE, {
88  { "size", STATE_SIZE, 1 },
89  { "verification", STATE_VERIFICATION, 0 },
90  { "resources", STATE_RESOURCES, 0 },
91  }
92  },
94  { "hash", STATE_HASH, 1 },
95  { "pieces", STATE_PIECES, 0 },
96  }
97  },
98  { STATE_PIECES, {
99  { "hash", STATE_PHASH, 1 },
100  }
101  },
102  { STATE_RESOURCES, {
103  { "url", STATE_URL, 1 },
104  }
105  },
106  { STATE_M4FILE, {
107  { "size", STATE_M4SIZE, 1 },
108  { "hash", STATE_M4HASH, 1},
109  { "url", STATE_M4URL, 1},
110  { "pieces", STATE_M4PIECES, 0},
111  }
112  },
113  { STATE_M4PIECES, {
114  { "hash", STATE_M4PHASH, 1 },
115  }
116  },
117  };
118 
119  return map;
120  }
121 
122 static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts);
123 static void XMLCALL endElement(void *userData, const xmlChar *name);
124 static void XMLCALL characterData(void *userData, const xmlChar *s, int len);
125 
128  : parser( nullptr )
129  , state( STATE_START )
130  , depth( 0 )
131  , statedepth( 0 )
132  , docontent( 0 )
133  , gotfile( 0 )
134  , size( -1 )
135  , blksize( 0 )
136  , piecel( 0 )
137  , chksuml( 0 )
138  {
139  content.reserve( 256 );
140 
141  xmlSAXHandler sax;
142  memset(&sax, 0, sizeof(sax));
143  sax.startElement = startElement;
144  sax.endElement = endElement;
145  sax.characters = characterData;
146 
147  //internally creates a copy of xmlSaxHandler, so having it as local variable is safe
148  parser = AutoDispose<xmlParserCtxtPtr>( xmlCreatePushParserCtxt(&sax, this, NULL, 0, NULL), xmlFreeParserCtxt );
149  }
150 
151  void doTransition ( const transition &t ) {
152  parentStates.push( state );
153  state = t.transitionTo;
154  docontent = t.docontent;
155  statedepth = depth;
156  content.clear();
157  }
158 
159  void popState () {
160  state = parentStates.top();
161  statedepth--;
162  parentStates.pop();
163 
164  }
165 
167 
168  ParserState state; //< current state as defined in \ref stateswitch
169  std::stack<ParserState> parentStates;
170 
171  int depth; //< current element depth of traversing the document elements
172 
179 
180  std::string content; //< content of the current element
181  int docontent; //< should the content of the current elem be parsed
182 
183  int gotfile;
184  off_t size;
185  std::vector<MetalinkMirror> urls;
186  size_t blksize;
187 
188  std::vector<UByteArray> piece;
189  int piecel;
190 
191  std::vector<UByteArray> sha1;
192  std::vector<UByteArray> zsync;
193 
195  int chksuml;
196 };
197 
202 static const char *
203 find_attr(const char *txt, const xmlChar **atts)
204 {
205  if(!atts) {
206  return nullptr;
207  }
208 
209  for (; *atts; atts += 2)
210  {
211  if (!strcmp(reinterpret_cast<const char*>(*atts), txt))
212  return reinterpret_cast<const char*>(atts[1]);
213  }
214  return nullptr;
215 }
216 
217 static void XMLCALL
218 startElement(void *userData, const xmlChar *name, const xmlChar **atts)
219 {
220  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
221 
222  // if the current element depth does not match the expected depth for the current state we
223  // ignore the element and just increase the depth
224  if (pd->depth != pd->statedepth) {
225  pd->depth++;
226  return;
227  }
228  pd->depth++;
229 
230  const auto &trMap = transitions();
231  const auto currStateTrs = trMap.find( pd->state );
232  if ( currStateTrs == trMap.end() )
233  return;
234 
235  // check if the current element name is part of our transitions
236  auto foundTr = std::find_if( currStateTrs->second.begin(), currStateTrs->second.end(), [name]( const auto &tr ){
237  return tr.elementName == reinterpret_cast<const char *>(name);
238  });
239 
240  if ( foundTr == currStateTrs->second.end() ) {
241  // we found no possible transition, ignore
242  return;
243  }
244 
245  if ( ( foundTr->transitionTo == STATE_FILE || foundTr->transitionTo == STATE_M4FILE ) && pd->gotfile++)
246  return; /* ignore all but the first file */
247 
248  // advance the state machine and prepare variables for the new state
249  pd->doTransition( *foundTr );
250 
251  switch(pd->state)
252  {
253  case STATE_URL:
254  case STATE_M4URL:
255  {
256  const char *priority = find_attr("priority", atts);
257  const char *preference = find_attr("preference", atts);
258  const char *maxconnections = find_attr("maxconnections", atts);
259  int prio;
260  auto &mirr = pd->urls.emplace_back();
261  if (priority)
262  prio = str::strtonum<int>(priority);
263  else if (preference)
264  prio = 101 - str::strtonum<int>(preference);
265  else
266  prio = 999999;
267  mirr.priority = prio;
268 
269  if ( maxconnections )
270  mirr.maxConnections = str::strtonum<int>( maxconnections );
271 
272  break;
273  }
274  case STATE_PIECES:
275  case STATE_M4PIECES:
276  {
277  const char *type = find_attr("type", atts);
278  const char *length = find_attr("length", atts);
279  size_t blksize;
280 
281  if (!type || !length)
282  {
283  pd->popState();
284  break;
285  }
286  blksize = str::strtonum<unsigned long>(length);
287  if (!blksize || (pd->blksize && pd->blksize != blksize))
288  {
289  pd->popState();
290  break;
291  }
292  pd->blksize = blksize;
293  pd->piece.clear();
294  if (!strcmp(type, "sha1") || !strcmp(type, "sha-1"))
295  pd->piecel = 20;
296  else if (!strcmp(type, "zsync"))
297  pd->piecel = 4;
298  else
299  {
300  pd->popState();
301  break;
302  }
303  break;
304  }
305  case STATE_HASH:
306  case STATE_M4HASH:
307  {
308  const char *type = find_attr("type", atts);
309  if (!type)
310  type = "?";
311  if ((!strcmp(type, "sha1") || !strcmp(type, "sha-1")) && pd->chksuml < 20)
312  pd->chksuml = 20;
313  else if (!strcmp(type, "sha256") || !strcmp(type, "sha-256"))
314  pd->chksuml = 32;
315  else
316  {
317  pd->popState();
318  pd->docontent = 0;
319  }
320  break;
321  }
322  case STATE_PHASH:
323  case STATE_M4PHASH:
324  {
325  const char *piece = find_attr("piece", atts);
326  if ( pd->state == STATE_PHASH && (!piece || str::strtonum<uint>(piece) != pd->piece.size()) )
327  {
328  pd->popState();
329  }
330  break;
331  }
332  default:
333  break;
334  }
335 }
336 
338 {
339  return Digest::hexStringToUByteArray( str );
340 }
341 
342 static void XMLCALL
343 endElement(void *userData, const xmlChar *)
344 {
345  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
346  //printf("end depth %d-%d name %s\n", pd->depth, pd->statedepth, name);
347  if (pd->depth != pd->statedepth)
348  {
349  pd->depth--;
350  return;
351  }
352  switch (pd->state)
353  {
354  case STATE_SIZE:
355  case STATE_M4SIZE:
356  pd->size = (off_t)str::strtonum<off_t>(pd->content); //strtoull(pd->content, 0, 10);
357  break;
358  case STATE_HASH:
359  case STATE_M4HASH:
360  pd->chksum.clear();
361  pd->chksum = hexstr2bytes( pd->content );
362  if ( pd->content.length() != size_t(pd->chksuml) * 2 || !pd->chksum.size() )
363  {
364  pd->chksum.clear();
365  pd->chksuml = 0;
366  }
367  break;
368  case STATE_PHASH:
369  case STATE_M4PHASH: {
370  if ( pd->content.length() != size_t(pd->piecel) * 2 )
371  break;
372  UByteArray pieceHash = hexstr2bytes( pd->content );
373  if ( !pieceHash.size() )
374  pieceHash.resize( pd->piecel, 0 );
375  pd->piece.push_back( pieceHash );
376  break;
377  }
378  case STATE_PIECES:
379  case STATE_M4PIECES:
380  if (pd->piecel == 4)
381  pd->zsync = pd->piece;
382  else
383  pd->sha1 = pd->piece;
384 
385  pd->piecel = 0;
386  pd->piece.clear();
387  break;
388  case STATE_URL:
389  case STATE_M4URL:
390  if ( pd->content.length() )
391  pd->urls.back().url = std::string(pd->content);
392  else
393  // without a actual URL the mirror is useless
394  pd->urls.pop_back();
395  break;
396  default:
397  break;
398  }
399 
400  pd->depth--;
401  pd->popState();
402  pd->docontent = 0;
403 }
404 
405 static void XMLCALL
406 characterData(void *userData, const xmlChar *s, int len)
407 {
408  struct ml_parsedata *pd = reinterpret_cast<struct ml_parsedata *>(userData);
409  if (!pd->docontent)
410  return;
411 
412  if ( pd->content.length() + len + 1 > pd->content.capacity() )
413  pd->content.reserve( pd->content.capacity() + 256 );
414  pd->content.append( s, s+len );
415 }
416 
417 
418 MetaLinkParser::MetaLinkParser()
419  : pd( new ml_parsedata )
420 {}
421 
423 {
424  delete pd;
425 }
426 
427 void
429 {
430  parse(InputStream(filename));
431 }
432 
433 void
435 {
436  char buf[4096];
437  if (!is.stream())
438  ZYPP_THROW(Exception("MetaLinkParser: no such file"));
439  while (is.stream().good())
440  {
441  is.stream().read(buf, sizeof(buf));
442  parseBytes(buf, is.stream().gcount());
443  }
444  parseEnd();
445  MIL << "Parsed " << pd->urls.size() << " mirrors from " << is.path() << std::endl;
446  if ( env::ZYPP_METALINK_DEBUG() ) {
447  for ( const auto &mirr : pd->urls )
448  DBG << "- " << mirr.priority << " " << mirr.url << std::endl;
449  }
450 }
451 
452 void
453 MetaLinkParser::parseBytes(const char *buf, size_t len)
454 {
455  if (!len)
456  return;
457 
458  if (xmlParseChunk(pd->parser, buf, len, 0)) {
459  ZYPP_THROW(Exception("Parse Error"));
460  }
461 }
462 
463 void
465 {
466  if (xmlParseChunk(pd->parser, NULL, 0, 1)) {
467  ZYPP_THROW(Exception("Parse Error"));
468  }
469  if (pd->urls.size() ) {
470  stable_sort(pd->urls.begin(), pd->urls.end(), []( const auto &a, const auto &b ){
471  return a.priority < b.priority;
472  });
473  }
474 }
475 
476 std::vector<Url>
478 {
479  std::vector<Url> urls;
480  for ( const auto &mirr : pd->urls )
481  urls.push_back( mirr.url );
482  return urls;
483 }
484 
485 const std::vector<MetalinkMirror> &MetaLinkParser::getMirrors() const
486 {
487  return pd->urls;
488 }
489 
491 {
492  MediaBlockList bl(pd->size);
493  if (pd->chksuml == 20)
494  bl.setFileChecksum("SHA1", pd->chksuml, pd->chksum.data() );
495  else if (pd->chksuml == 32)
496  bl.setFileChecksum("SHA256", pd->chksuml, pd->chksum.data());
497  if (pd->size != off_t(-1) && pd->blksize)
498  {
499  size_t nb = (pd->size + pd->blksize - 1) / pd->blksize;
500  off_t off = 0;
501  size_t size = pd->blksize;
502  for ( size_t i = 0; i < nb; i++ )
503  {
504  if (i == nb - 1)
505  {
506  size = pd->size % pd->blksize;
507  if (!size)
508  size = pd->blksize;
509  }
510  size_t blkno = bl.addBlock(off, size);
511  if ( i < pd->sha1.size())
512  {
513  bl.setChecksum(blkno, "SHA1", 20, pd->sha1[i].data());
514  if ( i < pd->zsync.size())
515  {
516  unsigned char *p = pd->zsync[i].data();
517  bl.setRsum(blkno, 4, p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24, pd->blksize);
518  }
519  }
520  off += pd->blksize;
521  }
522  }
523  return bl;
524 }
525 
526 const std::vector<UByteArray> &MetaLinkParser::getZsyncBlockHashes() const
527 {
528  return pd->zsync;
529 }
530 
531 const std::vector<UByteArray> &MetaLinkParser::getSHA1BlockHashes() const
532 {
533  return pd->sha1;
534 }
535 
536 } // namespace zypp::media
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
MediaBlockList getBlockList() const
return the block list from the parsed metalink data
#define MIL
Definition: Logger.h:96
const std::vector< UByteArray > & getZsyncBlockHashes() const
Definition: Env.h:22
bool ZYPP_METALINK_DEBUG()
Hack to circumvent the currently poor --root support.
static void XMLCALL characterData(void *userData, const xmlChar *s, int len)
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:428
unsigned short b
std::vector< UByteArray > sha1
static void XMLCALL endElement(void *userData, const xmlChar *name)
void parseBytes(const char *bytes, size_t len)
parse a chunk of a file consisting of metalink xml data.
UByteArray hexstr2bytes(std::string str)
String related utilities and Regular expression matching.
Helper to create and pass std::istream.
Definition: inputstream.h:56
void parse(const Pathname &filename)
parse a file consisting of metalink xml data
const std::unordered_map< ParserState, std::vector< transition > > & transitions()
void parseEnd()
tells the parser that all chunks are now processed
boost::noncopyable NonCopyable
Ensure derived classes cannot be copied.
Definition: NonCopyable.h:26
struct ml_parsedata * pd
#define nullptr
Definition: Easy.h:55
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
const std::vector< MetalinkMirror > & getMirrors() const
return the mirrors from the parsed metalink data
const Pathname & path() const
Path to the input file or empty if no file.
Definition: inputstream.h:111
AutoDispose< xmlParserCtxtPtr > parser
static void XMLCALL startElement(void *userData, const xmlChar *name, const xmlChar **atts)
const std::vector< UByteArray > & getSHA1BlockHashes() const
Base class for Exception.
Definition: Exception.h:145
static const char * find_attr(const char *txt, const xmlChar **atts)
Look up a xml attribute in the passed array atts.
std::istream & stream() const
The std::istream.
Definition: inputstream.h:93
std::vector< UByteArray > zsync
std::stack< ParserState > parentStates
bool strToBool(const C_Str &str, bool default_r)
Parse str into a bool depending on the default value.
Definition: String.h:429
unsigned short a
std::vector< UByteArray > piece
std::vector< MetalinkMirror > urls
std::vector< Url > getUrls() const
return the download urls from the parsed metalink data
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
void doTransition(const transition &t)
#define DBG
Definition: Logger.h:95