libzypp 17.31.25
MediaMultiCurl.cc
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include <ctype.h>
14#include <sys/types.h>
15#include <signal.h>
16#include <sys/wait.h>
17#include <netdb.h>
18#include <arpa/inet.h>
19
20#include <vector>
21#include <iostream>
22#include <algorithm>
23
24
25#include <zypp/ZConfig.h>
26#include <zypp/base/Logger.h>
27#include <zypp/media/MediaMultiCurl.h>
28#include <zypp-curl/parser/MetaLinkParser>
29#include <zypp/ManagedFile.h>
30#include <zypp-curl/private/curlhelper_p.h>
31#include <zypp-curl/auth/CurlAuthData>
32
33using std::endl;
34using namespace zypp::base;
35
36#undef CURLVERSION_AT_LEAST
37#define CURLVERSION_AT_LEAST(M,N,O) LIBCURL_VERSION_NUM >= ((((M)<<8)+(N))<<8)+(O)
38
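The guard above compares LIBCURL_VERSION_NUM, which libcurl defines as the packed hex constant 0xXXYYZZ for version XX.YY.ZZ. A minimal editorial sketch (not part of MediaMultiCurl.cc) of the same packing:

#include <curl/curl.h>
#include <cstdio>

int main()
{
  // 7.66.0 packs to ((((7)<<8)+66)<<8)+0 == 0x074200; curl_multi_poll()
  // used further below needs at least this version.
  unsigned long want = ((((7UL) << 8) + 66UL) << 8) + 0UL;
  std::printf("built against %#x, want >= %#lx\n",
              (unsigned)LIBCURL_VERSION_NUM, want);
  return LIBCURL_VERSION_NUM >= want ? 0 : 1;
}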
39namespace zypp {
40 namespace media {
41
42
44
45
46class multifetchrequest;
47
48// Hack: we derive from MediaCurl just to get the storage space for
49// settings, url, curlerrors and the like
50
51class multifetchworker : MediaCurl {
52 friend class multifetchrequest;
53
54public:
55 multifetchworker(int no, multifetchrequest &request, const Url &url);
56 ~multifetchworker();
57 void nextjob();
58 void run();
59 bool checkChecksum();
60 bool recheckChecksum();
61 void disableCompetition();
62
63 void checkdns();
64 void adddnsfd( std::vector<curl_waitfd> &waitFds );
65 void dnsevent( const std::vector<curl_waitfd> &waitFds );
66
67 int _workerno;
68
69 int _state;
70 bool _competing;
71
72 size_t _blkno;
73 off_t _blkstart;
74 size_t _blksize;
75 bool _noendrange;
76
77 double _blkstarttime;
78 size_t _blkreceived;
79 off_t _received;
80
81 double _avgspeed;
82 double _maxspeed;
83
84 double _sleepuntil;
85
86private:
87 void stealjob();
88
89 size_t writefunction(void *ptr, size_t size);
90 static size_t _writefunction(void *ptr, size_t size, size_t nmemb, void *stream);
91
92 size_t headerfunction(char *ptr, size_t size);
93 static size_t _headerfunction(void *ptr, size_t size, size_t nmemb, void *stream);
94
95 multifetchrequest *_request;
96 int _pass;
97 std::string _urlbuf;
98 off_t _off;
99 size_t _size;
100 Digest _dig;
101
102 pid_t _pid;
103 int _dnspipe;
104};
105
106#define WORKER_STARTING 0
107#define WORKER_LOOKUP 1
108#define WORKER_FETCH 2
109#define WORKER_DISCARD 3
110#define WORKER_DONE 4
111#define WORKER_SLEEP 5
112#define WORKER_BROKEN 6
113
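For reading the numeric states: the values printed by the "overall result" log at the end of multifetchrequest::run() map to the defines above. A hypothetical helper, not in the original file:

static const char *workerStateName(int state)
{
  switch (state)
    {
    case WORKER_STARTING: return "starting";
    case WORKER_LOOKUP:   return "lookup";   // DNS child still running
    case WORKER_FETCH:    return "fetch";    // transferring its block
    case WORKER_DISCARD:  return "discard";  // block already taken by a competitor
    case WORKER_DONE:     return "done";
    case WORKER_SLEEP:    return "sleep";    // backed off for being slow
    case WORKER_BROKEN:   return "broken";   // disabled after an error
    default:              return "unknown";
    }
}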
114
115
116class multifetchrequest {
117public:
118 multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize);
119 ~multifetchrequest();
120
121 void run(std::vector<Url> &urllist);
122
123protected:
124
125 static size_t makeBlksize ( size_t filesize );
126
127 friend class multifetchworker;
128
129 const MediaMultiCurl *_context;
130 const Pathname _filename;
131 Url _baseurl;
132
133 FILE *_fp;
134 callback::SendReport<DownloadProgressReport> *_report;
135 MediaBlockList *_blklist;
136 off_t _filesize;
137
138 CURLM *_multi;
139
140 std::list<multifetchworker *> _workers;
141 bool _stealing;
142 bool _havenewjob;
143
144 size_t _blkno;
145 size_t _defaultBlksize = 0; //< The blocksize to use if the metalink file does not specify one
146 off_t _blkoff;
147 size_t _activeworkers;
148 size_t _lookupworkers;
149 size_t _sleepworkers;
150 double _minsleepuntil;
151 bool _finished;
152 off_t _totalsize;
153 off_t _fetchedsize;
154 off_t _fetchedgoodsize;
155
156 double _starttime;
157 double _lastprogress;
158
159 double _lastperiodstart;
160 double _lastperiodfetched;
161 double _periodavg;
162
163public:
164 double _timeout;
165 double _connect_timeout;
166 double _maxspeed;
167 int _maxworkers;
168};
169
170constexpr auto MIN_REQ_MIRRS = 4;
171constexpr auto MAXURLS = 10;
172
174
175static double
176currentTime()
177{
178 struct timeval tv;
179 if (gettimeofday(&tv, NULL))
180 return 0;
181 return tv.tv_sec + tv.tv_usec / 1000000.;
182}
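An equivalent sketch with <chrono> (an assumption, not what libzypp uses); a steady clock would additionally be immune to wall-clock adjustments, which the gettimeofday() version is not:

#include <chrono>

static double currentTimeChrono()
{
  using namespace std::chrono;
  return duration<double>(steady_clock::now().time_since_epoch()).count();
}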
183
184size_t
185multifetchworker::writefunction(void *ptr, size_t size)
186{
187 size_t len, cnt;
188 if (_state == WORKER_BROKEN)
189 return size ? 0 : 1;
190
191 double now = currentTime();
192
193 len = size > _size ? _size : size;
194 if (!len)
195 {
196 // kill this job?
197 return size;
198 }
199
200 if (_blkstart && _off == _blkstart)
201 {
202 // make sure that the server replied with "partial content"
203 // for http requests
204 char *effurl;
205 (void)curl_easy_getinfo(_curl, CURLINFO_EFFECTIVE_URL, &effurl);
206 if (effurl && !strncasecmp(effurl, "http", 4))
207 {
208 long statuscode = 0;
209 (void)curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &statuscode);
210 if (statuscode != 206)
211 return size ? 0 : 1;
212 }
213 }
214
215 _blkreceived += len;
216 _received += len;
217
218 _request->_lastprogress = now;
219
220 if (_state == WORKER_DISCARD || !_request->_fp)
221 {
222 // block is no longer needed
223 // still calculate the checksum so that we can throw out bad servers
224 if (_request->_blklist)
225 _dig.update((const char *)ptr, len);
226 _off += len;
227 _size -= len;
228 return size;
229 }
230 if (fseeko(_request->_fp, _off, SEEK_SET))
231 return size ? 0 : 1;
232 cnt = fwrite(ptr, 1, len, _request->_fp);
233 if (cnt > 0)
234 {
235 _request->_fetchedsize += cnt;
236 if (_request->_blklist)
237 _dig.update((const char *)ptr, cnt);
238 _off += cnt;
239 _size -= cnt;
240 if (cnt == len)
241 return size;
242 }
243 return cnt;
244}
245
246size_t
247multifetchworker::_writefunction(void *ptr, size_t size, size_t nmemb, void *stream)
248{
249 multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
250 return me->writefunction(ptr, size * nmemb);
251}
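writefunction() leans on libcurl's write-callback contract: returning anything other than size*nmemb makes libcurl abort the transfer with CURLE_WRITE_ERROR, which is why the code above returns 0 (or 1 for a zero-sized chunk) to kill a transfer and size to accept the data. A minimal standalone shape of such a callback, as a sketch:

#include <curl/curl.h>
#include <cstdio>

static size_t sink(void *ptr, size_t size, size_t nmemb, void *stream)
{
  // a return value != size*nmemb makes libcurl fail with CURLE_WRITE_ERROR
  return fwrite(ptr, 1, size * nmemb, static_cast<FILE *>(stream));
}

// usage sketch:
//   curl_easy_setopt(easy, CURLOPT_WRITEFUNCTION, sink);
//   curl_easy_setopt(easy, CURLOPT_WRITEDATA, fp);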
252
253size_t
254multifetchworker::headerfunction(char *p, size_t size)
255{
256 size_t l = size;
257 if (l > 9 && !strncasecmp(p, "Location:", 9))
258 {
259 std::string line(p + 9, l - 9);
260 if (line[l - 10] == '\r')
261 line.erase(l - 10, 1);
262 XXX << "#" << _workerno << ": redirecting to" << line << endl;
263 return size;
264 }
265 if (l <= 14 || l >= 128 || strncasecmp(p, "Content-Range:", 14) != 0)
266 return size;
267 p += 14;
268 l -= 14;
269 while (l && (*p == ' ' || *p == '\t'))
270 p++, l--;
271 if (l < 6 || strncasecmp(p, "bytes", 5))
272 return size;
273 p += 5;
274 l -= 5;
275 char buf[128];
276 memcpy(buf, p, l);
277 buf[l] = 0;
278 unsigned long long start, off, filesize;
279 if (sscanf(buf, "%llu-%llu/%llu", &start, &off, &filesize) != 3)
280 return size;
281 if (_request->_filesize == (off_t)-1)
282 {
283 WAR << "#" << _workerno << ": setting request filesize to " << filesize << endl;
284 _request->_filesize = filesize;
285 if (_request->_totalsize == 0 && !_request->_blklist)
286 _request->_totalsize = filesize;
287 }
288 if (_request->_filesize != (off_t)filesize)
289 {
290 XXX << "#" << _workerno << ": filesize mismatch" << endl;
291 _state = WORKER_BROKEN;
292 strncpy(_curlError, "filesize mismatch", CURL_ERROR_SIZE);
293 }
294 return size;
295}
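headerfunction() extracts the total filesize from a "Content-Range: bytes first-last/total" reply header (the variable named off above actually holds the last byte position). A standalone sketch of the same parsing approach, with a hypothetical helper name:

#include <cstdio>
#include <cstring>
#include <strings.h>

static bool parseContentRange(const char *hdr, unsigned long long &first,
                              unsigned long long &last, unsigned long long &total)
{
  // expects e.g. "Content-Range: bytes 0-262143/4194304"
  const char *p = strchr(hdr, ':');
  if (!p)
    return false;
  for (++p; *p == ' ' || *p == '\t'; ++p)
    ;
  if (strncasecmp(p, "bytes", 5))
    return false;
  return sscanf(p + 5, "%llu-%llu/%llu", &first, &last, &total) == 3;
}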
296
297size_t
298multifetchworker::_headerfunction(void *ptr, size_t size, size_t nmemb, void *stream)
299{
300 multifetchworker *me = reinterpret_cast<multifetchworker *>(stream);
301 return me->headerfunction((char *)ptr, size * nmemb);
302}
303
304multifetchworker::multifetchworker(int no, multifetchrequest &request, const Url &url)
305: MediaCurl(url, Pathname())
306{
307 _workerno = no;
308 _request = &request;
309 _state = WORKER_STARTING;
310 _competing = false;
311 _off = _blkstart = 0;
312 _size = _blksize = 0;
313 _pass = 0;
314 _blkno = 0;
315 _pid = 0;
316 _dnspipe = -1;
317 _blkreceived = 0;
318 _received = 0;
319 _blkstarttime = 0;
320 _avgspeed = 0;
321 _sleepuntil = 0;
322 _maxspeed = request._maxspeed;
323 _noendrange = false;
324
325 Url curlUrl( clearQueryString(url) );
326 _urlbuf = curlUrl.asString();
327 _curl = _request->_context->fromEasyPool(_url.getHost());
328 if (_curl)
329 XXX << "reused worker from pool" << endl;
330 if (!_curl && !(_curl = curl_easy_init()))
331 {
332 _state = WORKER_BROKEN;
333 strncpy(_curlError, "curl_easy_init failed", CURL_ERROR_SIZE);
334 return;
335 }
336 try
337 {
338 setupEasy();
339 }
340 catch (Exception &ex)
341 {
342 curl_easy_cleanup(_curl);
343 _curl = 0;
344 _state = WORKER_BROKEN;
345 strncpy(_curlError, "curl_easy_setopt failed", CURL_ERROR_SIZE);
346 return;
347 }
348 curl_easy_setopt(_curl, CURLOPT_PRIVATE, this);
349 curl_easy_setopt(_curl, CURLOPT_URL, _urlbuf.c_str());
350 curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, &_writefunction);
351 curl_easy_setopt(_curl, CURLOPT_WRITEDATA, this);
352 if (_request->_filesize == off_t(-1) || !_request->_blklist || !_request->_blklist->haveChecksum(0))
353 {
354 curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, &_headerfunction);
355 curl_easy_setopt(_curl, CURLOPT_HEADERDATA, this);
356 }
357 // if this is the same host copy authorization
358 // (the host check is also what curl does when doing a redirect)
359 // (note also that unauthorized exceptions are thrown with the request host)
360 if (url.getHost() == _request->_context->_url.getHost())
361 {
362 _settings.setUsername(_request->_context->_settings.username());
363 _settings.setPassword(_request->_context->_settings.password());
364 _settings.setAuthType(_request->_context->_settings.authType());
365 if ( _settings.userPassword().size() )
366 {
367 curl_easy_setopt(_curl, CURLOPT_USERPWD, _settings.userPassword().c_str());
368 std::string use_auth = _settings.authType();
369 if (use_auth.empty())
370 use_auth = "digest,basic"; // our default
371 long auth = CurlAuthData::auth_type_str2long(use_auth);
372 if( auth != CURLAUTH_NONE)
373 {
374 XXX << "#" << _workerno << ": Enabling HTTP authentication methods: " << use_auth
375 << " (CURLOPT_HTTPAUTH=" << auth << ")" << std::endl;
376 curl_easy_setopt(_curl, CURLOPT_HTTPAUTH, auth);
377 }
378 }
379 }
380 checkdns();
381}
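The CURLOPT_PRIVATE pointer stored in the constructor is how multifetchrequest::run() later gets back from a finished easy handle to its worker. A sketch of the round trip (names are illustrative, not from the original file):

#include <curl/curl.h>

struct Job { int id; };

static Job *jobOf(CURL *easy)
{
  char *p = nullptr; // CURLINFO_PRIVATE is documented to come back as a char *
  if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &p) != CURLE_OK)
    return nullptr;
  return reinterpret_cast<Job *>(p);
}

// setup side:
//   Job *job = new Job{ 42 };
//   curl_easy_setopt(easy, CURLOPT_PRIVATE, job);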
382
383multifetchworker::~multifetchworker()
384{
385 if (_curl)
386 {
387 if (_state == WORKER_FETCH || _state == WORKER_DISCARD)
388 curl_multi_remove_handle(_request->_multi, _curl);
389 if (_state == WORKER_DONE || _state == WORKER_SLEEP)
390 {
391#if CURLVERSION_AT_LEAST(7,15,5)
392 curl_easy_setopt(_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)0);
393#endif
394 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
395 curl_easy_setopt(_curl, CURLOPT_WRITEFUNCTION, (void *)0);
396 curl_easy_setopt(_curl, CURLOPT_WRITEDATA, (void *)0);
397 curl_easy_setopt(_curl, CURLOPT_HEADERFUNCTION, (void *)0);
398 curl_easy_setopt(_curl, CURLOPT_HEADERDATA, (void *)0);
399 _request->_context->toEasyPool(_url.getHost(), _curl);
400 }
401 else
402 curl_easy_cleanup(_curl);
403 _curl = 0;
404 }
405 if (_pid)
406 {
407 kill(_pid, SIGKILL);
408 int status;
409 while (waitpid(_pid, &status, 0) == -1)
410 if (errno != EINTR)
411 break;
412 _pid = 0;
413 }
414 if (_dnspipe != -1)
415 {
416 close(_dnspipe);
417 _dnspipe = -1;
418 }
419 // the destructor in MediaCurl doesn't call disconnect() if
420 // the media is not attached, so we do it here manually
421 disconnectFrom();
422}
423
424static inline bool env_isset(std::string name)
425{
426 const char *s = getenv(name.c_str());
427 return s && *s ? true : false;
428}
429
430void
431multifetchworker::checkdns()
432{
433 std::string host = _url.getHost();
434
435 if (host.empty())
436 return;
437
438 if (_request->_context->isDNSok(host))
439 return;
440
441 // no need to do dns checking for numeric hosts
442 char addrbuf[128];
443 if (inet_pton(AF_INET, host.c_str(), addrbuf) == 1)
444 return;
445 if (inet_pton(AF_INET6, host.c_str(), addrbuf) == 1)
446 return;
447
448 // no need to do dns checking if we use a proxy
449 if (!_settings.proxy().empty())
450 return;
451 if (env_isset("all_proxy") || env_isset("ALL_PROXY"))
452 return;
453 std::string schemeproxy = _url.getScheme() + "_proxy";
454 if (env_isset(schemeproxy))
455 return;
456 if (schemeproxy != "http_proxy")
457 {
458 std::transform(schemeproxy.begin(), schemeproxy.end(), schemeproxy.begin(), ::toupper);
459 if (env_isset(schemeproxy))
460 return;
461 }
462
463 XXX << "checking DNS lookup of " << host << endl;
464 int pipefds[2];
465 if (pipe(pipefds))
466 {
467 _state = WORKER_BROKEN;
468 strncpy(_curlError, "DNS pipe creation failed", CURL_ERROR_SIZE);
469 return;
470 }
471 _pid = fork();
472 if (_pid == pid_t(-1))
473 {
474 close(pipefds[0]);
475 close(pipefds[1]);
476 _pid = 0;
477 _state = WORKER_BROKEN;
478 strncpy(_curlError, "DNS checker fork failed", CURL_ERROR_SIZE);
479 return;
480 }
481 else if (_pid == 0)
482 {
483 close(pipefds[0]);
484 // XXX: close all other file descriptors
485 struct addrinfo *ai, aihints;
486 memset(&aihints, 0, sizeof(aihints));
487 aihints.ai_family = PF_UNSPEC;
488 int tstsock = socket(PF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
489 if (tstsock == -1)
490 aihints.ai_family = PF_INET;
491 else
492 close(tstsock);
493 aihints.ai_socktype = SOCK_STREAM;
494 aihints.ai_flags = AI_CANONNAME;
495 unsigned int connecttimeout = _request->_connect_timeout;
496 if (connecttimeout)
497 alarm(connecttimeout);
498 signal(SIGALRM, SIG_DFL);
499 if (getaddrinfo(host.c_str(), NULL, &aihints, &ai))
500 _exit(1);
501 _exit(0);
502 }
503 close(pipefds[1]);
504 _dnspipe = pipefds[0];
505 _state = WORKER_LOOKUP;
506}
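The probe works because the child never writes to the pipe: its only observable effect is that exiting closes the write end, which makes the read end held by the parent poll as readable (EOF). A condensed editorial sketch of the same fork/pipe/poll pattern, assuming a plain blocking caller:

#include <netdb.h>
#include <poll.h>
#include <sys/wait.h>
#include <unistd.h>
#include <cstring>

static bool dnsProbe(const char *host, int timeoutMs)
{
  int fds[2];
  if (pipe(fds))
    return false;
  pid_t pid = fork();
  if (pid == -1)
    {
      close(fds[0]); close(fds[1]);
      return false;
    }
  if (pid == 0)
    {
      close(fds[0]);
      struct addrinfo hints, *ai = nullptr;
      memset(&hints, 0, sizeof(hints));
      hints.ai_socktype = SOCK_STREAM;
      _exit(getaddrinfo(host, nullptr, &hints, &ai) ? 1 : 0); // exit closes fds[1]
    }
  close(fds[1]);
  struct pollfd pfd = { fds[0], POLLIN, 0 };
  bool done = poll(&pfd, 1, timeoutMs) == 1; // EOF once the child exited
  close(fds[0]);
  int status = 0;
  waitpid(pid, &status, done ? 0 : WNOHANG);
  return done && WIFEXITED(status) && WEXITSTATUS(status) == 0;
}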
507
508void
509multifetchworker::adddnsfd(std::vector<curl_waitfd> &waitFds)
510{
511 if (_state != WORKER_LOOKUP)
512 return;
513
514 waitFds.push_back (
515 curl_waitfd {
516 .fd = _dnspipe,
517 .events = CURL_WAIT_POLLIN,
518 .revents = 0
519 });
520}
521
522void
523multifetchworker::dnsevent( const std::vector<curl_waitfd> &waitFds )
524{
525
526 bool hasEvent = std::any_of( waitFds.begin (), waitFds.end(),[this]( const curl_waitfd &waitfd ){
527 return ( waitfd.fd == _dnspipe && waitfd.revents != 0 );
528 });
529
530 if (_state != WORKER_LOOKUP || !hasEvent)
531 return;
532 int status;
533 while (waitpid(_pid, &status, 0) == -1)
534 {
535 if (errno != EINTR)
536 return;
537 }
538 _pid = 0;
539 if (_dnspipe != -1)
540 {
541 close(_dnspipe);
542 _dnspipe = -1;
543 }
544 if (!WIFEXITED(status))
545 {
546 _state = WORKER_BROKEN;
547 strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
548 _request->_activeworkers--;
549 return;
550 }
551 int exitcode = WEXITSTATUS(status);
552 XXX << "#" << _workerno << ": DNS lookup returned " << exitcode << endl;
553 if (exitcode != 0)
554 {
555 _state = WORKER_BROKEN;
556 strncpy(_curlError, "DNS lookup failed", CURL_ERROR_SIZE);
557 _request->_activeworkers--;
558 return;
559 }
560 _request->_context->setDNSok(_url.getHost());
561 nextjob();
562}
563
564bool
565multifetchworker::checkChecksum()
566{
567 // XXX << "checkChecksum block " << _blkno << endl;
568 if (!_blksize || !_request->_blklist)
569 return true;
570 return _request->_blklist->verifyDigest(_blkno, _dig);
571}
572
573bool
574multifetchworker::recheckChecksum()
575{
576 // XXX << "recheckChecksum block " << _blkno << endl;
577 if (!_request->_fp || !_blksize || !_request->_blklist)
578 return true;
579 if (fseeko(_request->_fp, _blkstart, SEEK_SET))
580 return false;
581 char buf[4096];
582 size_t l = _blksize;
583 _request->_blklist->createDigest(_dig); // resets digest
584 while (l)
585 {
586 size_t cnt = l > sizeof(buf) ? sizeof(buf) : l;
587 if (fread(buf, cnt, 1, _request->_fp) != 1)
588 return false;
589 _dig.update(buf, cnt);
590 l -= cnt;
591 }
592 return _request->_blklist->verifyDigest(_blkno, _dig);
593}
594
595
596void
597multifetchworker::stealjob()
598{
599 if (!_request->_stealing)
600 {
601 XXX << "start stealing!" << endl;
602 _request->_stealing = true;
603 }
604 multifetchworker *best = 0;
605 std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
606 double now = 0;
607 for (; workeriter != _request->_workers.end(); ++workeriter)
608 {
609 multifetchworker *worker = *workeriter;
610 if (worker == this)
611 continue;
612 if (worker->_pass == -1)
613 continue; // do not steal!
614 if (worker->_state == WORKER_DISCARD || worker->_state == WORKER_DONE || worker->_state == WORKER_SLEEP || !worker->_blksize)
615 continue; // do not steal finished jobs
616 if (!worker->_avgspeed && worker->_blkreceived)
617 {
618 if (!now)
619 now = currentTime();
620 if (now > worker->_blkstarttime)
621 worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
622 }
623 if (!best || best->_pass > worker->_pass)
624 {
625 best = worker;
626 continue;
627 }
628 if (best->_pass < worker->_pass)
629 continue;
630 // if it is the same block, we want to know the best worker, otherwise the worst
631 if (worker->_blkstart == best->_blkstart)
632 {
633 if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed < (best->_blksize - best->_blkreceived) * worker->_avgspeed)
634 best = worker;
635 }
636 else
637 {
638 if ((worker->_blksize - worker->_blkreceived) * best->_avgspeed > (best->_blksize - best->_blkreceived) * worker->_avgspeed)
639 best = worker;
640 }
641 }
642 if (!best)
643 {
646 _request->_finished = true;
647 return;
648 }
649 // do not sleep twice
650 if (_state != WORKER_SLEEP)
651 {
652 if (!_avgspeed && _blkreceived)
653 {
654 if (!now)
655 now = currentTime();
656 if (now > _blkstarttime)
657 _avgspeed = _blkreceived / (now - _blkstarttime);
658 }
659
660 // lets see if we should sleep a bit
661 XXX << "me #" << _workerno << ": " << _avgspeed << ", size " << best->_blksize << endl;
662 XXX << "best #" << best->_workerno << ": " << best->_avgspeed << ", size " << (best->_blksize - best->_blkreceived) << endl;
663 if (_avgspeed && best->_avgspeed && best->_blksize - best->_blkreceived > 0 &&
664 (best->_blksize - best->_blkreceived) * _avgspeed < best->_blksize * best->_avgspeed)
665 {
666 if (!now)
667 now = currentTime();
668 double sl = (best->_blksize - best->_blkreceived) / best->_avgspeed * 2;
669 if (sl > 1)
670 sl = 1;
671 XXX << "#" << _workerno << ": going to sleep for " << sl * 1000 << " ms" << endl;
672 _sleepuntil = now + sl;
673 _state = WORKER_SLEEP;
674 _request->_sleepworkers++;
675 return;
676 }
677 }
678
679 _competing = true;
680 best->_competing = true;
681 _blkstart = best->_blkstart;
682 _blksize = best->_blksize;
683 best->_pass++;
684 _pass = best->_pass;
685 _blkno = best->_blkno;
686 run();
687}
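The two comparisons above are cross-multiplied ETAs: remaining/speed fractions compared without dividing, so zero speeds cannot fault. A worked instance, as an editorial sketch:

#include <cassert>

int main()
{
  double remW = 4.0, spdW = 1.0; // this worker: 4 MiB left at 1 MiB/s -> ETA 4 s
  double remB = 1.0, spdB = 0.2; // candidate:   1 MiB left at 0.2 MiB/s -> ETA 5 s
  // remW * spdB < remB * spdW  <=>  remW / spdW < remB / spdB  (speeds > 0)
  assert((remW * spdB < remB * spdW) == (remW / spdW < remB / spdB)); // 0.8 < 1.0
  return 0;
}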
688
689void
690multifetchworker::disableCompetition()
691{
692 std::list<multifetchworker *>::iterator workeriter = _request->_workers.begin();
693 for (; workeriter != _request->_workers.end(); ++workeriter)
694 {
695 multifetchworker *worker = *workeriter;
696 if (worker == this)
697 continue;
698 if (worker->_blkstart == _blkstart)
699 {
700 if (worker->_state == WORKER_FETCH)
701 worker->_state = WORKER_DISCARD;
702 worker->_pass = -1; /* do not steal this one, we already have it */
703 }
704 }
705}
706
707
708void
709multifetchworker::nextjob()
710{
711 _noendrange = false;
712 if (_request->_stealing)
713 {
714 stealjob();
715 return;
716 }
717
718 MediaBlockList *blklist = _request->_blklist;
719 if (!blklist)
720 {
721 _blksize = _request->_defaultBlksize;
722 if (_request->_filesize != off_t(-1))
723 {
724 if (_request->_blkoff >= _request->_filesize)
725 {
726 stealjob();
727 return;
728 }
729 _blksize = _request->_filesize - _request->_blkoff;
730 if (_blksize > _request->_defaultBlksize)
731 _blksize = _request->_defaultBlksize;
732 }
733 DBG << "No BLOCKLIST falling back to chunk size: " << _request->_defaultBlksize << std::endl;
734 }
735 else
736 {
737 MediaBlock blk = blklist->getBlock(_request->_blkno);
738 while (_request->_blkoff >= (off_t)(blk.off + blk.size))
739 {
740 if (++_request->_blkno == blklist->numBlocks())
741 {
742 stealjob();
743 return;
744 }
745 blk = blklist->getBlock(_request->_blkno);
746 _request->_blkoff = blk.off;
747 }
748 _blksize = blk.off + blk.size - _request->_blkoff;
749 if ( !blklist->haveChecksum(_request->_blkno) ) {
750 DBG << "Block: "<< _request->_blkno << " has no checksum falling back to default blocksize: " << _request->_defaultBlksize << std::endl;
751 _blksize = std::min<size_t>( blklist->getFilesize() - _request->_blkoff, _request->_defaultBlksize );
752 }
753 }
754 _blkstart = _request->_blkoff;
755 _blkno = _request->_blkno;
756 _request->_blkoff += _blksize;
757 run();
758}
759
760void
761multifetchworker::run()
762{
763 char rangebuf[128];
764
765 if (_state == WORKER_BROKEN || _state == WORKER_DONE)
766 return; // just in case...
767 if (_noendrange)
768 sprintf(rangebuf, "%llu-", (unsigned long long)_blkstart);
769 else
770 sprintf(rangebuf, "%llu-%llu", (unsigned long long)_blkstart, (unsigned long long)_blkstart + _blksize - 1);
771 XXX << "#" << _workerno << ": BLK " << _blkno << ":" << rangebuf << " " << _url << endl;
772 if (curl_easy_setopt(_curl, CURLOPT_RANGE, !_noendrange || _blkstart != 0 ? rangebuf : (char *)0) != CURLE_OK)
773 {
774 _request->_activeworkers--;
775 _state = WORKER_BROKEN;
776 strncpy(_curlError, "curl_easy_setopt range failed", CURL_ERROR_SIZE);
777 return;
778 }
779 if (curl_multi_add_handle(_request->_multi, _curl) != CURLM_OK)
780 {
781 _request->_activeworkers--;
782 _state = WORKER_BROKEN;
783 strncpy(_curlError, "curl_multi_add_handle failed", CURL_ERROR_SIZE);
784 return;
785 }
786 _request->_havenewjob = true;
787 _off = _blkstart;
788 _size = _blksize;
789 if (_request->_blklist)
790 _request->_blklist->createDigest(_dig); // resets digest
791 _state = WORKER_FETCH;
792
793 double now = currentTime();
794 _blkstarttime = now;
795 _blkreceived = 0;
796}
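CURLOPT_RANGE takes inclusive byte positions, hence the _blkstart + _blksize - 1 above, and an open-ended "start-" form when the end is unknown (the _noendrange retry path). A small sketch:

#include <curl/curl.h>
#include <cstdio>

int main()
{
  CURL *easy = curl_easy_init();
  if (!easy)
    return 1;
  char rangebuf[64];
  // a 256 KiB block at offset 256 KiB: bytes 262144..524287, inclusive
  snprintf(rangebuf, sizeof(rangebuf), "%llu-%llu", 262144ULL, 524287ULL);
  curl_easy_setopt(easy, CURLOPT_RANGE, rangebuf);
  curl_easy_cleanup(easy);
  return 0;
}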
797
798
800
801
802multifetchrequest::multifetchrequest(const MediaMultiCurl *context, const Pathname &filename, const Url &baseurl, CURLM *multi, FILE *fp, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) : _context(context), _filename(filename), _baseurl(baseurl)
803{
804 _fp = fp;
805 _report = report;
806 _blklist = blklist;
807 _filesize = filesize;
808 _defaultBlksize = makeBlksize( filesize );
809 _multi = multi;
810 _stealing = false;
811 _havenewjob = false;
812 _blkno = 0;
813 if (_blklist)
814 _blkoff = _blklist->getBlock(0).off;
815 else
816 _blkoff = 0;
817 _activeworkers = 0;
818 _lookupworkers = 0;
819 _sleepworkers = 0;
820 _minsleepuntil = 0;
821 _finished = false;
822 _fetchedsize = 0;
823 _fetchedgoodsize = 0;
824 _totalsize = 0;
825 _lastperiodstart = _lastprogress = _starttime = currentTime();
826 _lastperiodfetched = 0;
827 _periodavg = 0;
828 _timeout = 0;
829 _connect_timeout = 0;
830 _maxspeed = 0;
831 _maxworkers = 0;
832 if (blklist)
833 {
834 for (size_t blkno = 0; blkno < blklist->numBlocks(); blkno++)
835 {
836 MediaBlock blk = blklist->getBlock(blkno);
837 _totalsize += blk.size;
838 }
839 }
840 else if (filesize != off_t(-1))
841 _totalsize = filesize;
842}
843
844multifetchrequest::~multifetchrequest()
845{
846 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
847 {
848 multifetchworker *worker = *workeriter;
849 *workeriter = NULL;
850 delete worker;
851 }
852 _workers.clear();
853}
854
855void
856multifetchrequest::run(std::vector<Url> &urllist)
857{
858 int workerno = 0;
859 std::vector<Url>::iterator urliter = urllist.begin();
860 for (;;)
861 {
862 // our custom fd's we want to poll
863 std::vector<curl_waitfd> extraWaitFds;
864
865 if (_finished)
866 {
867 XXX << "finished!" << endl;
868 break;
869 }
870
871 if ((int)_activeworkers < _maxworkers && urliter != urllist.end() && _workers.size() < MAXURLS)
872 {
873 // spawn another worker!
874 multifetchworker *worker = new multifetchworker(workerno++, *this, *urliter);
875 _workers.push_back(worker);
876 if (worker->_state != WORKER_BROKEN)
877 {
878 _activeworkers++;
879 if (worker->_state != WORKER_LOOKUP)
880 {
881 worker->nextjob();
882 }
883 else
884 _lookupworkers++;
885 }
886 ++urliter;
887 continue;
888 }
889 if (!_activeworkers)
890 {
891 WAR << "No more active workers!" << endl;
892 // show the first worker error we find
893 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
894 {
895 if ((*workeriter)->_state != WORKER_BROKEN)
896 continue;
897 ZYPP_THROW(MediaCurlException(_baseurl, "Server error", (*workeriter)->_curlError));
898 }
899 break;
900 }
901
902 if (_lookupworkers)
903 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
904 (*workeriter)->adddnsfd( extraWaitFds );
905
906 // if we added a new job we have to call multi_perform once
907 // to make it show up in the fd set. do not sleep in this case.
908 int timeoutMs = _havenewjob ? 0 : 200;
909 if (_sleepworkers && !_havenewjob) {
910 if (_minsleepuntil == 0) {
911 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter) {
912 multifetchworker *worker = *workeriter;
913 if (worker->_state != WORKER_SLEEP)
914 continue;
915 if (!_minsleepuntil || _minsleepuntil > worker->_sleepuntil)
916 _minsleepuntil = worker->_sleepuntil;
917 }
918 }
919 double sl = _minsleepuntil - currentTime();
920 if (sl < 0) {
921 sl = 0;
922 _minsleepuntil = 0;
923 }
924 if (sl < .2)
925 timeoutMs = sl * 1000;
926 }
927
928 int r = 0;
929#if CURLVERSION_AT_LEAST(7,66,0)
930 CURLMcode mcode = curl_multi_poll( _multi, extraWaitFds.data(), extraWaitFds.size(), timeoutMs, &r );
931#else
932 CURLMcode mcode = curl_multi_wait( _multi, extraWaitFds.data(), extraWaitFds.size(), timeoutMs, &r );
933#endif
934 if ( mcode != CURLM_OK ) {
935 ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_poll() failed", str::Str() << "curl_multi_poll() returned CurlMcode: " << mcode ));
936 }
937 if (r == -1 && errno != EINTR)
938 ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_poll() failed", "unknown error"));
939 if (r != 0 && _lookupworkers)
940 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
941 {
942 multifetchworker *worker = *workeriter;
943 if (worker->_state != WORKER_LOOKUP)
944 continue;
945 (*workeriter)->dnsevent( extraWaitFds );
946 if (worker->_state != WORKER_LOOKUP)
947 _lookupworkers--;
948 }
949 _havenewjob = false;
950
951 // run curl
952 for (;;)
953 {
954 CURLMcode mcode;
955 int tasks;
956 mcode = curl_multi_perform(_multi, &tasks);
957 if (mcode == CURLM_CALL_MULTI_PERFORM)
958 continue;
959 if (mcode != CURLM_OK)
960 ZYPP_THROW(MediaCurlException(_baseurl, "curl_multi_perform", "unknown error"));
961 break;
962 }
963
964 double now = currentTime();
965
966 // update periodavg
967 if (now > _lastperiodstart + .5)
968 {
969 if (!_periodavg)
970 _periodavg = (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart);
971 else
972 _periodavg = (_periodavg + (_fetchedsize - _lastperiodfetched) / (now - _lastperiodstart)) / 2;
973 _lastperiodfetched = _fetchedsize;
974 _lastperiodstart = now;
975 }
976
977 // wake up sleepers
978 if (_sleepworkers)
979 {
980 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
981 {
982 multifetchworker *worker = *workeriter;
983 if (worker->_state != WORKER_SLEEP)
984 continue;
985 if (worker->_sleepuntil > now)
986 continue;
987 if (_minsleepuntil == worker->_sleepuntil)
988 _minsleepuntil = 0;
989 XXX << "#" << worker->_workerno << ": sleep done, wake up" << endl;
990 _sleepworkers--;
991 // nextjob changes the state
992 worker->nextjob();
993 }
994 }
995
996 // collect all curl results, reschedule new jobs
997 CURLMsg *msg;
998 int nqueue;
999 while ((msg = curl_multi_info_read(_multi, &nqueue)) != 0)
1000 {
1001 if (msg->msg != CURLMSG_DONE)
1002 continue;
1003 CURL *easy = msg->easy_handle;
1004 CURLcode cc = msg->data.result;
1005 multifetchworker *worker;
1006 if (curl_easy_getinfo(easy, CURLINFO_PRIVATE, &worker) != CURLE_OK)
1007 ZYPP_THROW(MediaCurlException(_baseurl, "curl_easy_getinfo", "unknown error"));
1008 if (worker->_blkreceived && now > worker->_blkstarttime)
1009 {
1010 if (worker->_avgspeed)
1011 worker->_avgspeed = (worker->_avgspeed + worker->_blkreceived / (now - worker->_blkstarttime)) / 2;
1012 else
1013 worker->_avgspeed = worker->_blkreceived / (now - worker->_blkstarttime);
1014 }
1015 XXX << "#" << worker->_workerno << ": BLK " << worker->_blkno << " done code " << cc << " speed " << worker->_avgspeed << endl;
1016 curl_multi_remove_handle(_multi, easy);
1017 if (cc == CURLE_HTTP_RETURNED_ERROR)
1018 {
1019 long statuscode = 0;
1020 (void)curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &statuscode);
1021 XXX << "HTTP status " << statuscode << endl;
1022 if (statuscode == 416 && !_blklist) /* Range error */
1023 {
1024 if (_filesize == off_t(-1))
1025 {
1026 if (!worker->_noendrange)
1027 {
1028 XXX << "#" << worker->_workerno << ": retrying with no end range" << endl;
1029 worker->_noendrange = true;
1030 worker->run();
1031 continue;
1032 }
1033 worker->_noendrange = false;
1034 worker->stealjob();
1035 continue;
1036 }
1037 if (worker->_blkstart >= _filesize)
1038 {
1039 worker->nextjob();
1040 continue;
1041 }
1042 }
1043 }
1044 if (cc == 0)
1045 {
1046 if (!worker->checkChecksum())
1047 {
1048 WAR << "#" << worker->_workerno << ": checksum error, disable worker" << endl;
1049 worker->_state = WORKER_BROKEN;
1050 strncpy(worker->_curlError, "checksum error", CURL_ERROR_SIZE);
1051 _activeworkers--;
1052 continue;
1053 }
1054 if (worker->_state == WORKER_FETCH)
1055 {
1056 if (worker->_competing)
1057 {
1058 worker->disableCompetition();
1059 // multiple workers wrote into this block. We already know that our
1060 // data was correct, but maybe some other worker overwrote our data
1061 // with something broken. Thus we have to re-check the block.
1062 if (!worker->recheckChecksum())
1063 {
1064 XXX << "#" << worker->_workerno << ": recheck checksum error, refetch block" << endl;
1065 // re-fetch! No need to worry about the bad workers,
1066 // they will now be set to DISCARD. At the end of their block
1067 // they will notice that they wrote bad data and go into BROKEN.
1068 worker->run();
1069 continue;
1070 }
1071 }
1072 _fetchedgoodsize += worker->_blksize;
1073 }
1074
1075 // make bad workers sleep a little
1076 double maxavg = 0;
1077 int maxworkerno = 0;
1078 int numbetter = 0;
1079 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1080 {
1081 multifetchworker *oworker = *workeriter;
1082 if (oworker->_state == WORKER_BROKEN)
1083 continue;
1084 if (oworker->_avgspeed > maxavg)
1085 {
1086 maxavg = oworker->_avgspeed;
1087 maxworkerno = oworker->_workerno;
1088 }
1089 if (oworker->_avgspeed > worker->_avgspeed)
1090 numbetter++;
1091 }
1092 if (maxavg && !_stealing)
1093 {
1094 double ratio = worker->_avgspeed / maxavg;
1095 ratio = 1 - ratio;
1096 if (numbetter < 3) // don't sleep that much if we're in the top two
1097 ratio = ratio * ratio;
1098 if (ratio > .01)
1099 {
1100 XXX << "#" << worker->_workerno << ": too slow ("<< ratio << ", " << worker->_avgspeed << ", #" << maxworkerno << ": " << maxavg << "), going to sleep for " << ratio * 1000 << " ms" << endl;
1101 worker->_sleepuntil = now + ratio;
1102 worker->_state = WORKER_SLEEP;
1103 _sleepworkers++;
1104 continue;
1105 }
1106 }
1107
1108 // do rate control (if requested)
1109 // should use periodavg, but that's not what libcurl does
1110 if (_maxspeed && now > _starttime)
1111 {
1112 double avg = _fetchedsize / (now - _starttime);
1113 avg = worker->_maxspeed * _maxspeed / avg;
1114 if (avg < _maxspeed / _maxworkers)
1115 avg = _maxspeed / _maxworkers;
1116 if (avg > _maxspeed)
1117 avg = _maxspeed;
1118 if (avg < 1024)
1119 avg = 1024;
1120 worker->_maxspeed = avg;
1121#if CURLVERSION_AT_LEAST(7,15,5)
1122 curl_easy_setopt(worker->_curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(avg));
1123#endif
1124 }
1125
1126 worker->nextjob();
1127 }
1128 else
1129 {
1130 worker->_state = WORKER_BROKEN;
1131 _activeworkers--;
1132 if (!_activeworkers && !(urliter != urllist.end() && _workers.size() < MAXURLS))
1133 {
1134 // end of workers reached! goodbye!
1135 worker->evaluateCurlCode(Pathname(), cc, false);
1136 }
1137 }
1138
1139 if ( _filesize > 0 && _fetchedgoodsize > _filesize ) {
1140 ZYPP_THROW(MediaFileSizeExceededException(_baseurl, _filesize));
1141 }
1142 }
1143
1144 // send report
1145 if (_report)
1146 {
1147 int percent = _totalsize ? (100 * (_fetchedgoodsize + _fetchedsize)) / (_totalsize + _fetchedsize) : 0;
1148
1149 double avg = 0;
1150 if (now > _starttime)
1151 avg = _fetchedsize / (now - _starttime);
1152 if (!(*(_report))->progress(percent, _baseurl, avg, _lastperiodstart == _starttime ? avg : _periodavg))
1153 ZYPP_THROW(MediaCurlException(_baseurl, "User abort", "cancelled"));
1154 }
1155
1156 if (_timeout && now - _lastprogress > _timeout)
1157 break;
1158 }
1159
1160 if (!_finished)
1161 ZYPP_THROW(MediaTimeoutException(_baseurl));
1162
1163 // print some download stats
1164 WAR << "overall result" << endl;
1165 for (std::list<multifetchworker *>::iterator workeriter = _workers.begin(); workeriter != _workers.end(); ++workeriter)
1166 {
1167 multifetchworker *worker = *workeriter;
1168 WAR << "#" << worker->_workerno << ": state: " << worker->_state << " received: " << worker->_received << " url: " << worker->_url << endl;
1169 }
1170}
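Stripped of the worker bookkeeping, the loop above is the canonical poll/perform/info_read cycle. A skeleton sketch (editorial) reusing the file's own version guard:

#include <curl/curl.h>

static void driveMulti(CURLM *multi)
{
  int running = 0;
  do
    {
      int numfds = 0;
#if CURLVERSION_AT_LEAST(7,66,0)
      curl_multi_poll(multi, nullptr, 0, 200, &numfds);
#else
      curl_multi_wait(multi, nullptr, 0, 200, &numfds);
#endif
      while (curl_multi_perform(multi, &running) == CURLM_CALL_MULTI_PERFORM)
        ;
      int queued = 0;
      while (CURLMsg *msg = curl_multi_info_read(multi, &queued))
        {
          if (msg->msg == CURLMSG_DONE)
            curl_multi_remove_handle(multi, msg->easy_handle); // collect result here
        }
    }
  while (running > 0);
}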
1171
1172inline size_t multifetchrequest::makeBlksize ( size_t filesize )
1173{
1174 // this case should never happen because we never start a multi download if we do not know the filesize beforehand
1175 if ( filesize == 0 ) return 2 * 1024 * 1024;
1176 else if ( filesize < 2*256*1024 ) return filesize;
1177 else if ( filesize < 8*1024*1024 ) return 256*1024;
1178 else if ( filesize < 256*1024*1024 ) return 1024*1024;
1179 return 4*1024*1024;
1180}
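Worked examples for the size tiers above, as a sketch that mirrors the function (makeBlksize() itself is protected):

#include <cassert>
#include <cstddef>

static size_t blksizeFor(size_t filesize) // mirrors makeBlksize() above
{
  if (filesize == 0) return 2 * 1024 * 1024;
  else if (filesize < 2 * 256 * 1024) return filesize;
  else if (filesize < 8 * 1024 * 1024) return 256 * 1024;
  else if (filesize < 256 * 1024 * 1024) return 1024 * 1024;
  return 4 * 1024 * 1024;
}

int main()
{
  assert(blksizeFor(400 * 1024) == 400 * 1024);               // < 512 KiB: one block
  assert(blksizeFor(4u * 1024 * 1024) == 256 * 1024);         // 4 MiB -> 16 blocks
  assert(blksizeFor(100u * 1024 * 1024) == 1024 * 1024);      // 100 MiB -> 100 blocks
  assert(blksizeFor(1024u * 1024 * 1024) == 4 * 1024 * 1024); // 1 GiB -> 256 blocks
  return 0;
}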
1181
1183
1184
1185MediaMultiCurl::MediaMultiCurl(const Url &url_r, const Pathname & attach_point_hint_r)
1186 : MediaCurl(url_r, attach_point_hint_r)
1187{
1188 MIL << "MediaMultiCurl::MediaMultiCurl(" << url_r << ", " << attach_point_hint_r << ")" << endl;
1189 _multi = 0;
1190 _customHeadersMetalink = 0;
1191}
1192
1193MediaMultiCurl::~MediaMultiCurl()
1194{
1195 if (_customHeadersMetalink)
1196 {
1197 curl_slist_free_all(_customHeadersMetalink);
1198 _customHeadersMetalink = 0;
1199 }
1200 if (_multi)
1201 {
1202 curl_multi_cleanup(_multi);
1203 _multi = 0;
1204 }
1205 std::map<std::string, CURL *>::iterator it;
1206 for (it = _easypool.begin(); it != _easypool.end(); it++)
1207 {
1208 CURL *easy = it->second;
1209 if (easy)
1210 {
1211 curl_easy_cleanup(easy);
1212 it->second = NULL;
1213 }
1214 }
1215}
1216
1217void MediaMultiCurl::setupEasy()
1218{
1219 MediaCurl::setupEasy();
1220
1221 if (_customHeadersMetalink)
1222 {
1223 curl_slist_free_all(_customHeadersMetalink);
1224 _customHeadersMetalink = 0;
1225 }
1226 struct curl_slist *sl = _customHeaders;
1227 for (; sl; sl = sl->next)
1228 _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, sl->data);
1229 _customHeadersMetalink = curl_slist_append(_customHeadersMetalink, "Accept: */*, application/metalink+xml, application/metalink4+xml");
1230}
1231
1232static bool looks_like_metalink_fd(int fd)
1233{
1234 char buf[256], *p;
1235 int l;
1236 while ((l = pread(fd, buf, sizeof(buf) - 1, (off_t)0)) == -1 && errno == EINTR)
1237 ;
1238 if (l == -1)
1239 return 0;
1240 buf[l] = 0;
1241 p = buf;
1242 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1243 p++;
1244 if (!strncasecmp(p, "<?xml", 5))
1245 {
1246 while (*p && *p != '>')
1247 p++;
1248 if (*p == '>')
1249 p++;
1250 while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
1251 p++;
1252 }
1253 bool ret = !strncasecmp(p, "<metalink", 9) ? true : false;
1254 return ret;
1255}
1256
1257static bool looks_like_metalink(const Pathname & file)
1258{
1259 int fd;
1260 if ((fd = open(file.asString().c_str(), O_RDONLY|O_CLOEXEC)) == -1)
1261 return false;
1262 bool ret = looks_like_metalink_fd(fd);
1263 close(fd);
1264 DBG << "looks_like_metalink(" << file << "): " << ret << endl;
1265 return ret;
1266}
1267
1268// here we try to suppress all progress coming from a metalink download
1269// bsc#1021291: Nevertheless send alive trigger (without stats), so UIs
1270// are able to abort a hanging metalink download via callback response.
1271int MediaMultiCurl::progressCallback( void *clientp, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow)
1272{
1273 CURL *_curl = MediaCurl::progressCallback_getcurl(clientp);
1274 if (!_curl)
1275 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1276
1277 // bsc#408814: Don't report any sizes before we have data on disk. Data reported
1278 // due to redirection etc. is not interesting, but may disturb filesize checks.
1279 FILE *fp = 0;
1280 if ( curl_easy_getinfo( _curl, CURLINFO_PRIVATE, &fp ) != CURLE_OK || !fp )
1281 return MediaCurl::aliveCallback( clientp, dltotal, dlnow, ultotal, ulnow );
1282 if ( ftell( fp ) == 0 )
1283 return MediaCurl::aliveCallback( clientp, dltotal, 0.0, ultotal, ulnow );
1284
1285 // (no longer needed due to the filesize check above?)
1286 // work around curl bug that gives us old data
1287 long httpReturnCode = 0;
1288 if (curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode ) != CURLE_OK || httpReturnCode == 0)
1289 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1290
1291 char *ptr = NULL;
1292 bool ismetalink = false;
1293 if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1294 {
1295 std::string ct = std::string(ptr);
1296 if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1297 ismetalink = true;
1298 }
1299 if (!ismetalink && dlnow < 256)
1300 {
1301 // can't tell yet, ...
1302 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1303 }
1304 if (!ismetalink)
1305 {
1306 fflush(fp);
1307 ismetalink = looks_like_metalink_fd(fileno(fp));
1308 DBG << "looks_like_metalink_fd: " << ismetalink << endl;
1309 }
1310 if (ismetalink)
1311 {
1312 // this is a metalink file, change the expected filesize
1313 MediaCurl::resetExpectedFileSize( clientp, ByteCount( 2, ByteCount::MB ) );
1314 // we're downloading the metalink file. Just trigger aliveCallbacks
1315 curl_easy_setopt(_curl, CURLOPT_XFERINFOFUNCTION, &MediaCurl::aliveCallback);
1316 return MediaCurl::aliveCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1317 }
1318 curl_easy_setopt(_curl, CURLOPT_XFERINFOFUNCTION, &MediaCurl::progressCallback);
1319 return MediaCurl::progressCallback(clientp, dltotal, dlnow, ultotal, ulnow);
1320}
1321
1322void MediaMultiCurl::doGetFileCopy( const OnMediaLocation &srcFile , const Pathname & target, callback::SendReport<DownloadProgressReport> & report, RequestOptions options ) const
1323{
1324 Pathname dest = target.absolutename();
1325 if( assert_dir( dest.dirname() ) )
1326 {
1327 DBG << "assert_dir " << dest.dirname() << " failed" << endl;
1328 ZYPP_THROW( MediaSystemException(getFileUrl(srcFile.filename()), "System error on " + dest.dirname().asString()) );
1329 }
1330
1331 ManagedFile destNew { target.extend( ".new.zypp.XXXXXX" ) };
1332 AutoFILE file;
1333 {
1334 AutoFREE<char> buf { ::strdup( (*destNew).c_str() ) };
1335 if( ! buf )
1336 {
1337 ERR << "out of memory for temp file name" << endl;
1338 ZYPP_THROW(MediaSystemException(getFileUrl(srcFile.filename()), "out of memory for temp file name"));
1339 }
1340
1341 AutoFD tmp_fd { ::mkostemp( buf, O_CLOEXEC ) };
1342 if( tmp_fd == -1 )
1343 {
1344 ERR << "mkstemp failed for file '" << destNew << "'" << endl;
1345 ZYPP_THROW(MediaWriteException(destNew));
1346 }
1347 destNew = ManagedFile( (*buf), filesystem::unlink );
1348
1349 file = ::fdopen( tmp_fd, "we" );
1350 if ( ! file )
1351 {
1352 ERR << "fopen failed for file '" << destNew << "'" << endl;
1353 ZYPP_THROW(MediaWriteException(destNew));
1354 }
1355 tmp_fd.resetDispose(); // don't close it here! ::fdopen moved ownership to file
1356 }
1357
1358 DBG << "dest: " << dest << endl;
1359 DBG << "temp: " << destNew << endl;
1360
1361 // set IFMODSINCE time condition (no download if not modified)
1362 if( PathInfo(target).isExist() && !(options & OPTION_NO_IFMODSINCE) )
1363 {
1364 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
1365 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, (long)PathInfo(target).mtime());
1366 }
1367 else
1368 {
1369 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1370 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1371 }
1372 // change header to include Accept: metalink
1373 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeadersMetalink);
1374 // change to our own progress function
1375 curl_easy_setopt(_curl, CURLOPT_XFERINFOFUNCTION, &progressCallback);
1376 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (*file) ); // important to pass the FILE* explicitly (passing through varargs)
1377 try
1378 {
1379 MediaCurl::doGetFileCopyFile( srcFile, dest, file, report, options );
1380 }
1381 catch (Exception &ex)
1382 {
1383 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1384 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1385 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1386 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1387 ZYPP_RETHROW(ex);
1388 }
1389 curl_easy_setopt(_curl, CURLOPT_TIMECONDITION, CURL_TIMECOND_NONE);
1390 curl_easy_setopt(_curl, CURLOPT_TIMEVALUE, 0L);
1391 curl_easy_setopt(_curl, CURLOPT_HTTPHEADER, _customHeaders);
1392 curl_easy_setopt(_curl, CURLOPT_PRIVATE, (void *)0);
1393 long httpReturnCode = 0;
1394 CURLcode infoRet = curl_easy_getinfo(_curl, CURLINFO_RESPONSE_CODE, &httpReturnCode);
1395 if (infoRet == CURLE_OK)
1396 {
1397 DBG << "HTTP response: " + str::numstring(httpReturnCode) << endl;
1398 if ( httpReturnCode == 304
1399 || ( httpReturnCode == 213 && _url.getScheme() == "ftp" ) ) // not modified
1400 {
1401 DBG << "not modified: " << PathInfo(dest) << endl;
1402 return;
1403 }
1404 }
1405 else
1406 {
1407 WAR << "Could not get the response code." << endl;
1408 }
1409
1410 bool ismetalink = false;
1411
1412 char *ptr = NULL;
1413 if (curl_easy_getinfo(_curl, CURLINFO_CONTENT_TYPE, &ptr) == CURLE_OK && ptr)
1414 {
1415 std::string ct = std::string(ptr);
1416 if (ct.find("application/metalink+xml") == 0 || ct.find("application/metalink4+xml") == 0)
1417 ismetalink = true;
1418 }
1419
1420 if (!ismetalink)
1421 {
1422 // some proxies do not store the content type, so also look at the file to find
1423 // out if we received a metalink (bnc#649925)
1424 fflush(file);
1425 if (looks_like_metalink(destNew))
1426 ismetalink = true;
1427 }
1428
1429 if (ismetalink)
1430 {
1431 bool userabort = false;
1432 Pathname failedFile = ZConfig::instance().repoCachePath() / "MultiCurl.failed";
1433 file = nullptr; // explicitly close destNew before the parser reads it.
1434 try
1435 {
1436 MetaLinkParser mlp;
1437 mlp.parse(destNew);
1438 MediaBlockList bl = mlp.getBlockList();
1439
1440 /*
1441 * github issue libzypp#277: Multicurl backend breaks with MirrorCache and Metalink with unknown filesize.
1442 * Fall back to a normal download if we have no knowledge about the filesize we want to download.
1443 */
1444 if ( !bl.haveFilesize() && ! srcFile.downloadSize() ) {
1445 XXX << "No filesize in metalink file and no expected filesize, aborting multicurl." << std::endl;
1446 ZYPP_THROW( MediaException("Multicurl requires filesize but none was provided.") );
1447 }
1448
1449 std::vector<Url> urls = mlp.getUrls();
1450 /*
1451 * bsc#1191609 In certain locations we do not receive a suitable number of metalink mirrors, and might even
1452 * download chunks serially from one and the same server. In those cases we need to fall back to a normal download.
1453 */
1454 if ( urls.size() < MIN_REQ_MIRRS ) {
1455 ZYPP_THROW( MediaException("Multicurl enabled but not enough mirrors provided") );
1456 }
1457
1458 XXX << bl << endl;
1459 file = fopen((*destNew).c_str(), "w+e");
1460 if (!file)
1461 ZYPP_THROW(MediaWriteException(destNew));
1462 if (PathInfo(target).isExist())
1463 {
1464 XXX << "reusing blocks from file " << target << endl;
1465 bl.reuseBlocks(file, target.asString());
1466 XXX << bl << endl;
1467 }
1468 if (bl.haveChecksum(1) && PathInfo(failedFile).isExist())
1469 {
1470 XXX << "reusing blocks from file " << failedFile << endl;
1471 bl.reuseBlocks(file, failedFile.asString());
1472 XXX << bl << endl;
1473 filesystem::unlink(failedFile);
1474 }
1475 Pathname df = srcFile.deltafile();
1476 if (!df.empty())
1477 {
1478 XXX << "reusing blocks from file " << df << endl;
1479 bl.reuseBlocks(file, df.asString());
1480 XXX << bl << endl;
1481 }
1482 try
1483 {
1484 multifetch(srcFile.filename(), file, &urls, &report, &bl, srcFile.downloadSize());
1485 }
1486 catch (MediaCurlException &ex)
1487 {
1488 userabort = ex.errstr() == "User abort";
1489 ZYPP_RETHROW(ex);
1490 }
1491 }
1492 catch (MediaFileSizeExceededException &ex) {
1493 ZYPP_RETHROW(ex);
1494 }
1495 catch (Exception &ex)
1496 {
1497 // something went wrong. fall back to normal download
1498 file = nullptr; // explicitly close destNew before moving it
1499 if (PathInfo(destNew).size() >= 63336)
1500 {
1501 ::unlink(failedFile.asString().c_str());
1502 filesystem::hardlinkCopy(destNew, failedFile);
1503 }
1504 if (userabort)
1505 {
1506 ZYPP_RETHROW(ex);
1507 }
1508 file = fopen((*destNew).c_str(), "w+e");
1509 if (!file)
1510 ZYPP_THROW(MediaWriteException(destNew));
1511
1512 // use the default progressCallback
1513 curl_easy_setopt(_curl, CURLOPT_XFERINFOFUNCTION, &MediaCurl::progressCallback);
1514 MediaCurl::doGetFileCopyFile(srcFile, dest, file, report, options | OPTION_NO_REPORT_START);
1515 }
1516 }
1517
1518 if (::fchmod( ::fileno(file), filesystem::applyUmaskTo( 0644 )))
1519 {
1520 ERR << "Failed to chmod file " << destNew << endl;
1521 }
1522
1523 file.resetDispose(); // we're going to close it manually here
1524 if (::fclose(file))
1525 {
1526 filesystem::unlink(destNew);
1527 ERR << "Fclose failed for file '" << destNew << "'" << endl;
1528 ZYPP_THROW(MediaWriteException(destNew));
1529 }
1530
1531 if ( rename( destNew, dest ) != 0 )
1532 {
1533 ERR << "Rename failed" << endl;
1534 ZYPP_THROW(MediaWriteException(dest));
1535 }
1536 destNew.resetDispose(); // no more need to unlink it
1537
1538 DBG << "done: " << PathInfo(dest) << endl;
1539}
1540
1541void MediaMultiCurl::multifetch(const Pathname & filename, FILE *fp, std::vector<Url> *urllist, callback::SendReport<DownloadProgressReport> *report, MediaBlockList *blklist, off_t filesize) const
1542{
1543 Url baseurl(getFileUrl(filename));
1544 if (blklist && filesize == off_t(-1) && blklist->haveFilesize())
1545 filesize = blklist->getFilesize();
1546 if (blklist && !blklist->haveBlocks() && filesize != 0)
1547 blklist = 0;
1548 if (blklist && (filesize == 0 || !blklist->numBlocks()))
1549 {
1550 checkFileDigest(baseurl, fp, blklist);
1551 return;
1552 }
1553 if (filesize == 0)
1554 return;
1555 if (!_multi)
1556 {
1557 _multi = curl_multi_init();
1558 if (!_multi)
1559 ZYPP_THROW(MediaCurlInitException(baseurl));
1560 }
1561
1562 multifetchrequest req(this, filename, baseurl, _multi, fp, report, blklist, filesize);
1563 req._timeout = _settings.timeout();
1564 req._connect_timeout = _settings.connectTimeout();
1565 req._maxspeed = _settings.maxDownloadSpeed();
1566 req._maxworkers = _settings.maxConcurrentConnections();
1567 if (req._maxworkers > MAXURLS)
1568 req._maxworkers = MAXURLS;
1569 if (req._maxworkers <= 0)
1570 req._maxworkers = 1;
1571 std::vector<Url> myurllist;
1572 for (std::vector<Url>::iterator urliter = urllist->begin(); urliter != urllist->end(); ++urliter)
1573 {
1574 try
1575 {
1576 std::string scheme = urliter->getScheme();
1577 if (scheme == "http" || scheme == "https" || scheme == "ftp" || scheme == "tftp")
1578 {
1579 checkProtocol(*urliter);
1580 myurllist.push_back(internal::propagateQueryParams(*urliter, _url));
1581 }
1582 }
1583 catch (...)
1584 {
1585 }
1586 }
1587 if (!myurllist.size())
1588 myurllist.push_back(baseurl);
1589 req.run(myurllist);
1590 checkFileDigest(baseurl, fp, blklist);
1591}
1592
1593void MediaMultiCurl::checkFileDigest(Url &url, FILE *fp, MediaBlockList *blklist) const
1594{
1595 if (!blklist || !blklist->haveFileChecksum())
1596 return;
1597 if (fseeko(fp, off_t(0), SEEK_SET))
1598 ZYPP_THROW(MediaCurlException(url, "fseeko", "seek error"));
1599 Digest dig;
1600 blklist->createFileDigest(dig);
1601 char buf[4096];
1602 size_t l;
1603 while ((l = fread(buf, 1, sizeof(buf), fp)) > 0)
1604 dig.update(buf, l);
1605 if (!blklist->verifyFileDigest(dig))
1606 ZYPP_THROW(MediaCurlException(url, "file verification failed", "checksum error"));
1607}
1608
1609bool MediaMultiCurl::isDNSok(const std::string &host) const
1610{
1611 return _dnsok.find(host) == _dnsok.end() ? false : true;
1612}
1613
1614void MediaMultiCurl::setDNSok(const std::string &host) const
1615{
1616 _dnsok.insert(host);
1617}
1618
1619CURL *MediaMultiCurl::fromEasyPool(const std::string &host) const
1620{
1621 if (_easypool.find(host) == _easypool.end())
1622 return 0;
1623 CURL *ret = _easypool[host];
1624 _easypool.erase(host);
1625 return ret;
1626}
1627
1628void MediaMultiCurl::toEasyPool(const std::string &host, CURL *easy) const
1629{
1630 CURL *oldeasy = _easypool[host];
1631 _easypool[host] = easy;
1632 if (oldeasy)
1633 curl_easy_cleanup(oldeasy);
1634}
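Pooling one idle CURL * per host keeps its connection cache alive, so the next worker for that host can skip the TCP/TLS handshake; note that toEasyPool() keeps the newest handle and cleans up a previously pooled one. A standalone sketch of the pattern (hypothetical class, not part of libzypp):

#include <curl/curl.h>
#include <map>
#include <string>

class EasyPool
{
  std::map<std::string, CURL *> _pool;
public:
  CURL *take(const std::string &host)
  {
    auto it = _pool.find(host);
    if (it == _pool.end())
      return nullptr;
    CURL *easy = it->second;
    _pool.erase(it);
    return easy;
  }
  void put(const std::string &host, CURL *easy)
  {
    auto res = _pool.emplace(host, easy);
    if (!res.second)
      {
        curl_easy_cleanup(res.first->second); // drop the previously pooled handle
        res.first->second = easy;
      }
  }
  ~EasyPool()
  {
    for (auto &e : _pool)
      curl_easy_cleanup(e.second);
  }
};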
1635
1636 } // namespace media
1637} // namespace zypp