42 #ifndef CONFIG_H_INCLUDED 43 #define CONFIG_H_INCLUDED 60 #define HAVE_UNORDERED_MAP 61 #define HAVE_UNORDERED_SET 64 #ifdef HAVE_UNORDERED_MAP 65 #include <unordered_map> 66 using std::unordered_map;
67 #elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP) 68 #include <boost/unordered_map.hpp> 69 using boost::unordered_map;
71 #include <tr1/unordered_map> 72 using std::tr1::unordered_map;
74 #ifdef HAVE_UNORDERED_SET 75 #include <unordered_set> 76 using std::unordered_set;
77 #elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP) 78 #include <boost/unordered_set.hpp> 79 using boost::unordered_set;
81 #include <tr1/unordered_set> 82 using std::tr1::unordered_set;
85 #if defined(HAVE_BUILTIN_POPCOUNT) 86 static inline int popcount(
unsigned int x) {
87 return __builtin_popcount(x);
89 #elif defined(HAVE_POPCNT) 91 static inline int popcount(
unsigned int x) {
95 static inline int popcount(
unsigned int x) {
98 if ((x & 1) == 1) count++;
116 const WORD OPER_ADD = -1;
117 const WORD OPER_MUL = -2;
118 const WORD OPER_COMMA = -3;
123 vector<tree_node> childs;
131 sum_results(0), num_visits(0), var(_var), finished(
false) {}
136 vector<vector<WORD> > optimize_best_Horner_schemes;
137 int optimize_num_vars;
138 int optimize_best_num_oper;
139 vector<WORD> optimize_best_instr;
140 vector<WORD> optimize_best_vars;
143 bool mcts_factorized, mcts_separated;
144 vector<WORD> mcts_vars;
147 set<pair<int,vector<WORD> > > mcts_best_schemes;
150 pthread_mutex_t optimize_lock;
158 void print_instr (
const vector<WORD> &instr, WORD num)
160 const WORD *tbegin = &*instr.begin();
161 const WORD *tend = tbegin+instr.size();
162 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
163 MesPrint(
"<%d> %a",num,t[2],t);
179 template <
class RandomAccessIterator>
181 for (
int i=to-fr-1; i>0; --i)
182 swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
190 static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
212 SetScratch(AR.infile,&(e->onfile));
215 WORD *term = AT.WorkPointer;
221 while (GetTerm(BHEAD term) > 0) {
222 AT.WorkPointer = term + *term;
224 WORD *t2 = term + *term;
225 if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0)
return -1;
228 AT.WorkPointer = term + *term;
233 LONG len =
EndSort(BHEAD (WORD *)((VOID *)(&optimize_expr)),2);
235 AT.WorkPointer = term;
247 LONG PF_get_expression (
int exprnr) {
249 if (PF.me == MASTER) {
252 if (PF.numtasks > 1) {
259 #define get_expression PF_get_expression 284 bool has_brackets =
false;
285 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
286 WORD *tend=t+*t; tend-=ABS(*(tend-1));
287 for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
293 vector<vector<WORD> > brackets;
297 for (WORD *t=optimize_expr; *t!=0; t+=*t)
299 WORD *newexpr = (WORD *)Malloc1(exprlen*
sizeof(WORD),
"optimize newexpr");
304 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
308 vector<WORD> bracket;
309 for (; *t1!=HAAKJE; t1+=*(t1+1))
310 bracket.insert(bracket.end(), t1, t1+*(t1+1));
312 if (brackets.size()==0 || bracket!=brackets.back()) {
314 brackets.push_back(bracket);
318 WORD left = t + *t - t1;
319 bool more_symbols = (left != ABS(*(t+*t-1)));
322 newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
323 newexpr[i++] = SYMBOL;
324 newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
325 newexpr[i++] = SEPARATESYMBOL;
326 newexpr[i++] = sep_power;
334 newexpr[i++] = *(t1++);
344 if ( sep_power == 1 )
347 vector<WORD> bracket;
348 bracket.push_back(0);
349 bracket.push_back(0);
350 bracket.push_back(0);
351 bracket.push_back(0);
353 brackets.push_back(bracket);
355 newexpr[i++] = SYMBOL;
357 newexpr[i++] = SEPARATESYMBOL;
358 newexpr[i++] = sep_power;
363 for (t=optimize_expr; *t!=0; t+=*t) {}
364 if ( t-optimize_expr+1 < i ) {
365 M_free(optimize_expr,
"$-sort space");
366 optimize_expr = (WORD *)Malloc1(i*
sizeof(WORD),
"$-sort space");
372 memcpy(optimize_expr, newexpr, i*
sizeof(WORD));
373 M_free(newexpr,
"optimize newexpr");
376 if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
377 for (WORD *t=optimize_expr; *t!=0; t+=*t) {
378 if (*t == ABS(*(t+*t-1))+1)
continue;
380 for (
int i=3; i<t[2]; i+=2)
381 if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
383 return vector<vector<WORD> >();
404 while (*(expr+n)!=0) n+=*(expr+n);
406 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
407 WORD maxpowfac=1, maxpowsep=1;
409 for (
const WORD *t=expr; *t!=0; t+=*t) {
410 if (t!=expr) cntadd++;
411 if (*t==ABS(*(t+*t-1))+1)
continue;
416 for (
int i=3; i<t[2]; i+=2) {
417 if (t[i]==FACTORSYMBOL) {
418 maxpowfac = max(maxpowfac, t[i+1]);
421 if (t[i]==SEPARATESYMBOL) {
422 maxpowsep = max(maxpowsep, t[i+1]);
427 sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
429 if (t[i+1]==2) cntmul++;
433 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++;
435 if (cntsym > 0) cntmul+=cntsym-1;
438 cntadd -= maxpowfac-1;
439 cntmul += maxpowfac-1;
441 cntadd -= maxpowsep-1;
444 MesPrint (
"*** STATS: original %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
446 return sumpow+cntmul+cntadd;
457 int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
459 const WORD *ebegin = &*instr.begin();
460 const WORD *eend = ebegin+instr.size();
462 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
463 for (
const WORD *t=e+3; *t!=0; t+=*t) {
465 if (*(e+1)==OPER_ADD) cntadd++;
466 if (*(e+1)==OPER_MUL) cntmul++;
468 if (*t == ABS(*(t+*t-1))+1)
continue;
469 if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
470 if (*(t+4)==2) cntmul++;
473 sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
476 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++;
481 MesPrint (
"*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
483 return sumpow+cntmul+cntadd;
502 for (
const WORD *t=expr; *t!=0; t+=*t) {
503 if (*t == ABS(*(t+*t-1))+1)
continue;
505 for (
int i=3; i<t[2]; i+=2)
509 bool is_fac=
false, is_sep=
false;
510 if (cnt.count(FACTORSYMBOL)) {
512 cnt.erase(FACTORSYMBOL);
514 if (cnt.count(SEPARATESYMBOL)) {
516 cnt.erase(SEPARATESYMBOL);
520 vector<pair<int,WORD> > cnt_order;
521 for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
522 cnt_order.push_back(make_pair(i->second, i->first));
523 sort(cnt_order.rbegin(), cnt_order.rend());
527 for (
int i=0; i<(int)cnt_order.size(); i++)
528 order.push_back(cnt_order[i].second);
530 if (rev) reverse(order.begin(),order.end());
533 if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
534 if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
579 WORD
getpower (
const WORD *term,
int var,
int pos) {
580 if (*term == ABS(*(term+*term-1))+1)
return 0;
581 if (2*pos+2 >= term[2])
return 0;
582 if (term[2*pos+3] != var)
return 0;
583 return term[2*pos+4];
594 int an=ABS(n), sn=SGN(n);
595 while (*(t+an-1)==0) an--;
599 void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
602 GcdLong(BHEAD a,na,b,nb,c,nc);
605 void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd) {
608 DivLong(a,na,b,nb,c,nc,d,nd);
631 void build_Horner_tree (
const WORD **terms,
int numterms,
int var,
int maxvar,
int pos, vector<WORD> *res) {
639 for (
int fr=0; fr<numterms; fr++) {
642 const WORD *t = terms[fr];
645 if (*t != ABS(*(t+*t-1))+1)
646 for (
int i=3+2*pos; i<t[2]; i+=2) {
647 res->push_back(SYMBOL);
649 res->push_back(t[i]);
650 res->push_back(t[i+1]);
651 if (!empty) res->push_back(OPER_MUL);
657 res->push_back(SNUMBER);
665 res->push_back(SNUMBER);
666 WORD n = ABS(*(t+*t-1));
668 for (
int i=0; i<n; i++)
669 res->push_back(*(t+*t-n+i));
670 res->push_back(OPER_MUL);
672 if (fr>0) res->push_back(OPER_ADD);
679 res->push_back(SNUMBER);
684 res->push_back(OPER_MUL);
690 WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
691 UWORD *num = NumberMalloc(
"build_Horner_tree");
692 UWORD *den = NumberMalloc(
"build_Horner_tree");
693 UWORD *tmp = NumberMalloc(
"build_Horner_tree");
694 UWORD *dum = NumberMalloc(
"build_Horner_tree");
697 int prev_coeff_idx = -1;
699 for (
int fr=0; fr<numterms;) {
702 WORD pow =
getpower(terms[fr], var, pos);
706 while (to<numterms &&
getpower(terms[to], var, pos) == pow) to++;
711 if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
713 WORD n1 = res->at(res->size()-2) / 2;
714 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
716 WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
717 WORD n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
720 GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
721 GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
724 for (
int i=0; i<2; i++) {
725 if (i==1 && fr==0)
break;
727 WORD *t = i==0 ? t1 : t2;
728 WORD n = i==0 ? n1 : n2;
730 DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
731 for (
int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
732 for (
int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
734 if (SGN(ntmp) != SGN(n)) n=-n;
736 DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
737 for (
int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
738 for (
int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
740 *t++ = SGN(n) * (2*ABS(n)+1);
744 if (fr>0) res->push_back(OPER_ADD);
747 WORD nextpow = (to==numterms ? 0 :
getpower(terms[to], var, pos));
749 if (pow-nextpow > 0) {
750 res->push_back(SYMBOL);
753 res->push_back(pow-nextpow);
754 res->push_back(OPER_MUL);
758 res->push_back(SNUMBER);
759 WORD n = MaX(ABS(nnum),nden);
760 res->push_back(n*2+3);
761 for (
int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
762 for (
int i=0; i<n-ABS(nnum); i++) res->push_back(0);
763 for (
int i=0; i<nden; i++) res->push_back(den[i]);
764 for (
int i=0; i<n-ABS(nden); i++) res->push_back(0);
765 res->push_back(SGN(nnum)*(2*n+1));
766 res->push_back(OPER_MUL);
768 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
770 else if (AN.poly_vars[var]==FACTORSYMBOL) {
775 WORD n1 = res->at(res->size()-2) / 2;
776 WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
777 WORD *t2 = &res->at(prev_coeff_idx);
778 WORD n2 = *(t2+*(t2+1)-1) / 2;
781 MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
785 for (
int i=0; i<ABS(n2); i++)
786 t2[i] = t2[n2+i] = i==0 ? 1 : 0;
790 for (
int i=0; i<2*ABS(n1)+2; i++)
794 res->back() = 2*ABS(ntmp)+3;
795 res->insert(res->end(), tmp, tmp+2*ABS(ntmp));
796 res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1));
797 res->push_back(OPER_MUL);
800 prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
804 res->push_back(OPER_MUL);
808 res->push_back(OPER_COMMA);
816 NumberFree(dum,
"build_Horner_tree");
817 NumberFree(tmp,
"build_Horner_tree");
818 NumberFree(den,
"build_Horner_tree");
819 NumberFree(num,
"build_Horner_tree");
832 if (*a == ABS(*(a+*a-1))+1)
return true;
833 if (*b == ABS(*(b+*b-1))+1)
return false;
834 if (a[1]!=SYMBOL)
return true;
835 if (b[1]!=SYMBOL)
return false;
836 for (
int i=3; i<a[2] && i<b[2]; i+=2) {
837 if (a[i] !=b[i] )
return a[i] >b[i];
838 if (a[i+1]!=b[i+1])
return a[i+1]<b[i+1];
852 vector<WORD>
Horner_tree (
const WORD *expr,
const vector<WORD> &order) {
854 MesPrint (
"*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
860 map<WORD,WORD> renum;
861 AN.poly_num_vars = order.size();
862 AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*
sizeof(WORD),
"AN.poly_vars");
863 for (
int i=0; i<AN.poly_num_vars; i++) {
864 AN.poly_vars[i] = order[i];
869 WORD *sorted = AT.WorkPointer;
871 for (
const WORD *t=expr; *t!=0; t+=*t) {
872 memcpy(sorted, t, *t*
sizeof(WORD));
874 if (*t != ABS(*(t+*t-1))+1) {
878 for (
int i=3; i<sorted[2]; i+=2) {
879 if (!renum.count(sorted[i])) {
880 renum[sorted[i]] = AN.poly_num_vars;
882 WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*
sizeof(WORD),
"AN.poly_vars");
883 memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*
sizeof(WORD));
884 M_free(AN.poly_vars,
"poly_vars");
885 AN.poly_vars = new_poly_vars;
886 AN.poly_vars[AN.poly_num_vars] = sorted[i];
889 sorted[i] = renum[sorted[i]];
892 for (
int i=0; i<sorted[2]/2; i++)
893 for (
int j=3; j+2<sorted[2]; j+=2)
894 if (sorted[j] > sorted[j+2]) {
895 swap(sorted[j] , sorted[j+2]);
896 swap(sorted[j+1], sorted[j+3]);
904 sorted = AT.WorkPointer;
907 vector<const WORD *> terms;
908 for (
const WORD *t=sorted; *t!=0; t+=*t)
918 for (
int i=0; i<(int)res.size();) {
919 if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
921 else if (res[i]==SYMBOL) {
922 memmove(&res[j], &res[i], res[i+1]*
sizeof(WORD));
926 else if (res[i]==SNUMBER) {
927 int n = (res[i+1]-2)/2;
929 while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
931 res[j++] = 2*(n-dn) + 3;
932 memmove(&res[j], &res[i+2], (n-dn)*
sizeof(WORD));
934 memmove(&res[j], &res[i+n+2], (n-dn)*
sizeof(WORD));
936 res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
943 MesPrint (
"*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
955 void print_tree (
const vector<WORD> &
tree) {
959 for (
int i=0; i<(int)
tree.size();) {
960 if (
tree[i]==OPER_ADD) {
964 else if (
tree[i]==OPER_MUL) {
968 else if (
tree[i]==OPER_COMMA) {
972 else if (
tree[i]==SNUMBER) {
975 PrtLong((UWORD *)&
tree[i+2], n, buf);
976 int l = strlen((
char *)buf);
979 PrtLong((UWORD *)&
tree[i+2+n], n, buf+l+1);
983 else if (
tree[i]==SYMBOL) {
984 if (AN.poly_vars[
tree[i+2]] < 10000)
985 MesPrint(
"%s^%d%", VARNAME(symbols,AN.poly_vars[
tree[i+2]]),
tree[i+3]);
987 MesPrint(
"Z%d^%d%", MAXVARIABLES-AN.poly_vars[
tree[i+2]],
tree[i+3]);
1008 size_t operator()(
const vector<WORD>& n)
const {
1014 bool operator()(
const vector<WORD>& lhs,
const vector<WORD>& rhs)
const {
1015 if (lhs.size() != rhs.size())
return false;
1016 for (
unsigned int i = 1; i < lhs.size(); i++) {
1017 if (lhs[i] != rhs[i])
return false;
1024 template<
typename T>
size_t hash_range(T* array,
int size) {
1027 for (
int i = 0; i < size; i++) {
1028 hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
1088 MesPrint (
"*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1089 thetime_str().c_str(), do_CSE?1:0);
1092 typedef unordered_map<vector<WORD>, int,
CSEHash,
CSEEq> csemap;
1098 ID.rehash(mcts_expr_score * 2);
1110 for (
int i=0; i<(int)
tree.size();) {
1113 if (
tree[i]==SNUMBER) {
1114 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1116 x.push_back(SNUMBER);
1118 int sign = SGN(x.back());
1121 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1123 s.push(sign * suc.first->second);
1128 else if (
tree[i]==SYMBOL) {
1129 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1132 x.push_back(SYMBOL);
1133 x.push_back(
tree[i+2]+1);
1134 x.push_back(
tree[i+3]);
1136 csemap::iterator it = ID.find(x);
1137 if (do_CSE && it != ID.end()) {
1141 s.push(EXTRASYMBOL);
1144 if (numinstr == MAXPOSITIVE) {
1145 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1150 instr.push_back(numinstr);
1151 instr.push_back(OPER_MUL);
1152 instr.push_back(9+OPTHEAD);
1154 instr.push_back(SYMBOL);
1156 instr.push_back(
tree[i+2]);
1157 instr.push_back(
tree[i+3]);
1166 s.push(EXTRASYMBOL);
1172 s.push(
tree[i+2]+1);
1186 hash.push_back(oper);
1189 for (
int operand=0; operand<2; operand++) {
1190 hash.push_back(s.top()); s.pop();
1191 x.push_back(s.top()); s.pop();
1192 x.push_back(s.top()); s.pop();
1193 x.push_back(s.top()); s.pop();
1196 x[0] = hash_range(&hash[0], 3);
1199 if (oper==OPER_MUL) {
1200 bool do_continue =
false;
1202 for (
int operand=0; operand<2; operand++) {
1203 int idx_oper1 = operand==0 ? 2 : 5;
1204 int idx_oper2 = operand==0 ? 5 : 2;
1207 if (x[idx_oper1]==SNUMBER) {
1209 int idx = ABS(x[idx_oper1+1])-1;
1210 if (
tree[idx+2]==1 &&
tree[idx+3]==1 && ABS(
tree[idx+4])==3) {
1212 s.push(x[idx_oper2+2]);
1213 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1214 s.push(x[idx_oper2]);
1215 s.push(hash[1 + (operand + 1) % 2]);
1222 if (do_continue)
continue;
1227 csemap::iterator it = ID.find(x);
1228 if (!do_CSE || it == ID.end()) {
1229 if (numinstr == MAXPOSITIVE) {
1230 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1234 instr.push_back(numinstr);
1235 instr.push_back(x[1]);
1240 int lenidx = instr.size()-1;
1242 for (
int j=0; j<2; j++)
1243 if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
1245 instr.push_back(x[3*j+2]);
1247 instr.push_back(ABS(x[3*j+3])-1);
1248 instr.push_back(x[3*j+4]);
1251 instr.push_back(3*SGN(x[3*j+3]));
1255 int t = ABS(x[3*j+3])-1;
1256 instr.push_back(
tree[t+1]-1);
1257 instr.insert(instr.end(), &
tree[t+2], &
tree[t]+
tree[t+1]);
1258 instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
1259 instr[lenidx] +=
tree[t+1]-1;
1269 s.push(EXTRASYMBOL);
1275 MesPrint (
"*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1298 typedef unordered_map<vector<WORD>, int,
CSEHash,
CSEEq> csemap;
1303 ID.rehash(mcts_expr_score * 2);
1309 int numinstr = 0, numcommas = 0;
1313 for (
int i=0; i<(int)
tree.size();) {
1316 if (
tree[i]==SNUMBER) {
1317 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1319 x.push_back(SNUMBER);
1321 int sign = SGN(x.back());
1324 std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1326 s.push(sign * suc.first->second);
1331 else if (
tree[i]==SYMBOL) {
1332 WORD hash = hash_range(&
tree[i],
tree[i + 1]);
1335 x.push_back(SYMBOL);
1336 x.push_back(
tree[i+2]+1);
1337 x.push_back(
tree[i+3]);
1339 csemap::iterator it = ID.find(x);
1340 if (it != ID.end()) {
1344 s.push(EXTRASYMBOL);
1346 if (
tree[i + 3] == 2)
1349 numinstr += (int)floor(log(
tree[i+3])/log(2.0)) + popcount(
tree[i+3]) - 1;
1354 s.push(EXTRASYMBOL);
1360 s.push(
tree[i+2]+1);
1375 hash.push_back(oper);
1378 for (
int operand=0; operand<2; operand++) {
1379 hash.push_back(s.top()); s.pop();
1380 x.push_back(s.top()); s.pop();
1381 x.push_back(s.top()); s.pop();
1382 x.push_back(s.top()); s.pop();
1386 x[0] = hash_range(&hash[0], 3);
1389 if (oper==OPER_MUL) {
1390 bool do_continue =
false;
1392 for (
int operand=0; operand<2; operand++) {
1393 int idx_oper1 = operand==0 ? 2 : 5;
1394 int idx_oper2 = operand==0 ? 5 : 2;
1397 if (x[idx_oper1]==SNUMBER) {
1399 int idx = ABS(x[idx_oper1+1])-1;
1400 if (
tree[idx+2]==1 &&
tree[idx+3]==1 && ABS(
tree[idx+4])==3) {
1402 s.push(x[idx_oper2+2]);
1403 s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1404 s.push(x[idx_oper2]);
1405 s.push(hash[1 + (operand + 1) % 2]);
1412 if (do_continue)
continue;
1417 csemap::iterator it = ID.find(x);
1418 if (it == ID.end()) {
1419 if (numinstr == MAXPOSITIVE) {
1420 MesPrint((
char *)
"ERROR: too many temporary variables needed in optimization");
1424 if (oper == OPER_COMMA) numcommas++;
1428 s.push(EXTRASYMBOL);
1433 s.push(EXTRASYMBOL);
1441 return numinstr - numcommas;
1456 node() : l(NULL), r(NULL), sign(1), hash(0) {};
1457 node(
const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
1461 int cmp(
const struct node* rhs)
const {
1462 if (
this == rhs)
return 0;
1464 if (data[0] != rhs->data[0])
return data[0] < rhs->data[0] ? -1 : 1;
1465 int mod = data[0] == SNUMBER ? -1 : 0;
1466 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1467 for (
int i = 0; i < data[1] + mod; i++) {
1468 if (data[i] != rhs->data[i])
return data[i] < rhs->data[i] ? -1 : 1;
1471 int lv = l->cmp(rhs->l);
1472 if (lv != 0)
return lv;
1473 int rv = r->cmp(rhs->r);
1474 if (rv != 0)
return rv;
1477 if (l->sign != rhs->l->sign)
return l->sign < rhs->l->sign ? -1 : 1;
1478 if (r->sign != rhs->r->sign)
return r->sign < rhs->r->sign ? -1 : 1;
1485 bool operator() (
const struct node* lhs,
const struct node* rhs)
const 1487 return lhs->cmp(rhs) < 0;
1491 int mod = data[0] == SNUMBER ? -1 : 0;
1492 if (data[0] == SYMBOL || data[0] == SNUMBER) {
1493 hash = hash_range(data, data[1] + mod);
1495 if (l->hash == 0) l->calcHash();
1496 if (r->hash == 0) r->calcHash();
1499 size_t newr[] = {(size_t)data[0], l->hash, (
size_t)l->sign, r->hash, (size_t)r->sign};
1500 hash = hash_range(newr, 5);
1506 size_t operator()(
const NODE* n)
const {
1512 bool operator()(
const NODE* lhs,
const NODE* rhs)
const {
1513 return lhs->cmp(rhs) == 0;
1517 NODE* buildTree(vector<WORD> &
tree) {
1523 unsigned int curIndex = 0;
1526 for (
int i=0; i<(int)
tree.size();) {
1531 if (
tree[i]==SYMBOL ||
tree[i] == SNUMBER) {
1533 if (
tree[i] == SNUMBER) {
1534 c->sign = SGN(
tree[i +
tree[i + 1] -1]);
1541 c->r = st.top(); st.pop();
1542 c->l = st.top(); st.pop();
1546 if (c->data[0] == OPER_MUL) {
1547 NODE* ch[] = {c->r, c->l};
1548 for (
int j = 0; j < 2; j++)
1549 if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
1550 ch[(j+1)%2]->sign *= ch[j]->sign;
1570 for (
unsigned int i = 0; i < curIndex; i++) {
1571 if (ar[i].l == ar) ar[i].l = st.top();
1572 if (ar[i].r == ar) ar[i].r = st.top();
1575 swap(ar[0], *st.top());
1579 int count_operators_cse_topdown (vector<WORD> &
tree) {
1580 typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
1585 ID.rehash(mcts_expr_score * 2);
1593 while (!stack.empty())
1595 NODE* c = stack.top();
1598 if (c->data[0] == SYMBOL) {
1599 if (c->data[3] > 1) {
1600 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1602 if (c->data[3] == 2)
1605 numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
1609 if (c->data[0] != SNUMBER) {
1611 std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1617 if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
1625 M_free(root,
"CSE tree");
1634 vector<WORD> simulated_annealing() {
1635 float minT = AO.Optimize.saMinT.fval;
1636 float maxT = AO.Optimize.saMaxT.fval;
1638 float coolrate = pow(minT / maxT, 1 / (
float)AO.Optimize.saIter);
1645 if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
1646 if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
1651 int curscore = count_operators_cse_topdown(
tree);
1654 AN.poly_num_vars = 0;
1655 M_free(AN.poly_vars,
"poly_vars");
1657 std::vector<WORD> best = state;
1658 int bestscore = curscore;
1660 if (startindex == (
int)state.size() || (int)state.size() - startindex < 2) {
1664 for (
int o = 0; o < AO.Optimize.saIter; o++) {
1665 int inda = iranf(BHEAD state.size() - startindex) + startindex;
1666 int indb = iranf(BHEAD state.size() - startindex) + startindex;
1672 swap(state[inda], state[indb]);
1675 int newscore = count_operators_cse_topdown(
tree);
1678 AN.poly_num_vars = 0;
1679 M_free(AN.poly_vars,
"poly_vars");
1681 if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (
float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
1682 curscore = newscore;
1684 if (curscore < bestscore) {
1685 bestscore = curscore;
1689 swap(state[inda], state[indb]);
1693 MesPrint(
"Score at step %d: %d", o, curscore);
1699 MesPrint(
"Simulated annealing score: %d", bestscore);
1781 inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
1783 vector<WORD> &order = *porder;
1784 vector<WORD> &schemev = *pscheme;
1785 vector<tree_node *> &path = *ppath;
1786 int depth = 0, nchild0;
1787 float slide_down_factor = 1.0;
1794 path.push_back(select);
1795 nchild0 = select->childs.size();
1796 while (select->childs.size() > 0) {
1798 select->num_visits++;
1799 select->sum_results+=mcts_expr_score;
1802 switch ( AO.Optimize.mctsdecaymode ) {
1804 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1807 if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
1808 slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
1809 slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
1812 slide_down_factor = 0.0001;
1816 float dd = 1.0-(1.0*depth)/(1.0*nchild0);
1817 slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1818 if ( dd <= 0.000001 ) slide_down_factor = 1.0;
1819 else slide_down_factor /= dd;
1820 if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
1826 MesPrint(
"select %d",select->var);
1832 for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
1834 if (p->num_visits >= 1) {
1837 score = mcts_expr_score / (p->sum_results/p->num_visits) +
1841 2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
1844 printf(
"%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1845 p->num_visits,p->finished?1:0);
1854 printf(
"%d: inf\n",p->var); fflush(stdout);
1859 if (!p->finished && score>best) {
1867 select->finished=
true;
1873 path.push_back(select);
1874 order.push_back(select->var);
1881 MesPrint(
"expand %d",select->var);
1887 for (
int i=0; i<(int)order.size(); i++)
1888 var_used.insert(ABS(order[i])-1);
1891 if (!select->finished && select->childs.size()==0) {
1893 int sign = (order.size() == 0) ? 1 : SGN(order.back());
1894 for (
int i=0; i<(int)mcts_vars.size(); i++)
1895 if (!var_used.count(mcts_vars[i])) {
1896 new_node.childs.push_back(
tree_node(sign*(mcts_vars[i]+1)));
1897 if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
1898 new_node.childs.push_back(
tree_node(-sign*(mcts_vars[i]+1)));
1904 LOCK(optimize_lock);
1906 UNLOCK(optimize_lock);
1909 if (select->childs.size()==0)
1910 select->finished =
true;
1913 select->num_visits++;
1914 select->sum_results+=mcts_expr_score;
1921 for (
int i=0; i<(int)mcts_vars.size(); i++)
1922 if (!var_used.count(mcts_vars[i]))
1923 scheme.push_back(mcts_vars[i]);
1926 for (
int i=(
int)order.size()-1; i>=0; i--) {
1928 scheme.push_front(order[i]-1);
1930 scheme.push_back(-order[i]-1);
1934 if (mcts_factorized)
1935 scheme.push_front(FACTORSYMBOL);
1937 scheme.push_front(SEPARATESYMBOL);
1940 schemev = vector<WORD>(scheme.begin(), scheme.end());
1950 inline static void try_MCTS_scheme (PHEAD
const vector<WORD> &scheme,
int *pnum_oper) {
1960 int num_oper = count_operators_cse_topdown(
tree);
1963 AN.poly_num_vars = 0;
1964 M_free(AN.poly_vars,
"poly_vars");
1966 *pnum_oper = num_oper;
1976 inline static void update_MCTS_scheme (
int num_oper,
const vector<WORD> &scheme, vector<tree_node *> *ppath) {
1978 vector<tree_node *> &path = *ppath;
1981 if ((
int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
1982 (--mcts_best_schemes.end())->first > num_oper) {
1987 LOCK(optimize_lock);
1988 mcts_best_schemes.insert(make_pair(num_oper,scheme));
1989 if ((
int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
1990 mcts_best_schemes.erase(--mcts_best_schemes.end());
1991 UNLOCK(optimize_lock);
1998 for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
1999 (*p)->sum_results += num_oper - mcts_expr_score;
2007 void find_Horner_MCTS_expand_tree () {
2010 MesPrint (
"*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2020 vector<WORD> scheme;
2023 vector<tree_node *> path;
2028 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2029 try_MCTS_scheme(BHEAD scheme, &num_oper);
2032 MesPrint (
"{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2034 update_MCTS_scheme(num_oper, scheme, &path);
2037 MesPrint (
"*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2038 thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2052 vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2055 void PF_find_Horner_MCTS_expand_tree_master_init () {
2057 PF_opt_MCTS_tasks.resize(PF.numtasks);
2058 for (
int i = 1; i < PF.numtasks; i++) {
2059 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2067 int PF_find_Horner_MCTS_expand_tree_master_next () {
2071 PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2074 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2075 if (!p.first.empty()) {
2079 update_MCTS_scheme(num_oper, p.first, &p.second);
2091 void PF_find_Horner_MCTS_expand_tree_master () {
2094 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2098 vector<WORD> scheme;
2099 vector<tree_node *> path;
2101 next_MCTS_scheme(BHEAD &order, &scheme, &path);
2104 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2108 int len = scheme.size();
2114 pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2119 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2125 void PF_find_Horner_MCTS_expand_tree_master_wait () {
2128 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2132 for (
int i = 1; i < PF.numtasks; i++) {
2133 int next = PF_find_Horner_MCTS_expand_tree_master_next();
2142 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2148 void PF_find_Horner_MCTS_expand_tree_slave () {
2151 MesPrint (
"*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2154 vector<WORD> scheme;
2161 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2173 if (len == 0)
break;
2179 try_MCTS_scheme(scheme, &num_oper);
2183 PF_Pack(&num_oper, 1, PF_INT);
2184 PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2188 MesPrint (
"*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2211 if (PF.me != MASTER) {
2212 if (PF.numtasks <= 1)
return;
2213 PF_find_Horner_MCTS_expand_tree_slave();
2219 MesPrint (
"*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2232 for (WORD *t=optimize_expr; *t!=0; t+=*t)
2234 for (
int i=3; i<t[2]; i+=2)
2235 var_set.insert(t[i]);
2239 mcts_factorized = var_set.count(FACTORSYMBOL);
2240 if (mcts_factorized) var_set.erase(FACTORSYMBOL);
2241 mcts_separated = var_set.count(SEPARATESYMBOL);
2242 if (mcts_separated) var_set.erase(SEPARATESYMBOL);
2244 mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
2245 optimize_num_vars = (int)mcts_vars.size();
2247 for (
int i=0; i<(int)mcts_vars.size(); i++) {
2248 if (AO.Optimize.hornerdirection != O_BACKWARD)
2249 mcts_root.childs.push_back(
tree_node(+(mcts_vars[i]+1)));
2250 if (AO.Optimize.hornerdirection != O_FORWARD)
2251 mcts_root.childs.push_back(
tree_node(-(mcts_vars[i]+1)));
2255 #if defined(WITHMPI) 2256 PF_find_Horner_MCTS_expand_tree_master_init();
2264 for (
int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
2265 (AO.Optimize.mctstimelimit==0 || (
TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
2267 AT.optimtimes = times;
2269 #if defined(WITHPTHREADS) 2270 if (AM.totalnumberofthreads > 1)
2271 find_Horner_MCTS_expand_tree_threaded();
2273 #elif defined(WITHMPI) 2274 if (PF.numtasks > 1)
2275 PF_find_Horner_MCTS_expand_tree_master();
2278 find_Horner_MCTS_expand_tree();
2286 PF_find_Horner_MCTS_expand_tree_master_wait();
2289 MesPrint (
"*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2359 MesPrint (
"*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2363 vector<const WORD *> instr;
2364 const WORD *tbegin = &*all_instr.begin();
2365 const WORD *tend = tbegin+all_instr.size();
2367 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
2370 int n = instr.size();
2373 vector<int> par(n), numpar(n,0);
2374 for (
int i=0; i<n; i++) par[i]=i;
2376 for (
int i=0; i<n; i++) {
2377 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2378 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
2385 if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
2386 if (*(t+4)>1) numpar[*(t+3)]++;
2393 vector<bool> vis(n,
false);
2395 while (!s.empty()) {
2397 int i=s.top(); s.pop();
2398 if (vis[i])
continue;
2401 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
2402 if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
2406 if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
2407 par[i] = par[par[i]];
2413 vector<WORD> newinstr;
2416 stack<WORD> new_expr;
2418 vis = vector<bool>(n,
false);
2421 vector<bool> skip(n,
false), skipcoeff(n,
false);
2422 for (
int i=0; i<n; i++)
2423 if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=
true;
2426 vector<int> renum_par(n);
2427 for (
int i=0; i<n; i++)
2430 while (!new_expr.empty()) {
2431 int x = new_expr.top(); new_expr.pop();
2432 if (vis[x])
continue;
2437 bool first_copy=
true;
2438 int lenidx = newinstr.size()+2;
2441 stack<WORD> this_expr;
2442 this_expr.push(x+1);
2444 while (!this_expr.empty()) {
2446 int i = this_expr.top(); this_expr.pop();
2449 for (
const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2454 bool copy = !skip[i];
2455 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
2456 if (par[*(t+3)] == x) {
2459 this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
2460 if (*(instr[i]+1) == OPER_MUL) sign=1;
2464 new_expr.push(*(t+3));
2468 if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
2475 newinstr.push_back(renum_par[x]);
2476 newinstr.push_back(*(instr[x]+1));
2477 newinstr.push_back(3);
2482 int thislenidx = newinstr.size();
2483 newinstr.insert(newinstr.end(), t, t+*t);
2484 newinstr.back() *= sign;
2485 if (*(instr[i]+1) == OPER_MUL) sign=1;
2486 newinstr[lenidx] += *t;
2490 if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
2491 *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
2494 const WORD *t1 = instr[*(t+3)]+OPTHEAD;
2495 const WORD *t2 = t1+*t1;
2497 if (*t1 == 1+ABS(*(t1+*t1-1))) {
2501 WORD *t3 = &*newinstr.end();
2502 int sign2 = SGN(t3[-1]);
2503 newinstr.erase(newinstr.end()-3, newinstr.end());
2506 for (
const WORD *tt=t1; *tt!=0; tt+=*tt) {
2509 if (numargs==2 && *(t2+4)==1) {
2511 newinstr[newinstr.size()-4] = *(t2+1);
2512 newinstr[newinstr.size()-3] = *(t2+2);
2513 newinstr[newinstr.size()-2] = *(t2+3);
2514 newinstr[newinstr.size()-1] = *(t2+4);
2515 sign2 *= SGN(*(t2+*t2-1));
2519 if (*(t2+1)==EXTRASYMBOL)
2520 renum_par[*(t+3)] = *(t2+3);
2527 if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
2528 for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
2529 if ( tt[*tt-1] < 0 ) {
2530 tt[*tt-1] = -tt[*tt-1];
2535 skipcoeff[*(t+3)]=
true;
2539 newinstr.insert(newinstr.end(), t1+1, t1+*t1);
2540 newinstr.back() *= sign2;
2541 newinstr[thislenidx] += ABS(newinstr.back()) - 3;
2542 newinstr[lenidx] += ABS(newinstr.back()) - 3;
2551 newinstr.push_back(0);
2561 vector<int> renum(n,-1);
2563 for (
int i=0; i<n; i++)
2564 if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
2567 tbegin = &*newinstr.begin();
2568 tend = tbegin+newinstr.size();
2569 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2))
2570 instr[renum[*t]] = t;
2574 vector<WORD> sortinstr;
2575 for (
int i=0; i<next; i++) {
2576 int idx = sortinstr.size();
2577 sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2579 sortinstr[idx] = renum[sortinstr[idx]];
2582 for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
2583 if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
2584 *(t2+3) = renum[*(t2+3)];
2588 MesPrint (
"*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2625 int type, arg1, arg2, improve;
2627 vector<int> eqnidxs;
2630 if (arg1 != a.arg1)
return arg1 < a.arg1;
2631 if (arg2 != a.arg2)
return arg2 < a.arg2;
2632 if (type != a.type)
return type < a.type;
2633 return coeff < a.coeff;
2654 MesPrint (
"*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2659 vector<optimization> res;
2663 map<optimization, vector<int> > cnt;
2666 map<vector<int>,
int> idx_coeff;
2668 const WORD *ebegin = &*instr.begin();
2669 const WORD *eend = ebegin+instr.size();
2671 for (
int optim_type=0; optim_type<=4; optim_type++) {
2677 optim.type = optim_type;
2681 if (optim_type == 0) {
2682 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2683 if (*(e+1) != OPER_MUL)
continue;
2684 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2685 if (*t == ABS(*(t+*t-1))+1)
continue;
2687 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2688 optim.arg2 = *(t+4);
2689 cnt[optim].push_back(e-ebegin);
2696 if (optim_type == 1) {
2697 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2698 if (*(e+1) != OPER_MUL)
continue;
2700 for (
const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2701 if (*t1 == ABS(*(t1+*t1-1))+1)
continue;
2702 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2704 for (
const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2705 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
2706 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2708 if (*(t1+4) == *(t2+4)) {
2711 if (optim.arg1 > optim.arg2)
2712 swap(optim.arg1, optim.arg2);
2715 cnt[optim].push_back(e-ebegin);
2718 cnt[optim].push_back(e-ebegin);
2719 cnt[optim].push_back(e-ebegin);
2728 if (optim_type == 2) {
2729 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2731 if (*(e+1) == OPER_ADD) {
2734 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2735 if (*t == ABS(*(t+*t-1))+1)
continue;
2738 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1)
continue;
2740 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2741 optim.coeff.back() = ABS(optim.coeff.back());
2743 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2746 cnt[optim].push_back(e-ebegin);
2750 else if (*(e+1) == OPER_MUL) {
2752 optim.coeff.clear();
2754 for (
const WORD *t=e+3; *t!=0; t+=*t)
2755 if (*t == ABS(*(t+*t-1))+1) {
2756 if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1)
continue;
2757 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2758 optim.coeff.back() = ABS(optim.coeff.back());
2761 if (!optim.coeff.empty())
2762 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2763 if (*t == ABS(*(t+*t-1))+1)
continue;
2764 if (*(t+4) != 1)
continue;
2765 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2767 cnt[optim].push_back(e-ebegin);
2774 if (optim_type == 3) {
2775 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2777 if (*(e+1) != OPER_ADD)
continue;
2779 optim.coeff.clear();
2781 for (
const WORD *t=e+3; *t!=0; t+=*t)
2782 if (*t == ABS(*(t+*t-1))+1) {
2783 optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2786 if (!optim.coeff.empty()) {
2787 for (
const WORD *t=e+3; *t!=0; t+=*t) {
2788 if (*t == ABS(*(t+*t-1))+1)
continue;
2789 if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1)
continue;
2790 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2792 optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2794 cnt[optim].push_back(e-ebegin);
2795 if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2802 if (optim_type == 4) {
2803 for (
const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2804 if (*(e+1) != OPER_ADD)
continue;
2806 for (
const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2807 if (*t1 == ABS(*(t1+*t1-1))+1)
continue;
2808 int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2810 for (
const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2811 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
2812 int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2814 int sign1 = SGN(*(t1+*t1-1));
2815 int sign2 = SGN(*(t2+*t2-1));
2817 if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
2819 optim.type = (sign1 * sign2 == 1 ? 4 : 5);
2822 if (optim.arg1 > optim.arg2) {
2823 swap(optim.arg1, optim.arg2);
2826 if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
2827 cnt[optim].push_back(e-ebegin);
2830 cnt[optim].push_back(e-ebegin);
2831 cnt[optim].push_back(e-ebegin);
2842 for (map<
optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
2843 int improve = i->second.size() - 1;
2845 res.push_back(i->first);
2846 res.back().improve = improve;
2847 res.back().eqnidxs = i->second;
2850 res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
2857 MesPrint (
"*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2889 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2890 thetime_str().c_str(), optim.improve,
2891 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1, optim.arg2);
2892 else if (optim.type==1 || optim.type>=4)
2893 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2894 thetime_str().c_str(), optim.improve,
2895 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1,
2896 optim.type==1 ?
'*' : optim.type==4 ?
'+' :
'-',
2897 optim.arg2>0?
'x':
'Z', ABS(optim.arg2)-1);
2899 WORD n = optim.coeff.back()/2;
2900 UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2901 PrtLong((UWORD *)&optim.coeff[0], n, num);
2902 PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2903 MesPrint (
"*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2904 thetime_str().c_str(), optim.improve,
2905 optim.arg1>0?
'x':
'Z', ABS(optim.arg1)-1,
2906 optim.type==2 ?
'*' :
'+', num,den);
2911 bool substituted =
false;
2912 WORD *ebegin = &*instr.begin();
2915 if (optim.type == 0) {
2917 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2918 int varnumx = ABS(optim.arg1) - 1;
2921 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
2922 WORD *e = ebegin + optim.eqnidxs[i];
2923 if (*(e+1) != OPER_MUL)
continue;
2926 for (WORD *t=e+3; *t!=0; t+=*t) {
2927 if (*t == ABS(*(t+*t-1))+1)
continue;
2928 if (*(t+1)==vartypex &&
2933 *(t+1) = EXTRASYMBOL;
2944 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2950 instr.push_back(newid);
2951 instr.push_back(OPER_MUL);
2952 instr.push_back(12);
2954 instr.push_back(vartypex);
2956 instr.push_back(varnumx);
2965 if (optim.type == 1) {
2967 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2968 int varnumx = ABS(optim.arg1) - 1;
2969 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
2970 int varnumy = ABS(optim.arg2) - 1;
2972 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
2973 WORD *e = ebegin + optim.eqnidxs[i];
2974 if (*(e+1) != OPER_MUL)
continue;
2978 for (WORD *t=e+3; *t!=0; t+=*t) {
2979 if (*t == ABS(*(t+*t-1))+1)
continue;
2980 if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
2981 if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
2985 if (powx>0 && powy>0 && powx==powy) {
2990 for (WORD *t=e+3; *t!=0;) {
2993 if (*t == ABS(*(t+*t-1))+1 ||
2994 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
2995 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
2996 memmove(newt, t, *t*
sizeof(WORD));
3000 sign *= SGN(*(t+*t-1));
3007 *newt++ = EXTRASYMBOL;
3022 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3028 instr.push_back(newid);
3029 instr.push_back(OPER_MUL);
3030 instr.push_back(20);
3032 instr.push_back(vartypex);
3034 instr.push_back(varnumx);
3040 instr.push_back(vartypey);
3042 instr.push_back(varnumy);
3052 if (optim.type == 2) {
3053 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3054 int varnum = ABS(optim.arg1) - 1;
3056 WORD ncoeff = optim.coeff.back();
3059 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3060 WORD *e = ebegin + optim.eqnidxs[i];
3062 if (*(e+1) == OPER_ADD) {
3064 for (WORD *t=e+3; *t!=0; t+=*t) {
3066 if (*t == ABS(*(t+*t-1))+1)
continue;
3068 if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
3069 BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3070 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3073 int sign = SGN(*(t+*t-1));
3076 while (*tend!=0) tend+=*tend;
3077 WORD nmove = tend - t - *t;
3078 memmove(t, t+*t, nmove*
sizeof(WORD));
3096 else if (*(e+1) == OPER_MUL) {
3098 bool coeff_match=
false, var_match=
false;
3102 for (WORD *t=e+3; *t!=0; t+=*t) {
3103 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3104 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3106 sign *= SGN(*(t+*t-1));
3108 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
3110 sign *= SGN(*(t+*t-1));
3115 if (coeff_match && var_match) {
3119 for (WORD *t=e+3; *t!=0;) {
3123 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3124 memmove(newt, t, dt*
sizeof(WORD));
3132 *newt++ = EXTRASYMBOL;
3148 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3154 instr.push_back(newid);
3155 instr.push_back(OPER_ADD);
3156 instr.push_back(9+ABS(ncoeff));
3157 instr.push_back(5+ABS(ncoeff));
3158 instr.push_back(vartype);
3160 instr.push_back(varnum);
3162 for (
int i=0; i<ABS(ncoeff); i++)
3163 instr.push_back(optim.coeff[i]);
3168 if (optim.type == 3) {
3169 int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3170 int varnum = ABS(optim.arg1) - 1;
3172 WORD ncoeff = optim.coeff.back();
3175 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3176 WORD *e = ebegin + optim.eqnidxs[i];
3178 if (*(e+1) != OPER_ADD)
continue;
3180 int coeff_match=0, var_match=0;
3182 for (WORD *t=e+3; *t!=0; t+=*t) {
3183 if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
3184 (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
3185 coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
3186 else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
3187 var_match = SGN(*(t+7));
3191 if (coeff_match * var_match == 1) {
3195 for (WORD *t=e+3; *t!=0;) {
3197 if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3198 memmove(newt, t, dt*
sizeof(WORD));
3205 *newt++ = EXTRASYMBOL;
3211 *newt++ = 3*coeff_match;
3220 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3226 instr.push_back(newid);
3227 instr.push_back(OPER_ADD);
3228 instr.push_back(13+ABS(ncoeff));
3230 instr.push_back(vartype);
3232 instr.push_back(varnum);
3237 instr.push_back(ABS(ncoeff)+1);
3238 for (
int i=0; i<ABS(ncoeff); i++)
3239 instr.push_back(optim.coeff[i]);
3244 if (optim.type >= 4) {
3246 int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3247 int varnumx = ABS(optim.arg1) - 1;
3248 int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3249 int varnumy = ABS(optim.arg2) - 1;
3252 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3253 WORD *e = ebegin + optim.eqnidxs[i];
3255 if (*(e+1) != OPER_ADD)
continue;
3257 const WORD *coeffx=NULL, *coeffy=NULL;
3258 WORD ncoeffx=0,ncoeffy=0;
3261 for (WORD *t=e+3; *t!=0; t+=*t) {
3262 if (*t == ABS(*(t+*t-1))+1)
continue;
3263 if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
3265 ncoeffx = *(t+*t-1);
3267 if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
3269 ncoeffy = *(t+*t-1);
3275 if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
3277 if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
3279 vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
3290 for (WORD *t=e+3; *t!=0;) {
3292 if (*t == ABS(*(t+*t-1))+1 ||
3293 (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3294 !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3295 memmove(newt, t, dt*
sizeof(WORD));
3301 *newt++ = 5 + ABS(ncoeffx);
3302 *newt++ = EXTRASYMBOL;
3306 for (
int j=0; j<ABS(ncoeffx); j++)
3322 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3334 instr.push_back(newid);
3335 instr.push_back(OPER_ADD);
3336 instr.push_back(20);
3338 instr.push_back(vartypex);
3340 instr.push_back(varnumx);
3346 instr.push_back(vartypey);
3348 instr.push_back(varnumy);
3352 instr.push_back(3*(optim.type==4?1:-1));
3357 vector<int> renum(newid+1, 0);
3358 bool do_renum=
false;
3361 ebegin = &*instr.begin();
3363 for (
int i=0; i<(int)optim.eqnidxs.size(); i++) {
3364 WORD *e = ebegin + optim.eqnidxs[i];
3366 if (*t==0)
continue;
3367 if (*(t+*t)!=0)
continue;
3368 if (*t!=8)
continue;
3369 if (*(t+4)!=1)
continue;
3370 if (*(t+5)!=1 || *(t+6)!=1)
continue;
3373 renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
3384 WORD *eend = ebegin+instr.size();
3386 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3387 for (WORD *t=e+3; *t!=0; t+=*t) {
3388 if (*t == ABS(*(t+*t-1))+1)
continue;
3389 if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
3390 *(t+*t-1) *= SGN(renum[*(t+3)]);
3391 *(t+3) = ABS(renum[*(t+3)]) - 1;
3399 MesPrint (
"*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3436 MesPrint (
"*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3442 vector<int> instr_idx(n);
3443 WORD *ebegin = &*instr.begin();
3444 WORD *eend = ebegin+instr.size();
3445 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3446 instr_idx[*e] = e - ebegin;
3455 WORD *numpar = (WORD *)Malloc1(nmax*
sizeof(WORD),
"numpar");
3456 for (
int i = 0; i < nmax; i++ ) numpar[i] = 0;
3458 for (WORD *e=ebegin; e!=eend; e+=*(e+2))
3459 for (WORD *t=e+3; *t!=0; t+=*t) {
3460 if (*t == ABS(*(t+*t-1))+1)
continue;
3461 if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
3465 for (
int i=0; i<n; i++) {
3466 WORD *e = &*instr.begin() + instr_idx[i];
3467 if (*(e+1) != OPER_ADD)
continue;
3472 for (WORD *t=e+3; *t!=0; t+=*t) {
3473 if (*t==ABS(*(t+*t-1))+1)
continue;
3477 cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
3480 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3481 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3482 if (*(t2+1) != OPER_MUL)
continue;
3483 for (t2+=3; *t2!=0; t2+=*t2) {
3484 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
3486 cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
3493 for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
3494 if (it->second > best) { x=it->first; best=it->second; }
3498 if (best>=2 && best>improve) {
3500 vector<WORD> new_eqn;
3501 new_eqn.push_back(n);
3502 new_eqn.push_back(OPER_ADD);
3503 new_eqn.push_back(0);
3507 for (WORD *t=e+3; *t!=0; t+=dt) {
3511 if (*t!=ABS(*(t+*t-1))+1) {
3515 WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
3517 new_eqn.push_back(*t-4);
3518 new_eqn.insert(new_eqn.end(), t+5, t+dt);
3524 if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3525 WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3526 if (*(t2+1) == OPER_MUL) {
3528 for (t2+=3; *t2!=0; t2+=*t2) {
3529 if (*t2 == ABS(*(t2+*t2-1))+1)
continue;
3530 WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
3532 if (y==x && *(t2+4)==1) {
3536 WORD sign = SGN(*(tend-1));
3537 while (*tend!=0) tend+=*tend;
3538 int dt2 = tend - (t2+*t2);
3539 memmove(t2, t2+*t2, (dt2+1)*
sizeof(WORD));
3548 int thisidx=new_eqn.size();
3549 new_eqn.insert(new_eqn.end(), t, t+dt);
3550 t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
3554 if (*t2 == ABS(*(t2+*t2-1))+1) {
3555 if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
3557 WORD sign = SGN(new_eqn.back());
3558 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3559 new_eqn.insert(new_eqn.end(), t2, t2+*t2);
3560 new_eqn.back() *= sign;
3566 UWORD *tmp = NumberMalloc(
"partial_factorize");
3568 MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
3569 (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
3571 new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3572 new_eqn.push_back(ABS(ntmp)+1);
3573 new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
3574 NumberFree(tmp,
"partial_factorize");
3578 else if (*(t2+4)==1) {
3580 new_eqn.back() *= SGN(*(t2+*t2-1));
3581 new_eqn[thisidx+1] = *(t2+1);
3582 new_eqn[thisidx+2] = *(t2+2);
3583 new_eqn[thisidx+3] = *(t2+3);
3584 new_eqn[thisidx+4] = *(t2+4);
3595 memmove(newt, t, dt*
sizeof(WORD));
3601 new_eqn.push_back(0);
3602 new_eqn[2] = new_eqn.size();
3604 bool empty = newt == e+3;
3605 if ( n+1 >= nmax ) {
3606 int i, newnmax = nmax*2;
3607 WORD *newnumpar = (WORD *)Malloc1(newnmax*
sizeof(WORD),
"newnumpar");
3608 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3609 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3610 M_free(numpar,
"numpar");
3621 *newt++ = EXTRASYMBOL;
3632 instr_idx.push_back(instr.size());
3633 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3637 new_eqn.push_back(n);
3638 new_eqn.push_back(OPER_MUL);
3639 new_eqn.push_back(20);
3640 new_eqn.push_back(8);
3644 new_eqn.push_back(SYMBOL);
3645 new_eqn.push_back(4);
3646 new_eqn.push_back(x-1);
3647 new_eqn.push_back(1);
3650 new_eqn.push_back(EXTRASYMBOL);
3651 new_eqn.push_back(4);
3652 new_eqn.push_back(-x-1);
3653 new_eqn.push_back(1);
3655 new_eqn.push_back(1);
3656 new_eqn.push_back(1);
3657 new_eqn.push_back(3);
3658 new_eqn.push_back(8);
3660 new_eqn.push_back(EXTRASYMBOL);
3661 new_eqn.push_back(4);
3662 new_eqn.push_back(n-1);
3663 new_eqn.push_back(1);
3664 new_eqn.push_back(1);
3665 new_eqn.push_back(1);
3666 new_eqn.push_back(3);
3667 new_eqn.push_back(0);
3671 instr_idx.push_back(instr.size());
3672 instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3673 if ( n+1 >= nmax ) {
3674 int i, newnmax = nmax*2;
3675 WORD *newnumpar = (WORD *)Malloc1(newnmax*
sizeof(WORD),
"newnumpar");
3676 for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3677 for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3678 M_free(numpar,
"numpar");
3687 e = &*instr.begin() + instr_idx[i];
3689 memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*
sizeof(WORD));
3698 MesPrint (
"*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3700 M_free(numpar,
"numpar");
3731 MesPrint (
"*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3732 thetime_str().c_str(), old_num_oper);
3737 WORD *ebegin = &*instr.begin();
3738 WORD *eend = ebegin+instr.size();
3741 int final_eqn_idx = 0;
3744 for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3746 final_eqn_idx = e-ebegin;
3750 int old_next_eqn = next_eqn;
3756 vector<int> add_eqnidxs;
3759 int num_do_optims = max(AO.Optimize.greedyminnum, (
int)optim.size()*AO.Optimize.greedymaxperc/100);
3763 for (
int i=0; i<(int)optim.size(); i++)
3764 best_improve = max(best_improve, optim[i].improve);
3765 if (best_improve <= 1)
3766 num_do_optims = MAXPOSITIVE;
3768 while (optim.size() > 0 && num_do_optims-- > 0) {
3773 for (
int i=0; i<(int)optim.size(); i++)
3774 if (optim[i].improve > best_improve) {
3776 best_improve=optim[i].improve;
3780 for (
int i=0; i<(int)add_eqnidxs.size(); i++)
3781 optim[best].eqnidxs.push_back(add_eqnidxs[i]);
3784 int next_idx = instr.size();
3787 add_eqnidxs.push_back(next_idx);
3790 optim.erase(optim.begin()+best);
3797 if (next_eqn == old_next_eqn)
break;
3801 instr.push_back(next_eqn);
3802 instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
3805 instr[final_eqn_idx+3] = 0;
3808 WORD *t = &instr[0];
3810 ebegin = &*instr.begin();
3811 eend = ebegin+instr.size();
3814 for (WORD *e=ebegin; e!=eend; e+=de) {
3817 while (*(e+n) != 0) n+=*(e+n);
3819 memmove (t, e, n*
sizeof(WORD));
3824 instr.resize(t - &instr[0]);
3827 MesPrint (
"*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3870 MesPrint (
"*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3874 vector<const WORD *> instr;
3875 const WORD *tbegin = &*all_instr.begin();
3876 const WORD *tend = tbegin+all_instr.size();
3877 for (
const WORD *t=tbegin; t!=tend; t+=*(t+2))
3879 int n = instr.size();
3884 vector<int> vars_needed(n);
3885 vector<bool> vis(n,
false);
3886 vector<vector<int> > conn(n);
3891 while (!s.empty()) {
3892 int i=s.top(); s.pop();
3895 if (vis[i])
continue;
3900 for (
const WORD *t=instr[i]+3; *t!=0; t+=*t)
3901 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3903 conn[i].push_back(k);
3911 vector<pair<int,int> > need;
3912 for (
int j=0; j<(int)conn[i].size(); j++)
3913 need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
3916 if (*(instr[i]+1) != OPER_COMMA)
3917 sort(need.rbegin(), need.rend());
3920 for (
int j=0; j<(int)need.size(); j++) {
3921 vars_needed[i] = max(vars_needed[i], need[j].first+j);
3922 conn[i][j] = need[j].second;
3929 vector<int> order, first(n,0), last(n,0);
3930 vis = vector<bool>(n,
false);
3933 while (!s.empty()) {
3935 int i=s.top(); s.pop();
3939 if (vis[i])
continue;
3942 for (
int j=(
int)conn[i].size()-1; j>=0; j--)
3943 s.push(conn[i][j]+1);
3947 first[i] = last[i] = order.size();
3949 for (
int j=0; j<(int)conn[i].size(); j++) {
3951 last[k] = max(last[k], first[i]);
3960 vector<int> renum(n);
3962 for (
int i=0; i<(int)order.size(); i++) {
3963 for (
int j=0; j<(int)conn[order[i]].size(); j++) {
3964 int k = conn[order[i]][j];
3965 if (last[k] == i) var.insert(renum[k]);
3968 if (var.empty()) var.insert(numvar++);
3969 renum[order[i]] = *var.begin(); var.erase(var.begin());
3975 vector<WORD> newinstr;
3977 for (
int i=0; i<(int)order.size(); i++) {
3979 int j = newinstr.size();
3980 newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
3982 newinstr[j] = renum[newinstr[j]];
3984 for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
3985 if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
3986 *(t+3) = renum[*(t+3)];
3990 MesPrint (
"*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
4017 MesPrint (
"*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4024 LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
4025 if (time_limit == 0) time_limit=MAXPOSITIVE;
4028 LOCK(optimize_lock);
4029 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4030 optimize_best_Horner_schemes.pop_back();
4031 UNLOCK(optimize_lock);
4043 if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
4048 if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
4062 LOCK(optimize_lock);
4063 if (num_oper < optimize_best_num_oper) {
4064 optimize_num_vars = Horner_scheme.size();
4065 optimize_best_num_oper = num_oper;
4066 optimize_best_instr = instr;
4067 optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
4069 UNLOCK(optimize_lock);
4072 AN.poly_num_vars = 0;
4073 M_free(AN.poly_vars,
"poly_vars");
4076 MesPrint (
"*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4087 void PF_optimize_expression_given_Horner_master_init () {
4092 int PF_optimize_expression_given_Horner_master_next() {
4102 void PF_optimize_expression_given_Horner_master () {
4105 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4109 vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4110 optimize_best_Horner_schemes.pop_back();
4113 int next = PF_optimize_expression_given_Horner_master_next();
4117 int len = Horner_scheme.size();
4123 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4129 void PF_optimize_expression_given_Horner_master_wait () {
4132 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4136 for (
int i = 1; i < PF.numtasks; i++) {
4137 int next = PF_optimize_expression_given_Horner_master_next();
4146 optimize_best_num_oper = INT_MAX;
4147 for (
int i = 1; i < PF.numtasks; i++) {
4155 if (len >= optimize_best_num_oper)
continue;
4158 optimize_best_num_oper = len;
4160 optimize_best_instr.resize(len);
4163 optimize_best_vars.resize(len);
4166 optimize_num_vars = optimize_best_vars.size();
4170 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4176 void PF_optimize_expression_given_Horner_slave () {
4179 MesPrint (
"*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4182 optimize_best_Horner_schemes.clear();
4183 optimize_best_num_oper = INT_MAX;
4201 if (len == 0)
break;
4204 optimize_best_Horner_schemes.push_back(vector<WORD>());
4205 vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4206 Horner_scheme.resize(len);
4214 if (optimize_best_num_oper != INT_MAX) {
4215 len = optimize_best_instr.size();
4218 len = optimize_best_vars.size();
4225 MesPrint (
"*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4245 VOID
generate_output (
const vector<WORD> &instr,
int exprnr,
int extraoffset,
const vector<vector<WORD> > &brackets) {
4248 MesPrint (
"*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4252 vector<WORD> output;
4257 int maxsize = (int)instr.size();
4258 for (
int i=0; i<maxsize; i+=instr[i+2]) num++;
4259 int *tstart = (
int *)Malloc1(num*
sizeof(
int),
"nplaces");
4261 for (
int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
4262 for (
int j=0; j<num; j++) {
4266 WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
4269 AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
4272 for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4273 if (*t == ABS(*(t+*t-1))+1)
continue;
4274 if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
4275 if (*(t+1)==EXTRASYMBOL) {
4277 *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
4284 if (instr[i+1]==OPER_MUL) {
4286 WORD *now=AT.WorkPointer+1;
4291 for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
4293 if (*t == ABS(*(t+*t-1))+1) {
4295 memmove(AT.WorkPointer+instr[i+2], t, *t*
sizeof(WORD));
4301 memmove(now, t+1, n*
sizeof(WORD));
4303 coeff_sign *= SGN(*(t+dt-1));
4309 int n = *(AT.WorkPointer + instr[i+2]) - 1;
4310 memmove(now, AT.WorkPointer+instr[i+2]+1, n*
sizeof(WORD));
4320 *(now-1) *= coeff_sign;
4322 *AT.WorkPointer = now - AT.WorkPointer;
4328 if (instr[i+1]==OPER_COMMA) {
4329 WORD *start = AT.WorkPointer + instr[i+2];
4332 for (
const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4333 if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) )
break;
4334 *now++ = *t + brackets[b].size();
4335 if (brackets[b].size() != 0) {
4336 memcpy(now, &brackets[b][0], brackets[b].size()*
sizeof(WORD));
4337 now += brackets[b].size();
4339 memcpy(now, t+1, (*t-1)*
sizeof(WORD));
4344 memmove(AT.WorkPointer, start, (now-start)*
sizeof(WORD));
4349 if (i+instr[i+2]<(
int)instr.size())
4350 output.push_back(instr[i]+extraoffset);
4352 output.push_back(-(exprnr+1));
4357 while (*(AT.WorkPointer+n)!=0)
4358 n += *(AT.WorkPointer+n);
4360 output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
4364 output.push_back(0);
4367 if (AO.OptimizeResult.code != NULL)
4368 M_free(AO.OptimizeResult.code,
"optimize output");
4370 M_free(tstart,
"nplaces");
4373 AO.OptimizeResult.codesize = output.size();
4374 AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*
sizeof(WORD),
"optimize output");
4375 memcpy(AO.OptimizeResult.code, &output[0], output.size()*
sizeof(WORD));
4378 MesPrint (
"*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4397 MesPrint (
"*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4402 WORD *oldWorkPointer = AT.WorkPointer;
4404 CBUF *C = cbuf+AC.cbufnum;
4405 WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
4408 SetScratch(AR.infile,&(e->onfile));
4410 if ( GetTerm(BHEAD term) <= 0 ) {
4411 MesPrint(
"Expression %d has problems in scratchfile",exprnr);
4414 SeekScratch(AR.outfile,&position);
4415 e->onfile = position;
4416 if (
PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
4417 MesPrint(
"Expression %d has problems in output scratchfile",exprnr);
4421 AR.CurExpr = exprnr;
4426 WORD *t = AO.OptimizeResult.code;
4428 WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
4430 bool is_expr = *t < 0;
4434 memcpy(AT.WorkPointer, t, *t*
sizeof(WORD));
4435 Generator(BHEAD AT.WorkPointer, C->numlhs);
4445 if (
EndSort(BHEAD NULL,0) < 0) {
4450 AT.WorkPointer = oldWorkPointer;
4451 AR.CurExpr = oldcurexpr;
4454 MesPrint (
"*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4477 MesPrint (
"*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4479 if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
4489 WORD *t = AO.OptimizeResult.code;
4493 t++;
while (*t!=0) t+=*t; t++;
4495 WORD **tstart = (WORD **)Malloc1(num*
sizeof(WORD *),
"print objects");
4496 t = AO.OptimizeResult.code; num = 0;
4499 t++;
while (*t!=0) t+=*t; t++;
4502 int halfnum = num/2;
4503 for (
int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
4504 for (
int i = 0; i < num; i++ ) {
4507 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4508 else if (print_expr)
4509 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4511 CBUF *C = cbuf + AM.sbufnum;
4512 if (C->numrhs >= AO.OptimizeResult.minvar)
4513 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4517 CBUF *C = cbuf + AM.sbufnum;
4518 if (C->numrhs >= AO.OptimizeResult.minvar)
4519 PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4521 WORD *t = AO.OptimizeResult.code;
4524 PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4526 else if (print_expr)
4527 PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4529 while (*t!=0) t+=*t;
4535 MesPrint (
"*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4589 #if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY)) 4591 struct save_printflag {
4593 oldprintflag = AS.printflag;
4597 AS.printflag = oldprintflag;
4604 MesPrint (
"*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4605 MesPrint (
"*** %"); PrintRunningTime();
4609 optimize_lock = dummylock;
4612 AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
4619 if (PF.me == MASTER)
4621 MesPrint (
"*** runtime after preparing the expression: %"); PrintRunningTime();
4624 if (optimize_expr[0]==0 ||
4625 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
4626 (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
4627 optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
4628 if (AO.OptimizeResult.code != NULL)
4629 M_free(AO.OptimizeResult.code,
"optimize output");
4634 AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*
sizeof(WORD),
"optimize output");
4635 AO.OptimizeResult.code[0] = -(exprnr+1);
4636 memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*
sizeof(WORD));
4637 AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
4641 optimize_best_Horner_schemes.clear();
4642 if (AO.Optimize.horner == O_OCCURRENCE) {
4643 if (AO.Optimize.hornerdirection != O_BACKWARD)
4644 optimize_best_Horner_schemes.push_back(
occurrence_order(optimize_expr,
false));
4645 if (AO.Optimize.hornerdirection != O_FORWARD)
4646 optimize_best_Horner_schemes.push_back(
occurrence_order(optimize_expr,
true));
4649 if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
4650 optimize_best_Horner_schemes.push_back(simulated_annealing());
4653 mcts_best_schemes.clear();
4654 if ( AO.inscheme ) {
4655 optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
4656 for (
int i=0; i < AO.schemenum; i++ )
4657 optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
4660 for (
int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
4663 for (
set<pair<
int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
4664 optimize_best_Horner_schemes.push_back(i->second);
4666 MesPrint (
"{%a} -> %d",i->second.size(), &i->second[0], i->first);
4676 if (PF.me == MASTER)
4678 MesPrint (
"*** runtime after Horner: %"); PrintRunningTime();
4682 if (PF.me == MASTER ) {
4683 PF_optimize_expression_given_Horner_master_init();
4687 optimize_best_num_oper = INT_MAX;
4689 int imax = (int)optimize_best_Horner_schemes.size();
4691 for (
int i=0; i<imax; i++) {
4692 #if defined(WITHPTHREADS) 4693 if (AM.totalnumberofthreads > 1)
4694 optimize_expression_given_Horner_threaded();
4696 #elif defined(WITHMPI) 4697 if (PF.numtasks > 1)
4698 PF_optimize_expression_given_Horner_master();
4705 PF_optimize_expression_given_Horner_master_wait();
4708 if (PF.numtasks > 1)
4709 PF_optimize_expression_given_Horner_slave();
4719 if (PF.me == MASTER)
4721 generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
4725 if (AO.OptimizeResult.code == NULL) {
4726 AO.OptimizeResult.code = (WORD *)Malloc1(
sizeof(WORD),
"optimize output");
4733 if (PF.me == MASTER) {
4735 PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4736 PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4739 if (PF.me != MASTER) {
4740 PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4741 PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4747 snprintf (str,100,
"%d",AO.OptimizeResult.minvar);
4748 PutPreVar((UBYTE *)
"optimminvar_",(UBYTE *)str,0,1);
4749 snprintf (str,100,
"%d",AO.OptimizeResult.maxvar);
4750 PutPreVar((UBYTE *)
"optimmaxvar_",(UBYTE *)str,0,1);
4754 if (PF.me == MASTER)
4761 if (PF.me == MASTER)
4767 if (PF.me == MASTER) {
4770 if ( AO.Optimize.printstats > 0 ) {
4775 snprintf(str,20,
"%d",numop);
4776 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)str,0,1);
4781 snprintf(str,20,
"%d",numop);
4782 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)str,0,1);
4785 if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
4787 UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
4789 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4791 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4792 TokenToLine((UBYTE *)
" Scheme selected: ");
4793 for ( i = 0; i < optimize_num_vars; i++ ) {
4795 sym = optimize_best_vars[i];
4796 if ( i > 0 ) TokenToLine((UBYTE *)
",");
4797 if ( sym < NumSymbols ) {
4798 StrCopy(FindSymbol(sym),OutScr);
4802 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4803 if ( AC.extrasymbols == 0 ) {
4804 Out = NumCopy((MAXVARIABLES-sym),Out);
4805 Out = StrCopy((UBYTE *)
"_",Out);
4807 else if ( AC.extrasymbols == 1 ) {
4808 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4811 TokenToLine(OutScr);
4813 TokenToLine((UBYTE *)
";");
4816 AO.OutputLine = old1;
4821 UBYTE *OutScr, *Out, *outstring = 0;
4823 AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4824 OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4825 for ( i = 0; i < optimize_num_vars; i++ ) {
4827 sym = optimize_best_vars[i];
4828 if ( sym < NumSymbols ) {
4829 StrCopy(FindSymbol(sym),OutScr);
4833 Out = StrCopy((UBYTE *)AC.extrasym,Out);
4834 if ( AC.extrasymbols == 0 ) {
4835 Out = NumCopy((MAXVARIABLES-sym),Out);
4836 Out = StrCopy((UBYTE *)
"_",Out);
4838 else if ( AC.extrasymbols == 1 ) {
4839 Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4842 outstring = AddToString(outstring,OutScr,1);
4844 if ( outstring == 0 ) {
4845 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)
"",0,1);
4848 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)outstring,0,1);
4849 M_free(outstring,
"AddToString");
4857 if ( PF.me == MASTER ) {
4863 value = GetPreVar((UBYTE *)
"optimvalue_", WITHERROR);
4864 bytes = strlen((
char *)value);
4865 PF_LongMultiPack(&bytes, 1, PF_INT);
4866 PF_LongMultiPack(value, bytes, PF_BYTE);
4868 value = GetPreVar((UBYTE *)
"optimscheme_", WITHERROR);
4869 bytes = strlen((
char *)value);
4870 PF_LongMultiPack(&bytes, 1, PF_INT);
4871 PF_LongMultiPack(value, bytes, PF_BYTE);
4874 if ( PF.me != MASTER ) {
4875 static vector<UBYTE> prevarbuf;
4879 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4880 prevarbuf.reserve(bytes + 1);
4881 value = &prevarbuf[0];
4882 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4883 value[bytes] =
'\0';
4884 PutPreVar((UBYTE *)
"optimvalue_", value, NULL, 1);
4886 PF_LongMultiUnpack(&bytes, 1, PF_INT);
4887 prevarbuf.reserve(bytes + 1);
4888 value = &prevarbuf[0];
4889 PF_LongMultiUnpack(value, bytes, PF_BYTE);
4890 value[bytes] =
'\0';
4891 PutPreVar((UBYTE *)
"optimscheme_", value, NULL, 1);
4896 M_free(optimize_expr,
"LoadOptim");
4899 MesPrint (
"*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4929 if ( AO.OptimizeResult.code != NULL ) {
4930 M_free(AO.OptimizeResult.code,
"optimize output");
4931 AO.OptimizeResult.code = NULL;
4932 AO.OptimizeResult.codesize = 0;
4933 PutPreVar((UBYTE *)
"optimminvar_",(UBYTE *)(
"0"),0,1);
4934 PutPreVar((UBYTE *)
"optimmaxvar_",(UBYTE *)(
"0"),0,1);
4935 PruneExtraSymbols(AO.OptimizeResult.minvar-1);
4936 cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
4937 AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
4939 if ( AO.OptimizeResult.nameofexpr != NULL ) {
4946 if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
4947 MesPrint(
"@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
4950 M_free(AO.OptimizeResult.nameofexpr,
"optimize expression name");
4951 AO.OptimizeResult.nameofexpr = NULL;
4952 w = &(Expressions[numexpr].status);
4953 *w = SetExprCases(DROP,1,*w);
4954 if ( *w < 0 ) error = 1;
4956 snprintf (str,20,
"%d",cbuf[AM.sbufnum].numrhs);
4957 PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
4958 PutPreVar((UBYTE *)
"optimvalue_",(UBYTE *)(
"0"),0,1);
4959 PutPreVar((UBYTE *)
"optimscheme_",(UBYTE *)(
"0"),0,1);
int Optimize(WORD exprnr, int do_print)
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
WORD getpower(const WORD *term, int var, int pos)
vector< WORD > optimize_greedy(vector< WORD > instr, LONG time_limit)
void optimize_expression_given_Horner()
vector< WORD > merge_operators(const vector< WORD > &all_instr, bool move_coeff)
void PF_BroadcastBuffer(WORD **buffer, LONG *length)
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
vector< WORD > Horner_tree(const WORD *expr, const vector< WORD > &order)
vector< optimization > find_optimizations(const vector< WORD > &instr)
LONG get_expression(int exprnr)
int PF_LongMultiBroadcast(void)
bool term_compare(const WORD *a, const WORD *b)
void fixarg(UWORD *t, WORD &n)
void build_Horner_tree(const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
void my_random_shuffle(PHEAD RandomAccessIterator fr, RandomAccessIterator to)
WORD StoreTerm(PHEAD WORD *)
WORD generate_expression(WORD exprnr)
vector< WORD > occurrence_order(const WORD *expr, bool rev)
vector< WORD > recycle_variables(const vector< WORD > &all_instr)
int PF_PrepareLongMultiPack(void)
int PF_LongSingleSend(int to, int tag)
bool do_optimization(const optimization optim, vector< WORD > &instr, int newid)
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
VOID optimize_print_code(int print_expr)
int partial_factorize(vector< WORD > &instr, int n, int improve)
int count_operators(const WORD *expr, bool print=false)
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
int PF_Send(int to, int tag)
WORD Generator(PHEAD WORD *, WORD)
vector< WORD > generate_instructions(const vector< WORD > &tree, bool do_CSE)
int PF_PrepareLongSinglePack(void)
int count_operators_cse(const vector< WORD > &tree)
VOID generate_output(const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
int PF_Receive(int src, int tag, int *psrc, int *ptag)
LONG EndSort(PHEAD WORD *, int)
vector< vector< WORD > > get_brackets()