22 #ifndef _SGSOLVER_MAXMINMAX_GRB_HPP
23 #define _SGSOLVER_MAXMINMAX_GRB_HPP
25 #include "sgcommon.hpp"
28 #include "sgexception.hpp"
29 #include "gurobi_c++.h"
51 vector<bool> feasibleActions;
52 int numFeasibleActions;
54 int numActions_grandTotal;
70 const vector< vector< SGPoint> > &
payoffs;
71 const vector< vector< vector<double> > > & prob;
72 const vector< vector<int> > & numActions;
73 const vector< int > & numActions_total;
79 const double regimeChangeTol = 1e-9;
80 const double pseudoConstrTol = 1e-3;
81 const double convTol = 1e-6;
82 const int maxIter = 1e3;
117 prob(_game.getProbabilities()),
118 numActions(_game.getNumActions()),
119 numActions_total(_game.getNumActions_total()),
120 numStates(_game.getNumStates()),
121 delta(_game.getDelta()),
147 double iterate(
const SGSolverMode mode,
150 void addBoundingHyperplane(
SGPoint & currDir,
155 list<SGPoint> & newDirections,
156 list< vector<double> > & newBounds,
158 const bool addDirection);
159 void printIteration(ofstream & ofs,
int numIter);
166 double movement = 1.0;
170 SGSolverMode mode = SG_MAXMINMAX;
181 ofs.open(
"sgsolver_v3.log");
187 printIteration(ofs, numIter);
190 while (movement > convTol
191 && numIter < maxIter)
193 movement =
iterate(mode,steps);
195 cout <<
"Iteration: " << numIter
197 <<
", steps: " << steps
198 <<
", movement: " << movement
204 printIteration(ofs, numIter);
207 catch (GRBException & e)
209 cout <<
"GRB Exception caught: " << e.getMessage() << endl;
214 cout <<
"Converged!" << endl;
220 for (
int s = 0; s < numStates; s++)
222 for (
int a = 0; a < numActions_total[s]; a++)
224 for (
int p = 0; p < numPlayers; p++)
239 env.set(GRB_IntParam_Method,5);
242 env.set(GRB_DoubleParam_OptimalityTol,1e-9);
243 env.set(GRB_DoubleParam_MarkowitzTol,0.999);
244 env.set(GRB_DoubleParam_FeasibilityTol,1e-9);
246 env.set(GRB_IntParam_OutputFlag,0);
249 vector<int> numEqActions(numStates,0);
251 vector<int> actions, deviations;
254 numActions_grandTotal = 0;
259 numActions_grandTotal += numActions_total[s];
260 numEqActions[s] = numActions_total[s];
264 for (
int a = 0; a <
eqActions[s].size(); a++)
268 numActions_grandTotal ++;
274 feasibleActions = vector<bool>(numActions_grandTotal,
true);
275 numFeasibleActions = numActions_grandTotal;
277 gaToS = vector<int> (numActions_grandTotal,0);
278 gaToA = vector<int> (numActions_grandTotal,0);
281 for (
int s = 0; s < numStates; s++)
283 for (
int a = 0; a < numActions_total[s]; a++)
296 if (mode != SG_FEASIBLE)
299 GRBVar * valueFn = model.addVars(numStates);
300 GRBVar * contVals = model.addVars(numActions_grandTotal);
301 GRBVar * valueFunSlacks = model.addVars(numActions_grandTotal);
302 GRBVar * pseudoContVals = model.addVars(numActions_grandTotal);
303 GRBVar * recursiveContValSlacks = model.addVars(numActions_grandTotal);
304 GRBVar * APSContValSlacks = model.addVars(numActions_grandTotal);
305 GRBVar * APSContValVar = model.addVars(numActions_grandTotal);
306 GRBVar * feasMult = model.addVars(numActions_grandTotal*
numDirections);
307 GRBVar * ICMult = model.addVars(numActions_grandTotal*numPlayers);
308 GRBVar * currDirVar = model.addVars(2);
313 currDirVar[0].set(GRB_DoubleAttr_LB,-GRB_INFINITY);
314 currDirVar[1].set(GRB_DoubleAttr_LB,-GRB_INFINITY);
316 GRBConstr xConstr = model.addConstr(currDirVar[0] == 0.0);
317 GRBConstr yConstr = model.addConstr(currDirVar[1] == 1.0);
318 vector< vector<GRBConstr> > valueFnConstr (numStates);
319 for (
int s = 0; s < numStates; s++)
321 valueFnConstr[s] = vector<GRBConstr> (numEqActions[s]);
324 vector<GRBLinExpr> recursiveContVal(numActions_grandTotal,0);
325 vector<SGRegimeStatus> regimeStatus(numActions_grandTotal,SG_RECURSIVE);
326 vector<int> optActions(numStates,-1);
329 vector<SGICStatus> optICStatuses(numStates,SG_NONE);
332 for (
int ga = 0; ga < numActions_grandTotal; ga++)
334 contVals[ga].set(GRB_DoubleAttr_LB,-2*
payoffBound);
335 pseudoContVals[ga].set(GRB_DoubleAttr_LB,-2*
payoffBound);
336 APSContValVar[ga].set(GRB_DoubleAttr_LB,-2*
payoffBound);
338 for (
int s = 0; s < numStates; s++)
339 valueFn[s].set(GRB_DoubleAttr_LB,-GRB_INFINITY);
341 GRBLinExpr objective = 0;
344 for (
int s = 0; s < numStates; s++)
346 objective += 1e10*valueFn[s];
350 for (
int a = 0; a < numActions_total[s]; a++)
355 for (
int sp = 0; sp < numStates; sp++)
356 recursiveContVal[ga] += prob[s][a][sp]*valueFn[sp];
358 GRBLinExpr APSContVal = 0;
359 if (mode==SG_MAXMINMAX || mode==SG_APS)
362 for (
int p = 0; p < numPlayers; p++)
366 APSContVal -= ICMult[p+2*ga]*minIC;
370 list< vector<double> >::const_iterator bnd;
372 for (bnd =
bounds.begin(),
378 for (
int sp = 0; sp < numStates; sp++)
379 expBnd += (*bnd)[sp] * prob[s][a][sp];
381 APSContVal += expBnd * feasMult[ga+dirCtr*numActions_grandTotal];
386 list<SGPoint>::const_iterator dir;
388 for (
int p = 0; p < numPlayers; p++)
390 GRBLinExpr dualConstrLHS = 0;
392 dualConstrLHS += ICMult[p+2*ga];
393 dualConstrLHS += currDirVar[p];
399 dualConstrLHS -= (*dir)[p] * feasMult[ga+dirCtr*numActions_grandTotal];
402 model.addConstr(dualConstrLHS==0);
406 model.addConstr(pseudoContVals[ga] >= APSContVal);
407 model.addConstr(contVals[ga] == APSContVal+APSContValSlacks[ga]);
408 model.addConstr(APSContValVar[ga]==APSContVal);
410 model.addConstr(pseudoContVals[ga]
411 >= recursiveContVal[ga]*(1.0+pseudoConstrTol));
412 model.addConstr(contVals[ga] == recursiveContVal[ga]+recursiveContValSlacks[ga]);
414 GRBLinExpr valueFnConstrRHS = 0;
415 for (
int p = 0; p < numPlayers; p++)
416 valueFnConstrRHS += (1-delta)*
payoffs[s][a][p]*currDirVar[p];
418 valueFnConstrRHS += delta*contVals[ga];
420 valueFnConstr[s][actr] = model.addConstr(valueFn[s] == valueFnConstrRHS
421 +valueFunSlacks[ga]);
430 for (
int ga = 0; ga < numActions_grandTotal; ga++)
435 regimeStatus[ga] = SG_FIXED;
436 APSContValSlacks[ga].set(GRB_DoubleAttr_LB,0);
437 recursiveContValSlacks[ga].set(GRB_DoubleAttr_LB,-GRB_INFINITY);
442 APSContValSlacks[ga].set(GRB_DoubleAttr_LB,-GRB_INFINITY);
443 recursiveContValSlacks[ga].set(GRB_DoubleAttr_LB,0);
448 for (
int ga = 0; ga < numActions_grandTotal; ga++)
450 objective += pseudoContVals[ga];
452 objective += APSContValVar[ga];
455 model.setObjective(objective,GRB_MINIMIZE);
457 list< vector<double> > newBounds(0);
458 list<SGPoint> newDirections(0);
459 SGTuple newThreatTuple(numStates);
461 bool passNorth =
false;
462 bool newQuadrant =
true;
463 SGQuadrant quadrant = SG_NORTHEAST;
473 bool regimesSubOptimal =
true;
474 int regimeChangeIters = 0;
476 vector<SGRegimeStatus> oldRegimeStatus(regimeStatus);
477 vector<int> oldOptActions(optActions);
478 vector<SGICStatus> oldOptICStatuses(optICStatuses);
481 while (regimesSubOptimal
482 && regimeChangeIters < 10*numActions_grandTotal)
486 if (model.get(GRB_IntAttr_Status)!=GRB_OPTIMAL)
488 cout <<
"Warning: model not optimal. Code is: "
489 << model.get(GRB_IntAttr_Status) << endl;
491 if (model.get(GRB_IntAttr_Status)==GRB_UNBOUNDED)
493 cout <<
"Warning: Unbounded model" << endl;
495 if (model.get(GRB_IntAttr_Status)==GRB_INFEASIBLE)
497 cout <<
"Warning: Infeasible model" << endl;
502 if (mode!=SG_MAXMINMAX)
506 double maxSlack = 0.0;
507 for (
int ga = 0; ga < numActions_grandTotal; ga++)
510 tmp = recursiveContVal[ga].getValue()
511 -APSContValVar[ga].get(GRB_DoubleAttr_X);
512 if (regimeStatus[ga]==SG_FIXED)
523 regimesSubOptimal = (maxSlack > regimeChangeTol);
524 if (!regimesSubOptimal)
529 for (
int s = 0; s < numStates; s++)
533 for (
int a = 0; a < numActions_total[s]; a++)
539 if (valueFunSlacks[ga].get(GRB_IntAttr_VBasis) == -1)
543 if (regimeStatus[ga] == SG_FIXED
544 && ICMult[2*ga].get(GRB_IntAttr_VBasis)==-1
545 && ICMult[1+2*ga].get(GRB_IntAttr_VBasis)==-1)
547 regimeStatus[ga] = SG_RECURSIVE;
548 optICStatuses[s] = SG_NONE;
549 APSContValSlacks[ga].set(GRB_DoubleAttr_LB,
551 recursiveContValSlacks[ga].set(GRB_DoubleAttr_LB,0.0);
553 else if (regimeStatus[ga] == SG_FIXED)
556 if (ICMult[1+2*ga].get(GRB_IntAttr_VBasis)==-1)
557 optICStatuses[s] = SG_BINDING0;
558 else if (ICMult[2*ga].get(GRB_IntAttr_VBasis)==-1)
559 optICStatuses[s] = SG_BINDING1;
561 optICStatuses[s] = SG_BINDING01;
564 optICStatuses[s] = SG_NONE;
579 for (
int ga = 0; ga < numActions_grandTotal; ga++)
581 switch (regimeStatus[ga])
584 if (recursiveContVal[ga].getValue()
585 -APSContValVar[ga].get(GRB_DoubleAttr_X)
588 regimeStatus[ga] = SG_FIXED;
589 APSContValSlacks[ga].set(GRB_DoubleAttr_LB,0.0);
590 recursiveContValSlacks[ga].set(GRB_DoubleAttr_LB,
602 if ( (-recursiveContVal[ga].getValue()
603 +APSContValVar[ga].get(GRB_DoubleAttr_X)
606 regimeStatus[ga] = SG_RECURSIVE;
607 APSContValSlacks[ga].set(GRB_DoubleAttr_LB,
609 recursiveContValSlacks[ga].set(GRB_DoubleAttr_LB,0.0);
624 bool addDirection = newQuadrant;
625 for (
int s = 0; s < numStates; s++)
629 if (optActions[s] != oldOptActions[s]
630 || regimeStatus[optActions[s]] != oldRegimeStatus[optActions[s]]
631 || optICStatuses[s] != oldOptICStatuses[s])
636 if (regimeChangeIters >= numActions_grandTotal)
637 cout <<
"Warning: Too many regime changes. " << regimeChangeIters
638 <<
" changes but only " << numActions_grandTotal
639 <<
" action profiles." << endl;
649 tmp = xConstr.get(GRB_DoubleAttr_SARHSUp);
652 addBoundingHyperplane(currDir,xConstr,yConstr,valueFn,
653 numStates,newDirections,newBounds,model,
659 quadrant=SG_SOUTHEAST;
668 tmp = yConstr.get(GRB_DoubleAttr_SARHSLow);
671 addBoundingHyperplane(currDir,xConstr,yConstr,valueFn,
672 numStates,newDirections,newBounds,model,
678 quadrant=SG_SOUTHWEST;
683 for (
int s = 0; s < numStates; s++)
684 newThreatTuple[s][1]=-newBounds.back()[s];
693 tmp = xConstr.get(GRB_DoubleAttr_SARHSLow);
696 addBoundingHyperplane(currDir,xConstr,yConstr,valueFn,
697 numStates,newDirections,newBounds,model,
704 quadrant=SG_NORTHWEST;
709 for (
int s = 0; s < numStates; s++)
710 newThreatTuple[s][0]=-newBounds.back()[s];
719 tmp = yConstr.get(GRB_DoubleAttr_SARHSUp);
722 addBoundingHyperplane(currDir,xConstr,yConstr,valueFn,
723 numStates,newDirections,newBounds,model,
731 quadrant=SG_NORTHEAST;
742 }
while (!passNorth);
748 delete [] valueFunSlacks;
749 delete [] pseudoContVals;
750 delete [] recursiveContValSlacks;
751 delete [] APSContValSlacks;
752 delete [] APSContValVar;
755 delete [] currDirVar;
758 if (
directions.size() == newDirections.size())
761 list<SGPoint>::const_iterator dir;
762 list< vector<double> >::const_iterator bnd;
763 list<SGPoint>::const_iterator newDir;
764 list< vector<double> >::const_iterator newBnd;
767 newDir = newDirections.begin(),
768 newBnd = newBounds.begin();
770 ++dir,++bnd,++newDir,++newBnd)
774 for (
int s = 0; s < numStates; s++)
775 dist = std::max(dist,abs((*bnd)[s]-(*newBnd)[s]));
783 threatTuple = newThreatTuple;
788 void SGSolver_MaxMinMax_GRB::addBoundingHyperplane(
SGPoint & currDir,
793 list<SGPoint> & newDirections,
794 list< vector<double> > & newBounds,
796 const bool addDirection)
798 int roundScale = 1e7;
803 xConstr.set(GRB_DoubleAttr_RHS,currDir[0]);
804 yConstr.set(GRB_DoubleAttr_RHS,currDir[1]);
814 if (newDirections.size() > 2)
820 std::list<SGPoint>::const_reverse_iterator d0 = newDirections.rbegin();
821 std::list<SGPoint>::const_reverse_iterator d1 = d0++;
822 double det = (*d0)[0]*(*d1)[1]-(*d0)[1]*(*d1)[0];
826 cout <<
"Warning: Repeated direction. " << endl
827 << (*d0) <<
" " << (*d1) <<
" " << currDir << endl;
830 double a = (currDir[0]*(*d1)[1]-currDir[1]*(*d1)[0])/det;
831 double b = ((*d0)[0]*currDir[1]-(*d0)[1]*currDir[0])/det;
833 std::list<vector<double> >::const_reverse_iterator h0 = newBounds.rbegin();
834 std::list<vector<double> >::const_reverse_iterator h1 = h0++;
836 for (
int s = 0; s < numStates; s++)
837 distSum += abs(a*(*h0)[s]+b*(*h1)[s] - valueFn[s].get(GRB_DoubleAttr_X));
843 newBounds.pop_back();
844 newDirections.pop_back();
849 newDirections.push_back(currDir);
850 newBounds.push_back(vector<double>(numStates,0));
851 for (
int s = 0; s < numStates; s++)
853 double tmp = valueFn[s].get(GRB_DoubleAttr_X);
856 newBounds.back()[s] = tmp;
863 xConstr.set(GRB_DoubleAttr_RHS,currDir[0]);
864 yConstr.set(GRB_DoubleAttr_RHS,currDir[1]);
867 void SGSolver_MaxMinMax_GRB::printIteration(ofstream & ofs,
int numIter)
871 for (
int s = 0; s < numStates; s++)
875 list<SGPoint>::const_iterator dir;
876 list< vector<double> >::const_iterator bnd;
882 ofs << setprecision(12) << (*dir)[0] <<
" " << (*dir)[1];
883 for (
int s = 0; s < numStates; s++)
884 ofs <<
" " << (*bnd)[s];