// adv_CAR_4c.cc - 'main' file for CAR (conditioned avoidance response) simulations



/* 5th generation of modelling studies based on Andrew J. Smith et al. 05.

   This generation of advantage learning, started August 06, is motivated by the
   idea that only DA may be involved in Policy learning, while Value learning may
   depend possibly on DA, but also on other NTs, especially for the aversive limb.
   The specific program adv_CAR_4c checks what happens if learning of policy takes
   place only on better-than transitions, putatively DAergic. This version is
   derived from adv_like_CAR_2c. It does multiple runs and produces some biggish
   output files to be processed by other programs.

   N. learning --> N. Extn. --> DA-block-learning --> DA-block-extn.

   - Has 'degenerate' action states to allow for small initial random avoidance Py's.
   - Has time-registering in safety states, not just one safety state.
   - Has a cost of shuttling.
   - Output measures more similar to those of the psychology papers.

   Trial structure :
   PrelimTrN() trials -> LrnTrN() learning trials -> 3*lrnTrN extinction trials

   Protocols are :
     1, 2 & 3       : PrelimTrN trials -> no US, no Modulation
     1, in addition : N. learning       , Modulated extinction
     2,     "       : Modulated learning, Modulated extn.
     3,     "       : Modulated learning, 'N' (unmodulated) extn.
*/

#include <...>
#include <...>
#include <...>
#include <iostream>     // input & output streams
#include <iomanip>      // i-o parametrized manipulators
#include <sstream>      // in-memory string manipulations for i/o, new standard
#include <...>
#include <...>

#include "maths.h"
#include "nr_ut.h"
#include "conx2.h"      // bare-bones connections class
#include "in_out.h"     // ... to here is similar to NN-HEAD.H
#include "map.h"        // Return Map classes.
#include "rl.h"         // Reinforcement Learning setup classes.
#include "rl_exp.h"     // Experimental procedures/setups based on rl.h

// ******************** Global Variables & Objects *****************************

// model Objects & Variables ---------------------------------------------------
float     T = 0.0;               // Global Time
Exp_CAR_I AvExp;                 // Whole experiment with learners in shuttle boxes !

// model Parameters ------------------------------------------------------------
float exp_ShockAmpl  =  1.0 ;    // for 'extinction learning' etc.
float exp_appetMod   = -0.85 ;   // 'DA' modulation for simple exp. vs. contr.
float exp_AvAtt      =  0.0 ;    // 'aversive NT' modulation for simple exp. vs. contr.
float prot_to_sample =  1.0 ;    // From which protocol we'd like to sample in detail.

// Accessory Objects -----------------------------------------------------------
Menu25 menu;
float **menuNo;  char **menuSt;  // to connect to menu params.

// ************************ In-file Functions **********************************
void RetVMap( float *t, Vector **depVec, float *newVal );
void MakeMenu();
void Comments();
void Epochs();                   // time boundaries of conditions
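
// ----------------------------------------------------------------------------
// Illustrative sketch only, NOT called by the simulation: the "better-than-only"
// policy update described in the header comment, in minimal one-step form.
// The real updates are delta_p4 (policy) and delta3 (Value) in the RL classes;
// the argument names reward / discount below are stand-ins for illustration,
// not the library's actual signatures.
// ----------------------------------------------------------------------------
static float sketch_policyIncrement( float V_this, float V_next,
                                     float reward, float discount, float lrnRPol )
{
    // One-step TD error : delta = r + D*V(s') - V(s)
    float delta = reward + discount*V_next - V_this ;
    // Policy (advantage) parameters move only on better-than-expected transitions
    // (delta > 0), the putatively DAergic signal; the Value update, by contrast,
    // uses the full, signed delta.
    return ( delta > 0.0 ) ? lrnRPol*delta : 0.0 ;
}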
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~  MAIN  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
int main()
{
  int i, j, k, n, m;

  // ANSI C clock time-dep. random seed. 604800 is the no. of sec in a week :
  srand( ( time( NULL ) % 604800) ) ;

  // ************** Return Map object that will iterate the model ***************
  RetVectMap AvExp_map( AvExp.VecVarPtr()
                      , &T
                      , AvExp.vvNum()          // No. of vectorial vars. in model
                      , RetVMap                // return map 'engine'
                      );

  // *********************************** Menu ***********************************
  // to tell menu which parameters to adjust :
  menuNo = new float* [15+1];  menuSt = new char* [5] ;    // see in_out.h
  menuNo[1]  = &exp_appetMod ;      menuNo[2]  = &exp_AvAtt ;
  menuNo[3]  = &AvExp.lrnTrN ;      menuNo[4]  = &AvExp.D ;
  menuNo[5]  = &AvExp.lrnR ;        menuNo[6]  = &AvExp.lrnRPol ;
  menuNo[7]  = &AvExp.shuttleRet ;  menuNo[8]  = &AvExp.Temp ;
  menuNo[9]  = &exp_ShockAmpl ;     menuNo[10] = &AvExp.vigourMod ;
  menuNo[11] = &AvExp.runN ;        menuNo[12] = &prot_to_sample ;
  menuNo[13] = &AvExp.resPrevNo ;   menuNo[14] = &AvExp.extnBlockTrN ;

  MakeMenu();                                    // Construct & Run menu
  char menuFlow = menu.GetChoices(menuNo, menuSt);
  // make sure all params updated !
  AvExp.RefreshParams() ;                        // non-public params
  // AvExp.setShockSAmpl(zzz) is called in a complicated manner w.in the iteration
  // loops; this is here as it just might be affected by the menu :
  Vector endpts(2);  endpts[1]=AvExp.tStart();  endpts[2]=AvExp.tEnd();

  // ************************ To save simulation results *************************
  // to store iteration results to memory
  float *tSteps ;                  // a little trivial for a map
  tSteps = vector(1, (n=roundfl(endpts[2]-endpts[1]+1)) ) ;
  float **tSeries ;                // blow-by-blow within each trial
  tSeries = matrix(1,AvExp_map.TotEqN(),1,n) ;
  float *trials_n ;                // trial number
  float **trials_s;                // end-of-each-trial state data.
  float **trials_V;                // end-of-each-trial Value data.
  float **trials_a;                // end-of-each-trial action data.
  float **trials_pol_m;            // end-of-trial policy parameter data - first go, then stay.
                                   // Note it doesn't cover 'safe' states as trivial policy (--> end trial).
  float **trials_Py_go;            // end-of-trial Py of shuttle at each state
  float **AvoidPr ;                // Overall Py of avoidance for all the Runs
  float **Latency ;                // latency data for all the Runs

  // to pass integration results to plotting, disk storage etc.
  // NB datPtr[0] is a useful auxiliary, [1] is for indep. var. vals. :
  float **datPtr ;
  int datPtrDim = AvExp_map.TotEqN() ;
  if (AvExp.TRunN() > datPtrDim)  datPtrDim = AvExp.TRunN() ;
  datPtrDim += 2;
  datPtr = new float* [datPtrDim] ;

  // various file objects for results :
  oFile states;  states.SetName("state","seq");
  oFile values;  values.SetName("values","out");
  oFile advals;  advals.SetName("adv_par","pol");  // for (advantage) params for
                                                   // states 1 to 6: first go, then stay.
  oFile a2P;     a2P.SetName("py_go","pol");       // for Py_go probabilities
  oFile PAv;     PAv.SetName("PAv","out");         // for avoidance Py
  oFile Lat;     Lat.SetName("Lat","out");         // for Latencies of avoidance / escape.

  // --------------------------- Menu-dependent loop : ---------------------------
  while (menuFlow != 'Q') {   // NB menu choices run before the loop and at its end

    // - - - - - - - - - - - Memory space for results - - - - - - - - - - - - -
    // for trajectories of the Run kept in detail :
    trials_n     = vector(1, AvExp.RunTrialN() ) ;
    trials_s     = matrix(1,AvExp.sSize(),1,AvExp.RunTrialN()) ;
    trials_a     = matrix(1,AvExp.aSize(),1,AvExp.RunTrialN()) ;
    trials_V     = matrix(1,AvExp.VSize(),1,AvExp.RunTrialN()) ;
    // NB next one is for policy parameters for each of the 'unsafe' side states only:
    trials_pol_m = matrix(1,2*AvExp.ShockS(),1,AvExp.RunTrialN()) ;
    trials_Py_go = matrix(1,AvExp.StateNum(),1,AvExp.RunTrialN()) ;

    // for all Runs :
    // The idea is that the output files storing these will be examined, possibly by
    // another program, to derive statistics of the whole thing.
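    // (Rows of AvoidPr / Latency below cover all runs of all protocols: row
    //  (k-1)*RunN()+m holds run m of protocol k ; columns index trials.)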
    AvoidPr = matrix(1, AvExp.TRunN(), 1,AvExp.RunTrialN()) ;
    Latency = matrix(1, AvExp.TRunN(), 1,AvExp.RunTrialN()) ;

    // There are ProtN different expt. protocols to iterate over :
    for (k=1; k<=AvExp.ProtN(); k++) {

      // There are RunN runs per protocol :
      for (m=1; m<=AvExp.RunN(); m++) {

        // * * * * * * (Re-)set initial conditions & model parameters * * * * * *
        AvExp.Reset2() ;   // Ready to start from scratch again - resets s,V,a,Q,p

        // Each experimental Run consists of RunTrialN consecutive trials.
        for (i=1; i<=AvExp.RunTrialN(); i++) {

          AvExp.SetCurTrN(i) ;

          // US:
          ( (i <= AvExp.PrelimTrN()) || (i > AvExp.PrelimTrN()+AvExp.LrnTrN()) ) ?
              AvExp.setShockSAmpl(0.0) : AvExp.setShockSAmpl(exp_ShockAmpl) ;

          // Modulation:
          if (k==1) {          // N. learning, modulated extn.
            if (i <= AvExp.PrelimTrN()+AvExp.LrnTrN() )
              AvExp.appetMod = AvExp.averMod = 0.0 ;
            else { AvExp.appetMod = exp_appetMod ;  AvExp.averMod = exp_AvAtt ; }
          }
          else if (k==2) {     // Modulated learning and modulated extn. - only run-in N.
            if (i <= AvExp.PrelimTrN() )
              AvExp.appetMod = AvExp.averMod = 0.0 ;
            else { AvExp.appetMod = exp_appetMod ;  AvExp.averMod = exp_AvAtt ; }
          }
          else {               // remaining protocol - Modulated learning, N. extn.
            if ((i <= AvExp.PrelimTrN()) || (i > AvExp.PrelimTrN()+AvExp.LrnTrN()))
              AvExp.appetMod = AvExp.averMod = 0.0 ;
            else { AvExp.appetMod = exp_appetMod ;  AvExp.averMod = exp_AvAtt ; }
          }

          // * * * * * * (Re-)set initial conditions & model parameters * * * * *
          // set all state & action labels = 0, meaning that these times have not
          // been visited, and set initial condition s[1]=1, corresp. to t=0 :
          AvExp.reStart() ;

          // - - - - - - - - - - Iterate Map for one trial ! - - - - - - - - - -
          AvExp_map.Iterate( &endpts       // t start, t end
                           , tSteps        // results : time index
                           , tSeries       // results : variables.
                           );

          // - - - - - - Store data for each trial of each Run - - - - - - - - -
          // Overall avoidance probability stored here :
          AvoidPr[(k-1)*AvExp.RunN()+m][i] = AvExp.PAvoidGNoRetDgn() ;
          // And latency here (note scaling by 0.1 to put all in one plot) :
          Latency[(k-1)*AvExp.RunN()+m][i] = 0.1 * AvExp.latency() ;

          // - - - - - Store detailed data for trials of the first run - - - - -
          if (k==roundfl(prot_to_sample) && m==1) {
            trials_n[i] = i;
            // states stored here:
            for (j=1; j<=AvExp.sSize(); j++)  trials_s[j][i] = AvExp.s[j] ;
            // In the following, note indices <= AvExp.ShockS() as the 'safe side'
            // is uninteresting.
            // Values stored here:
            for (j=1; j<=AvExp.ShockS(); j++) trials_V[j][i] = AvExp.V[j] ;
            // policy parameters for 'go' here:
            for (j=1; j<=AvExp.ShockS(); j++) trials_pol_m[j][i] = AvExp.mVal(j,2);
            // policy parameters for 'stay' here - further down the same array !:
            for (j=1; j<=AvExp.ShockS(); j++) trials_pol_m[AvExp.ShockS()+j][i] = AvExp.mVal(j,1);
            // policy probabilities for 'shuttle' here (for j>ShockS set to 0 anyway):
            for (j=1; j<=AvExp.ShockS(); j++) trials_Py_go[j][i] = AvExp.pGibbsDgn(j,AvExp.Shuttle());
          }   // end store detailed data for trials of the first Run

        }   // end of for loop over trials of a single run (i)
      }     // end of for loop over runs in each protocol (m)
    }       // end of loop over protocols (k)

    // - - - - - - - - - - - - Store Results on disk - - - - - - - - - - - - -
    datPtr[1] = trials_n;   // Indexes stored trials.
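    // datPtr[2..] are then filled with one data series each and passed to
    // interpAr() together with the trial index (independent variable) in datPtr[1].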
    for (i=1; i<=AvExp.TRunN();    i++)  datPtr[i+1] = AvoidPr[i];
    PAv.interpAr(    datPtr, AvExp.RunTrialN(), 1, AvExp.TRunN()+1,    1.0 );
    for (i=1; i<=AvExp.TRunN();    i++)  datPtr[i+1] = Latency[i];
    Lat.interpAr(    datPtr, AvExp.RunTrialN(), 1, AvExp.TRunN()+1,    1.0 );
    for (i=1; i<=AvExp.sSize();    i++)  datPtr[i+1] = trials_s[i];
    states.interpAr( datPtr, AvExp.RunTrialN(), 1, AvExp.sSize()+1,    1.0 );
    for (i=1; i<=AvExp.ShockS();   i++)  datPtr[i+1] = trials_V[i];
    values.interpAr( datPtr, AvExp.RunTrialN(), 1, AvExp.ShockS()+1,   1.0 );
    for (i=1; i<=2*AvExp.ShockS(); i++)  datPtr[i+1] = trials_pol_m[i];
    advals.interpAr( datPtr, AvExp.RunTrialN(), 1, 2*AvExp.ShockS()+1, 1.0 );
    for (i=1; i<=AvExp.ShockS();   i++)  datPtr[i+1] = trials_Py_go[i];
    a2P.interpAr(    datPtr, AvExp.RunTrialN(), 1, AvExp.ShockS()+1,   1.0 );

    // rough files with the simulation parameters:
    AvExp.AuxOutput(exp_appetMod, exp_AvAtt, exp_ShockAmpl ) ;

    free_vector(trials_n,     1, AvExp.RunTrialN() ) ;
    free_matrix(trials_s,     1, AvExp.sSize(),    1, AvExp.RunTrialN() ) ;
    free_matrix(trials_V,     1, AvExp.VSize(),    1, AvExp.RunTrialN() ) ;
    free_matrix(trials_a,     1, AvExp.aSize(),    1, AvExp.RunTrialN() ) ;
    free_matrix(trials_pol_m, 1, 2*AvExp.ShockS(), 1, AvExp.RunTrialN() ) ;
    free_matrix(trials_Py_go, 1, AvExp.StateNum(), 1, AvExp.RunTrialN() ) ;
    free_matrix(AvoidPr,      1, AvExp.TRunN(),    1, AvExp.RunTrialN() ) ;
    free_matrix(Latency,      1, AvExp.TRunN(),    1, AvExp.RunTrialN() ) ;
    delete [] datPtr ;

    // - - - - - - - Run menu & adjust auxiliary params again - - - - - - - -
    menu.Refresh();
    menuFlow = menu.GetChoices(menuNo, menuSt);
    // make sure all params updated !
    AvExp.RefreshParams() ;                  // non-public params
    datPtrDim = AvExp_map.TotEqN() ;
    if (AvExp.TRunN() > datPtrDim)  datPtrDim = AvExp.TRunN() ;
    datPtrDim += 2;
    datPtr = new float* [datPtrDim] ;

  }  // - - - - - - - - End of menu-dependent loop - - - - - - - - - - -

  // *************************** Tidy up *****************************
  delete [] datPtr;
  free_vector(tSteps, 1, (n=roundfl(endpts[2]-endpts[1]+1)) );
  free_matrix(tSeries, 1, AvExp_map.TotEqN(), 1, n ) ;
  delete [] menuNo;  delete [] menuSt ;

}  // end of main()


//-------------------------- Functions -------------------------------------
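
// ----------------------------------------------------------------------------
// Illustrative sketch only, NOT called by the simulation: a Gibbs / softmax
// choice rule of the kind suggested by the 'Temperature' parameter Temp and by
// pGibbsDgn() / selGActNoRetDgnResPrev(). The two-action (stay/go) form and the
// argument names here are assumptions for illustration; the actual routines in
// rl.h / rl_exp.h may differ (e.g. in how 'degenerate' actions are handled).
// Assumes the standard math header (exp) is available.
// ----------------------------------------------------------------------------
static float sketch_pGibbsGo( float m_go, float m_stay, float Temp )
{
    // P(go | s) = exp(m_go/Temp) / ( exp(m_go/Temp) + exp(m_stay/Temp) )
    double eGo   = exp( m_go   / Temp ) ;
    double eStay = exp( m_stay / Temp ) ;
    return (float)( eGo / ( eGo + eStay ) ) ;
}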
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
void MakeMenu()
{
  const int Offset = 2 ;
  // Reminder:
  // menuNo[1]  = &exp_appetMod ;      menuNo[2]  = &exp_AvAtt ;
  // menuNo[3]  = &AvExp.lrnTrN ;      menuNo[4]  = &AvExp.D ;
  // menuNo[5]  = &AvExp.lrnR ;        menuNo[6]  = &AvExp.lrnRPol ;
  // menuNo[7]  = &AvExp.shuttleRet ;  menuNo[8]  = &AvExp.Temp ;
  // menuNo[9]  = &exp_ShockAmpl ;     menuNo[10] = &AvExp.vigourMod ;
  // menuNo[11] = &AvExp.runN ;        menuNo[12] = &prot_to_sample ;
  // menuNo[13] = &AvExp.resPrevNo ;   menuNo[14] = &AvExp.extnBlockTrN ;

  menu.SetOption(1 ,"\n Adv-CAR_4c: Policy learning only when better-than trials ?" ,'U' ,menuNo, menuSt, 0);
  menu.SetOption( Offset+1, "Reward error bias, appetMod : "    ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+2, "Painful error bias, averMod : "    ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+3, "Trial Number lrnTrN : "            ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+4, "Discount factor D : "              ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+5, "State V learning Rate, lrnR : "    ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+6, "Policy par. learn Rate lrnRPol: "  ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+7, "Cost of shuttling, shuttleRet: "   ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+8, "'Temperature' Temp: "              ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+9, "Shock Amplitude exp_ShockAmpl: "   ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+10,"Vigour modul. coef., vigourMod: "  ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+11,"Runs/subjects per protocol : "     ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+12,"Protocol to sample in detail : "   ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+13,"No. of Resp. Prev. trials : "      ,'N',menuNo, menuSt, 0);
  menu.SetOption( Offset+14,"No. of Extn. block trials : "      ,'N',menuNo, menuSt, 0);
  menu.Refresh();
}  // end of MakeMenu


// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// READ THE NOTES !
void RetVMap( float *t, Vector **depVec, float *newVal )
{
  // This returns the new state of the system as newVal, given the current
  // state in depVec AND THE TIME CORRESPONDING TO newVal, NOT TO depVec !!!
  // This is because time is augmented by the method that calls this ...
  // newVal 1-6 are states, 7-13 are values, 14-19 actions, 20-33 action-values,
  // 34-47 are policy parameters.

  const int Soffset = 1 ;                         // 1 as newVal starts from [1]
  const int Voffset = AvExp.sSize() ;             // spaces taken by the state record.
  // Action taken at the state when t=0 goes to 14, so :
  const int Aoffset = Voffset+AvExp.VSize()+1 ;
  // Q(s=1,a=1) goes to 20. The -3 is because states and actions both begin at 1 each.
  const int Qoffset = Aoffset+AvExp.aSize()-3 ;   // ... but remember
  // that Q values are 'interleaved': s1stay, s1go, s2stay, ..., and so are p values.
  const int Poffset = Qoffset+2*AvExp.StateNum(); // no '-3' as in the Qs because already
                                                  // done for Q.
  int t_next = roundfl(*t);   int t_this = t_next-1 ;
  int S_next;  float V_next;  int A_this ;

  // Corrected but untested :
  int   S_this = roundfl( depVec[1]->GetValue(t_this+1) ) ;
  float V_this = depVec[2]->GetValue(S_this) ;
  // Seems to have mostly got away with the following, conceptually wrong :
  // int S_this=roundfl(*(newVal+Soffset+t_this ));
  // float V_this=*(newVal+Voffset+S_this );

  // actor selects action acc. to current policy and Response Prevention :
  A_this = roundfl( *(newVal+Aoffset+t_this) = AvExp.selGActNoRetDgnResPrev( S_this )) ;
  // and the environment lands us in the next state ...
  S_next = roundfl( *(newVal+Soffset+t_next) = AvExp.nextSNoRet(t_this, A_this)) ;

  // Using the next state (& assoc. return), correct the Advantage params (which
  // determine the policy). Note delta_p4 is the one that only gives a nonzero
  // delta for better-than outcomes :
  *(newVal+Poffset+2*S_this+A_this) += AvExp.LrnRPol() * AvExp.delta_p4(S_this, A_this, S_next);

  // Using the next state (& assoc. return), the critic corrects the Value :
  *(newVal+Voffset+S_this) += AvExp.LRate() * AvExp.delta3( S_this, A_this, S_next );

}  // end of Difference Equations.

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// ------------------------- end of file -----------------------------------