MyCaffe  1.12.2.41
Deep learning software for Windows C# programmers.
CartPoleGym.cs
1using MyCaffe.basecode;
3using System;
4using System.Collections.Generic;
5using System.Diagnostics;
6using System.Drawing;
7using System.Drawing.Drawing2D;
8using System.Linq;
9using System.Text;
10using System.Threading;
11using System.Threading.Tasks;
12
13namespace MyCaffe.gym
14{
27 {
28 string m_strName = "Cart-Pole";
29 double m_dfGravity = 9.8;
30 double m_dfMassCart = 1.0;
31 double m_dfMassPole = 0.1;
32 double m_dfTotalMass;
33 double m_dfLength = 0.5; // actually half the pole's length
34 double m_dfPoleMassLength;
35 double m_dfForce = 10;
36 bool m_bAdditive = false;
37 double m_dfTau = 0.02; // seconds between state updates.
38 Dictionary<string, int> m_rgActionSpace;
39 Bitmap m_bmp = null;
40 int m_nSteps = 0;
41 int m_nMaxSteps = 0;
42 ColorMapper m_clrMap = null;
43 DATA_TYPE m_dt = DATA_TYPE.VALUES;
44
45 // Angle at which to fail the episode
46 double m_dfThetaThreshold = CartPoleState.MAX_THETA;
47 double m_dfXThreshold = CartPoleState.MAX_X;
48
49 Random m_random = new Random();
50 CartPoleState m_state = new CartPoleState();
51 int? m_nStepsBeyondDone = null;
52 Log m_log;
53
/// <summary>
/// Defines the actions to perform.
/// </summary>
public enum ACTION
{
    /// <summary>
    /// Push the cart with a negative force (see processAction).
    /// </summary>
    MOVELEFT = 0,

    /// <summary>
    /// Push the cart with a positive force (see processAction).
    /// </summary>
    MOVERIGHT = 1
}
68
/// <summary>
/// The constructor. Pre-computes the derived physical constants and
/// builds the action space (name -> action id).
/// </summary>
public CartPoleGym()
{
    // Derived constants used by the physics update in Step().
    m_dfPoleMassLength = m_dfMassPole * m_dfLength;
    m_dfTotalMass = m_dfMassPole + m_dfMassCart;

    // The ids match the numeric values of the ACTION enum.
    m_rgActionSpace = new Dictionary<string, int>
    {
        { "MoveLeft", 0 },
        { "MoveRight", 1 }
    };
}
81
/// <summary>
/// Initialize the gym with the specified properties.
/// </summary>
/// <param name="log">Specifies the output log used by the gym.</param>
/// <param name="properties">Optional properties (may be null). "Init1" sets the
/// force magnitude (default 10); a non-zero "Init2" enables additive force mode
/// (default off).</param>
public void Initialize(Log log, PropertySet properties)
{
    // Start from the defaults, then let the property set override them.
    m_bAdditive = false;
    m_dfForce = 10;

    if (properties != null)
    {
        m_dfForce = properties.GetPropertyAsDouble("Init1", 10);
        m_bAdditive = properties.GetPropertyAsDouble("Init2", 0) != 0;
    }

    m_log = log;
    m_nMaxSteps = 0;

    // Begin with a freshly randomized state.
    Reset(false);
}
107
108
/// <summary>
/// Create a new copy of the gym.
/// </summary>
/// <param name="properties">Optionally, specifies the properties used to
/// initialize the new gym (default = null). When null, the new gym keeps its
/// constructor defaults and is not reset.</param>
/// <returns>The new gym instance.</returns>
public IXMyCaffeGym Clone(PropertySet properties = null)
{
    CartPoleGym gym = new CartPoleGym();

    if (properties != null)
    {
        gym.Initialize(m_log, properties);
    }
    else
    {
        // Without properties the clone is not initialized, but it must still
        // share the log: Step() writes a warning through m_log and would
        // otherwise dereference null.
        gym.m_log = m_log;
    }

    return gym;
}
123
128 {
129 get { return false; }
130 }
131
136 {
137 get { return m_dt; }
138 }
139
144 {
145 get { return new DATA_TYPE[] { DATA_TYPE.VALUES, DATA_TYPE.BLOB }; }
146 }
147
/// <summary>
/// Returns the gym's name.
/// </summary>
public string Name
{
    get
    {
        return m_strName;
    }
}
155
/// <summary>
/// Returns the delay to use (if any) when the user-display is visible.
/// </summary>
public int UiDelay
{
    get
    {
        // Fixed delay value; NOTE(review): units are presumably milliseconds - confirm with the caller.
        return 20;
    }
}
163
/// <summary>
/// Returns the testing percent of -1, which then uses the default of 0.2.
/// </summary>
public double TestingPercent
{
    get
    {
        return -1;
    }
}
171
/// <summary>
/// Returns the action space as a dictionary of name,actionid pairs.
/// </summary>
/// <returns>The gym's own action-space dictionary (not a copy).</returns>
public Dictionary<string, int> GetActionSpace()
{
    Dictionary<string, int> rgActions = m_rgActionSpace;
    return rgActions;
}
180
/// <summary>
/// Apply an action to the current state by updating its force magnitude.
/// </summary>
/// <param name="a">The action to apply; null or an unrecognized value leaves the state untouched.</param>
private void processAction(ACTION? a)
{
    if (!a.HasValue)
        return;

    // Sign of the applied force: left pushes negative, right pushes positive.
    int nDirection;

    switch (a.Value)
    {
        case ACTION.MOVELEFT:
            nDirection = -1;
            break;

        case ACTION.MOVERIGHT:
            nDirection = 1;
            break;

        default:
            return;
    }

    // In additive mode the previous force is carried over, otherwise it is zeroed out.
    m_state.ForceMag = (m_state.ForceMag * ((m_bAdditive) ? 1 : 0)) + m_dfForce * nDirection;
}
197
/// <summary>
/// Shutdown and close the gym.
/// </summary>
/// <remarks>
/// This gym performs no work on close.
/// </remarks>
public void Close()
{
    // Intentionally empty.
}
204
/// <summary>
/// Render the gym's current state on a bitmap and SimpleDatum.
/// </summary>
/// <param name="bShowUi">Passed through to the data-driven Render overload.</param>
/// <param name="nWidth">Specifies the width of the bitmap to create.</param>
/// <param name="nHeight">Specifies the height of the bitmap to create.</param>
/// <param name="bGetAction">When true, the action data is returned along with the bitmap.</param>
/// <returns>The rendered bitmap and, when requested, the action data.</returns>
public Tuple<Bitmap, SimpleDatum> Render(bool bShowUi, int nWidth, int nHeight, bool bGetAction)
{
    // Pack the current state in the layout the data-driven overload reads:
    // { X, XDot, Theta, ThetaDot, ForceMag, Steps }.
    double[] rgState = new double[]
    {
        m_state.X,
        m_state.XDot,
        m_state.Theta,
        m_state.ThetaDot,
        m_state.ForceMag,
        m_nSteps
    };

    return Render(bShowUi, nWidth, nHeight, rgState, bGetAction);
}
226
/// <summary>
/// Render the gym's specified data on a new bitmap.
/// </summary>
/// <param name="bShowUi">Not used by this implementation.</param>
/// <param name="nWidth">Specifies the width of the bitmap to create.</param>
/// <param name="nHeight">Specifies the height of the bitmap to create.</param>
/// <param name="rgData">Specifies the gym data to render; index 0 = X, 2 = Theta (radians),
/// 4 = force magnitude and 5 = step count are read.</param>
/// <param name="bGetAction">When true, the region around the cart is also returned as a SimpleDatum.</param>
/// <returns>The rendered bitmap and the action data (null when <i>bGetAction</i> is false).</returns>
public Tuple<Bitmap, SimpleDatum> Render(bool bShowUi, int nWidth, int nHeight, double[] rgData, bool bGetAction)
{
    Bitmap bmp = new Bitmap(nWidth, nHeight);

    double dfX = rgData[0];
    double dfTheta = rgData[2];
    double dfThetaInDegrees = dfTheta * (180.0 / Math.PI);
    double dfForceMag = rgData[4];
    int nSteps = (int)rgData[5];

    // The rendered data also drives the gym's step counters.
    m_nSteps = nSteps;
    m_nMaxSteps = Math.Max(nSteps, m_nMaxSteps);

    using (Graphics g = Graphics.FromImage(bmp))
    {
        Rectangle rc = new Rectangle(0, 0, bmp.Width, bmp.Height);
        g.FillRectangle(Brushes.White, rc);

        float fScreenWidth = g.VisibleClipBounds.Width;
        float fWorldWidth = (float)(m_dfXThreshold * 2);
        float fScale = fScreenWidth / fWorldWidth;
        float fCartY = 100; // Top of Cart
        float fPoleWidth = 10;
        float fPoleLen = fScale * 1.0f;
        float fCartWidth = 50;
        float fCartHeight = 30;

        // Cart geometry, centered on its own origin.
        float fL = -fCartWidth / 2;
        float fR = fCartWidth / 2;
        float fT = fCartHeight / 2;
        float fB = -fCartHeight / 2;
        float fAxleOffset = 0;
        GeomCart cart = new GeomCart(fL, fR, fT, fB, Color.SkyBlue, Color.Black);

        // Pole geometry: the bottom edge sits half a pole-width below the origin.
        // (A unary minus is required here; '--' would decrement fPoleWidth and
        // produce a positive bottom edge.)
        fL = -fPoleWidth / 2;
        fR = fPoleWidth / 2;
        fT = fPoleLen - fPoleWidth / 2;
        fB = -fPoleWidth / 2;
        GeomPole pole = new GeomPole(fL, fR, fT, fB, Color.Tan, Color.Black);

        // Track spanning the full width at the cart's height.
        fL = 0;
        fR = fScreenWidth;
        fT = fCartY;
        fB = fT;
        GeomLine track = new GeomLine(fL, fR, fT, fB, Color.Black, Color.Black);

        // Position bar drawn below the track.
        fL = 0;
        fR = fScreenWidth;
        fT = fCartY - 40;
        fB = fT + 10;

        // The color map is created once, using the extents of the first render.
        if (m_clrMap == null)
            m_clrMap = new ColorMapper(fL, fR, Color.Fuchsia, Color.Red);

        GeomRectangle posbar = new GeomRectangle(fL, fR, fT, fB, Color.Black, Color.Transparent, m_clrMap);

        float fCartX = (float)dfX * fScale + fScreenWidth / 2; // middle of the cart.
        cart.SetLocation(fCartX, fCartY);
        pole.SetRotation((float)-dfThetaInDegrees);
        cart.Attach(pole, fAxleOffset);

        GeomView view = new GeomView();

        view.RenderText(g, "Current Force = " + dfForceMag.ToString(), 10, 10);
        view.RenderText(g, "X = " + dfX.ToString("N02"), 10, 24);
        view.RenderText(g, "Theta = " + dfTheta.ToString("N02") + " radians", 10, 36);
        view.RenderText(g, "Theta = " + dfThetaInDegrees.ToString("N02") + " degrees", 10, 48);
        view.RenderSteps(g, m_nSteps, m_nMaxSteps);

        // Render the objects (the cart renders its attached pole).
        view.AddObject(posbar);
        view.AddObject(track);
        view.AddObject(cart);
        view.Render(g);

        SimpleDatum sdAction = null;

        if (bGetAction)
            sdAction = getActionData(fCartX, fCartY, bmp);

        m_bmp = bmp;

        return new Tuple<Bitmap, SimpleDatum>(bmp, sdAction);
    }
}
322
/// <summary>
/// Crop a fixed 156x156 region around the cart out of the rendered bitmap and
/// convert it into image data.
/// </summary>
/// <param name="fX">Specifies the cart's x location within <i>bmpSrc</i>.</param>
/// <param name="fY">Specifies the cart's y location, measured from the bottom of <i>bmpSrc</i>.</param>
/// <param name="bmpSrc">Specifies the rendered source bitmap.</param>
/// <returns>The 3-channel image data of the cropped region.</returns>
private SimpleDatum getActionData(float fX, float fY, Bitmap bmpSrc)
{
    double dfWid = 156;
    double dfHt = 156;
    double dfX = fX - (dfWid * 0.5);
    double dfY = (bmpSrc.Height - fY) - (dfHt * 0.75);

    RectangleF rc = new RectangleF((float)dfX, (float)dfY, (float)dfWid, (float)dfHt);

    // The crop bitmap is only an intermediate for the conversion, so it is
    // disposed here instead of being left for the finalizer.
    // NOTE(review): assumes GetImageDataD copies the pixel data and keeps no reference to the bitmap - confirm.
    using (Bitmap bmp = new Bitmap((int)dfWid, (int)dfHt))
    {
        using (Graphics g = Graphics.FromImage(bmp))
        {
            RectangleF rc1 = new RectangleF(0, 0, (float)dfWid, (float)dfHt);

            // Black background covers any part of the crop that falls outside the source.
            g.FillRectangle(Brushes.Black, rc1);
            g.DrawImage(bmpSrc, rc1, rc, GraphicsUnit.Pixel);
        }

        return ImageData.GetImageDataD(bmp, 3, false, -1);
    }
}
342
/// <summary>
/// Reset the state of the gym.
/// </summary>
/// <param name="bGetLabel">Not used by this implementation.</param>
/// <param name="props">Not used by this implementation.</param>
/// <returns>A copy of the new state, a reward of 1 and done = false.</returns>
public Tuple<State, double, bool> Reset(bool bGetLabel, PropertySet props = null)
{
    const double dfSpread = 0.05;

    // Draw order matters for reproducibility: X, XDot, Theta, ThetaDot.
    double dfStartX = randomUniform(-dfSpread, dfSpread);
    double dfStartXDot = randomUniform(-dfSpread, dfSpread);
    double dfStartTheta = randomUniform(-dfSpread, dfSpread);
    double dfStartThetaDot = randomUniform(-dfSpread, dfSpread);

    m_nStepsBeyondDone = null;
    m_nSteps = 0;
    m_state = new CartPoleState(dfStartX, dfStartXDot, dfStartTheta, dfStartThetaDot);

    State stateCopy = m_state.Clone();
    return new Tuple<State, double, bool>(stateCopy, 1, false);
}
361
/// <summary>
/// Draw a random value from the range [dfMin, dfMax) using the gym's random generator.
/// </summary>
/// <param name="dfMin">Specifies the lower bound.</param>
/// <param name="dfMax">Specifies the upper bound.</param>
/// <returns>The random value.</returns>
private double randomUniform(double dfMin, double dfMax)
{
    double dfSpan = dfMax - dfMin;
    double dfUnit = m_random.NextDouble();

    return dfMin + (dfUnit * dfSpan);
}
367
/// <summary>
/// Step the gym one step in its simulation.
/// </summary>
/// <param name="nAction">Specifies the action to run, cast to ACTION.</param>
/// <param name="bGetLabel">Not used by this implementation.</param>
/// <param name="propExtra">Not used by this implementation.</param>
/// <returns>A copy of the state object that was current when the step began (carrying the
/// applied force and the updated step count), the reward and whether the episode is done.</returns>
public Tuple<State, double, bool> Step(int nAction, bool bGetLabel, PropertySet propExtra = null)
{
    // Snapshot the kinematics before the action alters the live state's force.
    CartPoleState statePrev = new CartPoleState(m_state);

    processAction((ACTION)nAction);

    double dfForce = m_state.ForceMag;
    double dfPos = statePrev.X;
    double dfVel = statePrev.XDot;
    double dfAngle = statePrev.Theta;
    double dfAngleVel = statePrev.ThetaDot;
    double dfCosTheta = Math.Cos(dfAngle);
    double dfSinTheta = Math.Sin(dfAngle);

    // Cart-pole equations of motion.
    double dfTemp = (dfForce + m_dfPoleMassLength * dfAngleVel * dfAngleVel * dfSinTheta) / m_dfTotalMass;
    double dfThetaAcc = (m_dfGravity * dfSinTheta - dfCosTheta * dfTemp) / (m_dfLength * ((4.0 / 3.0) - m_dfMassPole * dfCosTheta * dfCosTheta / m_dfTotalMass));
    double dfXAcc = dfTemp - m_dfPoleMassLength * dfThetaAcc * dfCosTheta / m_dfTotalMass;

    // Euler integration: positions advance with the old velocities.
    double dfNewPos = dfPos + m_dfTau * dfVel;
    double dfNewVel = dfVel + m_dfTau * dfXAcc;
    double dfNewAngle = dfAngle + m_dfTau * dfAngleVel;
    double dfNewAngleVel = dfAngleVel + m_dfTau * dfThetaAcc;

    // The state object in effect during this step is what gets returned;
    // the freshly integrated state replaces it for the next step.
    CartPoleState stateOut = m_state;
    m_state = new CartPoleState(dfNewPos, dfNewVel, dfNewAngle, dfNewAngleVel);

    bool bOffTrack = dfNewPos < -m_dfXThreshold || dfNewPos > m_dfXThreshold;
    bool bPoleFell = dfNewAngle < -m_dfThetaThreshold || dfNewAngle > m_dfThetaThreshold;
    bool bDone = bOffTrack || bPoleFell;

    double dfReward;

    if (!bDone)
    {
        dfReward = 1.0;
    }
    else if (!m_nStepsBeyondDone.HasValue)
    {
        // Pole just fell!
        m_nStepsBeyondDone = 0;
        dfReward = 1.0;
    }
    else
    {
        // Warn only on the first step taken after the episode ended.
        if (m_nStepsBeyondDone.Value == 0)
            m_log.WriteLine("WARNING: You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()'");

        m_nStepsBeyondDone++;
        dfReward = 0.0;
    }

    m_nSteps++;
    m_nMaxSteps = Math.Max(m_nMaxSteps, m_nSteps);

    stateOut.Steps = m_nSteps;
    return new Tuple<State, double, bool>(stateOut.Clone(), dfReward, bDone);
}
432
440 {
441 int nH = 1;
442 int nW = 1;
443 int nC = 4;
444
445 if (dt == DATA_TYPE.DEFAULT)
446 dt = DATA_TYPE.VALUES;
447
448 if (dt == DATA_TYPE.BLOB)
449 {
450 nH = 156;
451 nW = 156;
452 nC = 3;
453 }
454
455 SourceDescriptor srcTrain = new SourceDescriptor((int)GYM_DS_ID.CARTPOLE, Name + ".training", nW, nH, nC, false, false);
456 SourceDescriptor srcTest = new SourceDescriptor((int)GYM_SRC_TEST_ID.CARTPOLE, Name + ".testing", nW, nH, nC, false, false);
457 DatasetDescriptor ds = new DatasetDescriptor((int)GYM_SRC_TRAIN_ID.CARTPOLE, Name, null, null, srcTrain, srcTest, "CartPoleGym", "CartPole Gym", null, GYM_TYPE.DYNAMIC);
458
459 m_dt = dt;
460
461 return ds;
462 }
463 }
464
/// <summary>
/// The GeomCart is the polygon used to render the cart; it also renders the pole attached to it.
/// </summary>
class GeomCart : GeomPolygon
{
    GeomPole m_pole;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="fL">Specifies the left edge.</param>
    /// <param name="fR">Specifies the right edge.</param>
    /// <param name="fT">Specifies the top edge.</param>
    /// <param name="fB">Specifies the bottom edge.</param>
    /// <param name="clrFill">Specifies the fill color.</param>
    /// <param name="clrBorder">Specifies the border color.</param>
    public GeomCart(float fL, float fR, float fT, float fB, Color clrFill, Color clrBorder)
        : base(fL, fR, fT, fB, clrFill, clrBorder)
    {
    }

    /// <summary>
    /// Attach a pole to the cart and move it to the cart's current location.
    /// </summary>
    /// <param name="pole">Specifies the pole to attach.</param>
    /// <param name="fXOffset">Specifies the x offset of the pole from the cart's location.</param>
    public void Attach(GeomPole pole, float fXOffset)
    {
        m_pole = pole;
        m_pole.SetLocation(Location.X + fXOffset, Location.Y);
    }

    /// <summary>
    /// Render the cart, followed by its pole when one is attached.
    /// </summary>
    /// <param name="g">Specifies the graphics object to draw on.</param>
    public override void Render(Graphics g)
    {
        base.Render(g);

        // m_pole stays null until Attach is called; a cart without a pole still renders.
        if (m_pole != null)
            m_pole.Render(g);
    }
}
486
/// <summary>
/// The GeomPole is the polygon used to render the pole, together with an ellipse marking its axis.
/// </summary>
class GeomPole : GeomPolygon
{
    GeomEllipse m_axis;

    /// <summary>
    /// The constructor.
    /// </summary>
    /// <param name="fL">Specifies the left edge.</param>
    /// <param name="fR">Specifies the right edge.</param>
    /// <param name="fT">Specifies the top edge.</param>
    /// <param name="fB">Specifies the bottom edge.</param>
    /// <param name="clrFill">Specifies the fill color.</param>
    /// <param name="clrBorder">Specifies the border color.</param>
    public GeomPole(float fL, float fR, float fT, float fB, Color clrFill, Color clrBorder)
        : base(fL, fR, fT, fB, clrFill, clrBorder)
    {
        // The axis ellipse is as wide as the pole and as tall as it is wide,
        // ending at the pole's bottom edge.
        float fAxisSize = fR - fL;
        float fAxisTop = fB - fAxisSize;

        m_axis = new GeomEllipse(fL, fR, fAxisTop, fB, Color.Brown, Color.Black);
    }

    /// <summary>
    /// Move the pole and its axis to the given location.
    /// </summary>
    /// <param name="fX">Specifies the x location.</param>
    /// <param name="fY">Specifies the y location.</param>
    public override void SetLocation(float fX, float fY)
    {
        // Axis first, then the pole itself.
        m_axis.SetLocation(fX, fY);
        base.SetLocation(fX, fY);
    }

    /// <summary>
    /// Render the pole and then its axis on top of it.
    /// </summary>
    /// <param name="g">Specifies the graphics object to draw on.</param>
    public override void Render(Graphics g)
    {
        base.Render(g);
        m_axis.Render(g);
    }
}
510
/// <summary>
/// The CartPoleState holds the cart position and velocity, the pole angle and angular
/// velocity, the current force magnitude and the step count.
/// </summary>
class CartPoleState : State
{
    /// <summary>
    /// Maximum cart distance from center (used as the x threshold and for normalization).
    /// </summary>
    public const double MAX_X = 2.4;

    /// <summary>
    /// Maximum pole angle in radians, i.e. 20 degrees (used as the theta threshold and for normalization).
    /// </summary>
    public const double MAX_THETA = 20 * (Math.PI/180);

    /// <summary>
    /// The constructor. The force magnitude and step count start at zero.
    /// </summary>
    /// <param name="dfX">Specifies the cart position.</param>
    /// <param name="dfXDot">Specifies the cart velocity.</param>
    /// <param name="dfTheta">Specifies the pole angle in radians.</param>
    /// <param name="dfThetaDot">Specifies the pole angular velocity.</param>
    public CartPoleState(double dfX = 0, double dfXDot = 0, double dfTheta = 0, double dfThetaDot = 0)
    {
        X = dfX;
        XDot = dfXDot;
        Theta = dfTheta;
        ThetaDot = dfThetaDot;
        ForceMag = 0;
    }

    /// <summary>
    /// The copy constructor; copies every value, including force magnitude and step count.
    /// </summary>
    /// <param name="s">Specifies the state to copy.</param>
    public CartPoleState(CartPoleState s)
        : this(s.X, s.XDot, s.Theta, s.ThetaDot)
    {
        ForceMag = s.ForceMag;
        Steps = s.Steps;
    }

    /// <summary>
    /// Get/set the step count.
    /// </summary>
    public int Steps { get; set; }

    /// <summary>
    /// Get/set the force magnitude.
    /// </summary>
    public double ForceMag { get; set; }

    /// <summary>
    /// Get/set the cart position.
    /// </summary>
    public double X { get; set; }

    /// <summary>
    /// Get/set the cart velocity.
    /// </summary>
    public double XDot { get; set; }

    /// <summary>
    /// Get/set the pole angle in radians.
    /// </summary>
    public double Theta { get; set; }

    /// <summary>
    /// Get/set the pole angular velocity.
    /// </summary>
    public double ThetaDot { get; set; }

    /// <summary>
    /// Returns the pole angle converted to degrees.
    /// </summary>
    public double ThetaInDegrees
    {
        get { return Theta * (180.0/Math.PI); }
    }

    /// <summary>
    /// Create a copy of the state.
    /// </summary>
    /// <returns>The new copy.</returns>
    public override State Clone()
    {
        return new CartPoleState(this);
    }

    /// <summary>
    /// Pack the state into a SimpleDatum of six values:
    /// X, XDot, Theta, ThetaDot, ForceMag and Steps.
    /// </summary>
    /// <param name="bNormalize">When true, the first five values are scaled by their expected range;
    /// the step count is always stored as-is.</param>
    /// <param name="nDataLen">Returns 4, although six values are written.
    /// NOTE(review): presumably only the first four values form the observation - confirm with the caller.</param>
    /// <returns>The SimpleDatum holding the values.</returns>
    public override SimpleDatum GetData(bool bNormalize, out int nDataLen)
    {
        int nScale = 4;
        Valuemap data = new Valuemap(1, 6, 1);

        nDataLen = 4;

        data.SetPixel(0, 0, getValue(X, -MAX_X, MAX_X, bNormalize));
        data.SetPixel(0, 1, getValue(XDot, -MAX_X * nScale, MAX_X * nScale, bNormalize));
        data.SetPixel(0, 2, getValue(Theta, -MAX_THETA, MAX_THETA, bNormalize));
        data.SetPixel(0, 3, getValue(ThetaDot, -MAX_THETA * nScale * 2, MAX_THETA * nScale * 2, bNormalize));
        data.SetPixel(0, 4, getValue(ForceMag, -100, 100, bNormalize));
        data.SetPixel(0, 5, Steps);

        return new SimpleDatum(data);
    }

    /// <summary>
    /// Optionally scale a value by its range, mapping dfMin to 0 and dfMax to 1.
    /// </summary>
    /// <param name="dfVal">Specifies the value.</param>
    /// <param name="dfMin">Specifies the range minimum.</param>
    /// <param name="dfMax">Specifies the range maximum.</param>
    /// <param name="bNormalize">When false, the value is returned unchanged.</param>
    /// <returns>The raw or normalized value.</returns>
    private double getValue(double dfVal, double dfMin, double dfMax, bool bNormalize)
    {
        return bNormalize ? (dfVal - dfMin) / (dfMax - dfMin) : dfVal;
    }
}
615}
The ColorMapper maps a value within a number range, to a Color within a color scheme.
Definition: ColorMapper.cs:14
The ImageData class is a helper class used to convert between Datum, other raw data,...
Definition: ImageData.cs:14
static Datum GetImageDataD(Bitmap bmp, int nChannels, bool bDataIsReal, int nLabel, bool bUseLockBitmap=true, int[] rgFocusMap=null)
The GetImageDataD function converts a Bitmap into a Datum using the double type for real data.
Definition: ImageData.cs:44
The Log class provides general output in text form.
Definition: Log.cs:13
void WriteLine(string str, bool bOverrideEnabled=false, bool bHeader=false, bool bError=false, bool bDisable=false)
Write a line of output.
Definition: Log.cs:80
Specifies a key-value pair of properties.
Definition: PropertySet.cs:16
double GetPropertyAsDouble(string strName, double dfDefault=0)
Returns a property as a double value.
Definition: PropertySet.cs:307
The SimpleDatum class holds a data input within host memory.
Definition: SimpleDatum.cs:161
The Valuemap operates similarly to a bitmap but is actually just an array of doubles.
Definition: Valuemap.cs:15
void SetPixel(int nX, int nY, double clr)
Set a given pixel to a given color.
Definition: Valuemap.cs:65
The DatasetDescriptor class describes a dataset which contains both a training data source and testin...
The SourceDescriptor class contains all information describing a data source.
The CartPole Gym provides a simulation of a cart with a balancing pole standing on top of it.
Definition: CartPoleGym.cs:27
DATA_TYPE SelectedDataType
Returns the selected data type.
Definition: CartPoleGym.cs:136
CartPoleGym()
The constructor.
Definition: CartPoleGym.cs:72
double TestingPercent
Returns the testing percent of -1, which then uses the default of 0.2.
Definition: CartPoleGym.cs:168
bool RequiresDisplayImage
Returns false indicating that this Gym does not require a display image.
Definition: CartPoleGym.cs:128
void Initialize(Log log, PropertySet properties)
Initialize the gym with the specified properties.
Definition: CartPoleGym.cs:92
DatasetDescriptor GetDataset(DATA_TYPE dt, Log log=null)
Returns the dataset descriptor of the dynamic dataset produced by the Gym.
Definition: CartPoleGym.cs:439
Dictionary< string, int > GetActionSpace()
Returns the action space as a dictionary of name,actionid pairs.
Definition: CartPoleGym.cs:176
void Close()
Shutdown and close the gym.
Definition: CartPoleGym.cs:201
DATA_TYPE[] SupportedDataType
Returns the data types supported by this gym.
Definition: CartPoleGym.cs:144
Tuple< State, double, bool > Step(int nAction, bool bGetLabel, PropertySet propExtra=null)
Step the gym one step in its simulation.
Definition: CartPoleGym.cs:375
string Name
Returns the gym's name.
Definition: CartPoleGym.cs:152
IXMyCaffeGym Clone(PropertySet properties=null)
Create a new copy of the gym.
Definition: CartPoleGym.cs:114
Tuple< Bitmap, SimpleDatum > Render(bool bShowUi, int nWidth, int nHeight, bool bGetAction)
Render the gym's current state on a bitmap and SimpleDatum.
Definition: CartPoleGym.cs:213
int UiDelay
Returns the delay to use (if any) when the user-display is visible.
Definition: CartPoleGym.cs:160
Tuple< State, double, bool > Reset(bool bGetLabel, PropertySet props=null)
Reset the state of the gym.
Definition: CartPoleGym.cs:349
Tuple< Bitmap, SimpleDatum > Render(bool bShowUi, int nWidth, int nHeight, double[] rgData, bool bGetAction)
Render the gym's specified data.
Definition: CartPoleGym.cs:236
ACTION
Defines the actions to perform.
Definition: CartPoleGym.cs:58
The GeomLine object is used to render a line.
Definition: Geometry.cs:247
The GeomEllipse object is used to render an ellipse.
Definition: Geometry.cs:319
The GeomView manages and renders a collection of Geometric objects.
Definition: Geometry.cs:476
void Render(Graphics g)
Renders the view.
Definition: Geometry.cs:570
void AddObject(GeomObj obj)
Add a new geometric object to the view.
Definition: Geometry.cs:490
void RenderText(Graphics g, string str, float fX, float fY, Brush br=null)
Render text at a location.
Definition: Geometry.cs:503
void RenderSteps(Graphics g, int nSteps, int nMax)
Renders the Gym step information.
Definition: Geometry.cs:519
State()
The constructor.
Definition: Interfaces.cs:346
The IXMyCaffeGym interface is used to interact with each Gym.
Definition: Interfaces.cs:99
The descriptors namespace contains all descriptor used to describe various items stored within the da...
The MyCaffe.basecode contains all generic types used throughout MyCaffe.
Definition: Annotation.cs:12
GYM_TYPE
Defines the gym type (if any).
Definition: Interfaces.cs:116
DATA_TYPE
Defines the gym data type.
Definition: Interfaces.cs:135
The MyCaffe.gym namespace contains all classes related to the Gyms supported by MyCaffe.
GYM_SRC_TRAIN_ID
Defines the Standard GYM Training Data Source ID's.
Definition: Interfaces.cs:45
GYM_DS_ID
Defines the Standard GYM Dataset ID's.
Definition: Interfaces.cs:18
GYM_SRC_TEST_ID
Defines the Standard GYM Testing Data Source ID's.
Definition: Interfaces.cs:72
The MyCaffe namespace contains the main body of MyCaffe code that closely tracks the C++ Caffe open-...
Definition: Annotation.cs:12