diff --git a/openAI_RRM/LICENSE b/openAI_RRM/LICENSE new file mode 100644 index 0000000..8ac59fd --- /dev/null +++ b/openAI_RRM/LICENSE @@ -0,0 +1,9 @@ +The MIT License (MIT) + +Copyright (c) 2016 Technische Universität Berlin + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2/common.py b/openAI_RRM/SimulationSlavesConfig/Setting2_2/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave.yaml new file mode 100644 index 0000000..230e9bd --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave.yaml @@ -0,0 +1,26 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm control simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:01", "cc:cc:cc:cc:cc:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git 
a/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave2.yaml new file mode 100644 index 0000000..79d760b --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2/config_slave2.yaml @@ -0,0 +1,26 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm control simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:11", "cc:cc:cc:cc:cc:12"], + 'neighbors' : [['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting2_2/readme.txt new file mode 100644 index 0000000..6522f4e --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2/readme.txt @@ -0,0 +1,15 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose thompson_agent.py or thompson_agent2.py, too + +# 2c. 
Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/common.py b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave.yaml new file mode 100644 index 0000000..a196962 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave.yaml @@ -0,0 +1,32 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm control simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:01", "cc:cc:cc:cc:cc:02"], + 'neighbors' : 
[['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault': 54000000, + 'channelThroughput':[54000000, 54000000, 54000000, 54000000, 26000000], + 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave2.yaml new file mode 100644 index 0000000..89d5374 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/config_slave2.yaml @@ -0,0 +1,32 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm control simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:11"], + 'neighbors' : [['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault': 54000000, + 'channelThroughput':[54000000, 54000000, 54000000, 54000000, 26000000], + 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/readme.txt new file mode 100644 index 0000000..6522f4e --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting2_2unsym/readme.txt @@ -0,0 +1,15 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. 
Run control program in master node: +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose thompson_agent.py or thompson_agent2.py, too + +# 2c. Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_112/common.py b/openAI_RRM/SimulationSlavesConfig/Setting3_112/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_112/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave.yaml new file mode 100644 index 0000000..613455d --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave.yaml @@ -0,0 +1,28 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + 
kwargs : { "clients" : ["cc:cc:cc:cc:cc:01", "cc:cc:cc:cc:cc:02", "cc:cc:cc:cc:cc:03", "cc:cc:cc:cc:cc:04", "cc:cc:cc:cc:cc:05", "cc:cc:cc:cc:cc:06"], + 'neighbors' : [['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, + 'mode': 'single' + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave2.yaml new file mode 100644 index 0000000..1631d92 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave2.yaml @@ -0,0 +1,28 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:11", "cc:cc:cc:cc:cc:12", "cc:cc:cc:cc:cc:13", "cc:cc:cc:cc:cc:14", "cc:cc:cc:cc:cc:15", "cc:cc:cc:cc:cc:16"], + 'neighbors' : [['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, + 'mode': 'single' + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave3.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave3.yaml new file mode 100644 index 0000000..0f3ecce --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_112/config_slave3.yaml @@ -0,0 +1,28 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: 
"tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:21", "cc:cc:cc:cc:cc:22", "cc:cc:cc:cc:cc:23", "cc:cc:cc:cc:cc:24", "cc:cc:cc:cc:cc:25", "cc:cc:cc:cc:cc:26"], + 'neighbors' : [['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:01']], + 'myMAC' : 'aa:aa:aa:aa:aa:03', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, + 'mode': 'single' # training or working + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_112/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting3_112/readme.txt new file mode 100644 index 0000000..4cb054e --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_112/readme.txt @@ -0,0 +1,16 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose thompson_agent.py or thompson_agent2.py, too + +# 2c. 
Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml +uniflex-agent --config ./config_slave3.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_222/common.py b/openAI_RRM/SimulationSlavesConfig/Setting3_222/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_222/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave.yaml new file mode 100644 index 0000000..d4107ed --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave.yaml @@ -0,0 +1,28 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:01", "cc:cc:cc:cc:cc:02", "cc:cc:cc:cc:cc:03", "cc:cc:cc:cc:cc:04", "cc:cc:cc:cc:cc:05", 
"cc:cc:cc:cc:cc:06"], + 'neighbors' : [['aa:aa:aa:aa:aa:02']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, + 'mode': 'singel', # training or working + 'clientnum': 2}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave2.yaml new file mode 100644 index 0000000..eba4afa --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave2.yaml @@ -0,0 +1,29 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:11", "cc:cc:cc:cc:cc:12", "cc:cc:cc:cc:cc:13", "cc:cc:cc:cc:cc:14", "cc:cc:cc:cc:cc:15", "cc:cc:cc:cc:cc:16"], + 'neighbors' : [['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, + 'mode': 'singel', # training or working + 'clientnum': 2 + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave3.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave3.yaml new file mode 100644 index 0000000..39afc9f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_222/config_slave3.yaml @@ -0,0 +1,29 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + 
kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:21", "cc:cc:cc:cc:cc:22", "cc:cc:cc:cc:cc:23", "cc:cc:cc:cc:cc:24", "cc:cc:cc:cc:cc:25", "cc:cc:cc:cc:cc:26"], + 'neighbors' : [['aa:aa:aa:aa:aa:02']], + 'myMAC' : 'aa:aa:aa:aa:aa:03', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, + 'mode': 'singel', # training or working + 'clientnum': 2 + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_222/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting3_222/readme.txt new file mode 100644 index 0000000..4cb054e --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_222/readme.txt @@ -0,0 +1,16 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose thompson_agent.py or thompson_agent2.py, too + +# 2c. 
Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml +uniflex-agent --config ./config_slave3.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/common.py b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave.yaml new file mode 100644 index 0000000..b375052 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave.yaml @@ -0,0 +1,34 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm multi scenario simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:01", "cc:cc:cc:cc:cc:02", "cc:cc:cc:cc:cc:03", 
"cc:cc:cc:cc:cc:04", "cc:cc:cc:cc:cc:05", "cc:cc:cc:cc:cc:06"], + 'neighbors' : [['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:03'], ['aa:aa:aa:aa:aa:02'], ['aa:aa:aa:aa:aa:03']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, 'clientconf': 'path-to-configfiles/clientconfig/3_112_AP1', + 'mode': 'generator', # training or working or generator + 'numsClients': [1,1,5,2,4], + #for generator + 'maxNumClients' : 100, + 'scenariosPerAPSetting': 60, + 'clientPrefix' : "cc:cc:cc:cc:01:", + 'scenarioBackup' : 'path-to-backup-files/AP1_scenarios.var' + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave2.yaml new file mode 100644 index 0000000..b94b749 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave2.yaml @@ -0,0 +1,34 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm multi scenario simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:11", "cc:cc:cc:cc:cc:12", "cc:cc:cc:cc:cc:13", "cc:cc:cc:cc:cc:14", "cc:cc:cc:cc:cc:15", "cc:cc:cc:cc:cc:16"], + 'neighbors' : [['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03'], ['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:03'], []], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, 'clientconf': 'path-to-configfiles/3_112_AP2', + 'mode': 'generator', # training or working or generator + 'numsClients': [1,3,1,4,3], + #for generator + 'maxNumClients' : 100, 
+ 'clientPrefix' : "cc:cc:cc:cc:02:", + 'scenariosPerAPSetting': 60, + 'scenarioBackup' : 'path-to-backup-files/AP2_scenarios.var' + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave3.yaml b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave3.yaml new file mode 100644 index 0000000..71c5647 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/config_slave3.yaml @@ -0,0 +1,34 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'rrm multi scenario simulation' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +control_applications: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:cc:21", "cc:cc:cc:cc:cc:22", "cc:cc:cc:cc:cc:23", "cc:cc:cc:cc:cc:24", "cc:cc:cc:cc:cc:25", "cc:cc:cc:cc:cc:26"], + 'neighbors' : [['aa:aa:aa:aa:aa:02', 'aa:aa:aa:aa:aa:01'], ['aa:aa:aa:aa:aa:02'], ['aa:aa:aa:aa:aa:01']], + 'myMAC' : 'aa:aa:aa:aa:aa:03', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'clientconf': 'path-to-configfiles/3_112_AP3', + 'mode': 'generator', # training or working or generator + 'numsClients': [5,2,3,2,2], + #for generator + 'maxNumClients' : 100, + 'scenariosPerAPSetting': 60, + 'clientPrefix' : "cc:cc:cc:cc:03:", + 'scenarioBackup' : 'path-to-backup-files/AP3_scenarios.var' + }} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/readme.txt new file mode 100644 index 0000000..5b6ee7b --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting3_varSet/readme.txt @@ -0,0 +1,15 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 
2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent_multi.py --config ./config_master_simulation.yaml + +# 2c. Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml +uniflex-agent --config ./config_slave3.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/common.py b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave.yaml new file mode 100644 index 0000000..83a70b7 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + 
discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:01:01", "cc:cc:cc:cc:01:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:02','aa:aa:aa:aa:aa:04']], + 'myMAC' : 'aa:aa:aa:aa:aa:01', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave2.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave2.yaml new file mode 100644 index 0000000..2f16895 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave2.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:02:01", "cc:cc:cc:cc:02:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:01','aa:aa:aa:aa:aa:03','aa:aa:aa:aa:aa:05']], + 'myMAC' : 'aa:aa:aa:aa:aa:02', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave3.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave3.yaml new file mode 100644 index 0000000..3993fde --- 
/dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave3.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:03:01", "cc:cc:cc:cc:03:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:02','aa:aa:aa:aa:aa:06']], + 'myMAC' : 'aa:aa:aa:aa:aa:03', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave4.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave4.yaml new file mode 100644 index 0000000..6b13e34 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave4.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : 
["cc:cc:cc:cc:04:01"], + 'neighbors' : [['aa:aa:aa:aa:aa:01','aa:aa:aa:aa:aa:05','aa:aa:aa:aa:aa:07']], + 'myMAC' : 'aa:aa:aa:aa:aa:04', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave5.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave5.yaml new file mode 100644 index 0000000..4670e55 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave5.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:05:01", "cc:cc:cc:cc:05:02", "cc:cc:cc:cc:05:03"], + 'neighbors' : [['aa:aa:aa:aa:aa:02','aa:aa:aa:aa:aa:04','aa:aa:aa:aa:aa:06','aa:aa:aa:aa:aa:08']], + 'myMAC' : 'aa:aa:aa:aa:aa:05', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':3, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave6.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave6.yaml new file mode 100644 index 0000000..47127a6 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave6.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + 
sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:06:01"], + 'neighbors' : [['aa:aa:aa:aa:aa:03','aa:aa:aa:aa:aa:05','aa:aa:aa:aa:aa:08']], + 'myMAC' : 'aa:aa:aa:aa:aa:06', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':1, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave7.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave7.yaml new file mode 100644 index 0000000..b2d9670 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave7.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:07:01", "cc:cc:cc:cc:07:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:04', 'aa:aa:aa:aa:aa:08']], + 'myMAC' : 'aa:aa:aa:aa:aa:07', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 
'channelThroughput':[54000000, 54000000, 54000000, 54000000, 26000000], 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave8.yaml b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave8.yaml new file mode 100644 index 0000000..2aa4366 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/config_slave8.yaml @@ -0,0 +1,35 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xsub: "tcp://127.0.0.1:8990" +# xpub: "tcp://127.0.0.1:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_simple + class_name : SimpleModule4 + devices : ['phy0'] + kwargs : { "clients" : ["cc:cc:cc:cc:08:01", "cc:cc:cc:cc:08:02"], + 'neighbors' : [['aa:aa:aa:aa:aa:05','aa:aa:aa:aa:aa:06','aa:aa:aa:aa:aa:07']], + 'myMAC' : 'aa:aa:aa:aa:aa:08', + 'simulation': {'channelSwitchingTime': 0, 'channelThroughputDefault':54000000, 'txBytesRandom':0, 'clientnum':2, 'mode': 'single'}} + diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/my_filter.py b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/my_filter.py new file mode 100755 index 0000000..59d59a2 --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/my_filter.py @@ -0,0 +1,53 @@ +import logging +from uniflex.core import modules +from sbi.radio_device.events import SpectralScanSampleEvent +from common import AveragedSpectrumScanSampleEvent +from common import ChangeWindowSizeEvent + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = 
"{gawlowicz}@tkn.tu-berlin.de" + + +class MyAvgFilter(modules.ControlApplication): + def __init__(self, window): + super(MyAvgFilter, self).__init__() + self.log = logging.getLogger('MyFilter') + self.window = window + self.samples = [] + + @modules.on_event(ChangeWindowSizeEvent) + def change_window_size(self, event): + self.log.info("New window size: {}".format(event.window)) + self.window = event.window + + def change_window_size_func(self, newWindow): + self.log.info("New window size: {}".format(newWindow)) + self.window = newWindow + + def get_window_size(self): + return self.window + + @modules.on_event(SpectralScanSampleEvent) + def serve_spectral_scan_sample(self, event): + sample = event.sample + node = event.node + device = event.device + self.log.debug("New SpectralScan Sample:{} from node {}, device: {}" + .format(sample, node, device)) + + self.samples.append(sample) + + if len(self.samples) == self.window: + s = sum(self.samples) + self.samples.pop(0) + avg = s / self.window + self.log.debug("Calculated average: {}".format(avg)) + event = AveragedSpectrumScanSampleEvent(avg) + self.send_event(event) + + def add_two(self, value): + value1 = value + 2 + value2 = value * 2 + return [value1, value2] diff --git a/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/readme.txt b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/readme.txt new file mode 100644 index 0000000..1f1c50f --- /dev/null +++ b/openAI_RRM/SimulationSlavesConfig/Setting8_22213122/readme.txt @@ -0,0 +1,21 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose thompson_agent.py or thompson_agent2.py, too + +# 2c. 
Run modules in slave node: +#Simulation +uniflex-agent --config ./config_slave.yaml +uniflex-agent --config ./config_slave2.yaml +uniflex-agent --config ./config_slave3.yaml +uniflex-agent --config ./config_slave4.yaml +uniflex-agent --config ./config_slave5.yaml +uniflex-agent --config ./config_slave6.yaml +uniflex-agent --config ./config_slave7.yaml +uniflex-agent --config ./config_slave8.yaml + +# For debugging mode run with -v option diff --git a/openAI_RRM/channel_controller.py b/openAI_RRM/channel_controller.py new file mode 100755 index 0000000..f56407b --- /dev/null +++ b/openAI_RRM/channel_controller.py @@ -0,0 +1,680 @@ +import logging +import datetime +import random +import numpy +import sys +from math import * + +from functools import reduce + +from sbi.radio_device.events import PacketLossEvent +from uniflex.core import modules +from uniflex.core import events +from uniflex.core.timer import TimerEventSender +from common import AveragedSpectrumScanSampleEvent +from common import ChangeWindowSizeEvent + +from gym import spaces + +from UniFlexGym.interfaces.uniflex_controller import UniFlexController + +__author__ = "Piotr Gawlowicz, Sascha Rösler" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de, s.resler@campus.tu-berlin.de" + +class PeriodicEvaluationTimeEvent(events.TimeEvent): + def __init__(self): + super().__init__() + + +class UniflexChannelController(modules.ControlApplication, UniFlexController): + def __init__(self,**kwargs): + super(UniflexChannelController, self).__init__() + self.log = logging.getLogger('ChannelController') + logging.basicConfig(stream=sys.stdout, level=logging.INFO) + self.running = False + + self.timeInterval = 10 + + self.packetLossEventsEnabled = False + self.channel = 1 + self.availableChannels = [] + self.observationSpace = [] + self.lastObservation = [] + self.actionSet = [] + self.simulation = False + self.simulationsteps = None + 
self.aporder = None + self.mode = "" + self.scenarios = 1 + self.currentScenario = 0 + + + self.actionOrder = [] + self.observationOrder = [] + + self.registeredClients = self._create_interface_list() + + if 'availableChannels' in kwargs: + self.availableChannels = kwargs['availableChannels'] + + if 'simulation' in kwargs: + self.simulation = kwargs['simulation'] + + if 'steptime' in kwargs: + self.simulationsteptime = kwargs['steptime'] + + if 'order' in kwargs: + self.aporder = kwargs['order'] + + if 'mode' in kwargs: + self.mode = kwargs['mode'] + + if 'scenarios' in kwargs: + self.scenarios = kwargs['scenarios'] + + @modules.on_start() + def my_start_function(self): + print("start control app") + self.running = True +# self.openAI_controller.run() + + @modules.on_exit() + def my_stop_function(self): + print("stop control app") + self.running = False + + @modules.on_event(events.NewNodeEvent) + def add_node(self, event): + node = event.node + + self.log.info("Added new node: {}, Local: {}" + .format(node.uuid, node.local)) + self._add_node(node) + + for dev in node.get_devices(): + print("Dev: ", dev.name) + print(dev) + + for m in node.get_modules(): + print("Module: ", m.name) + print(m) + + for app in node.get_control_applications(): + print("App: ", app.name) + print(app) + + #device = node.get_device(0) + #device.set_tx_power(15, "wlan0") + #device.set_channel(random.randint(1, 11), "wlan0") + #device.packet_loss_monitor_start() + #device.spectral_scan_start() + # device.play_waveform() + # TODO: is_implemented() + + @modules.on_event(events.NodeExitEvent) + @modules.on_event(events.NodeLostEvent) + def remove_node(self, event): + self.log.info("Node lost".format()) + node = event.node + reason = event.reason + if self._remove_node(node): + self.log.info("Node: {}, Local: {} removed reason: {}" + .format(node.uuid, node.local, reason)) + + @modules.on_event(PacketLossEvent) + def serve_packet_loss_event(self, event): + node = event.node + device = 
event.device + self.log.info("Packet loss in node {}, dev: {}" + .format(node.hostname, device.name)) + + @modules.on_event(AveragedSpectrumScanSampleEvent) + def serve_spectral_scan_sample(self, event): + avgSample = event.avg + self.log.info("Averaged Spectral Scan Sample: {}" + .format(avgSample)) + + def default_cb(self, data): + node = data.node + devName = None + if data.device: + devName = data.device.name + msg = data.msg + print("Default Callback: " + "Node: {}, Dev: {}, Data: {}" + .format(node.hostname, devName, msg)) + + def get_power_cb(self, data): + node = data.node + msg = data.msg + dev = node.get_device(0) + print("Power in " + "Node: {}, Dev: {}, was set to: {}" + .format(node.hostname, dev.name, msg)) + + newPwr = random.randint(1, 20) + dev.blocking(False).set_tx_power(newPwr, "wlan0") + print("Power in " + "Node: {}, Dev: {}, was set to: {}" + .format(node.hostname, dev.name, newPwr)) + + def _get_device_by_uuids(self, node_uuid, dev_uuid): + nodes = self.get_nodes() + myNodes = [x for x in nodes if x.uuid == node_uuid] + if(len(myNodes) is not 1): + return None + node = myNodes[0] + devices = node.get_devices() + myDevices = [x for x in devices if x.uuid == dev_uuid] + if(len(myDevices) is not 1): + return None + return myDevices[0] + + def scheduled_get_channel_cb(self, data): + node = data.node + msg = data.msg + dev = node.get_device(0) + print("Scheduled get_channel; Power in " + "Node: {}, Dev: {}, was set to: {}" + .format(node.hostname, dev.name, msg)) + + ''' + Channel mapping controller + ''' + def set_channel(self, node_uuid, dev_uuid, ifaceName, channel_number, channel_width): + ''' + Set one channel to one AP + :param node_uuid: UUID of AP node + :param dev_uuid: UUID of AP device + :param ifaceName: Name of AP interface + :param channel_number: Number of new channel + :param channel_width: Bandwidth of new channel + ''' + device = self._get_device_by_uuids(node_uuid, dev_uuid) + if device is None: + return False + if 
channel_width is not None: + device.blocking(False).set_channel(channel_number, ifaceName, channel_width= channel_width, control_socket_path='/var/run/hostapd') + else: + device.blocking(False).set_channel(channel_number, ifaceName, control_socket_path='/var/run/hostapd') + return True + + def get_num_clients(self): + ''' + Returns a list of number of clients of each ap + ''' + client_nums = [] + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + infos = device.get_info_of_connected_devices(interface) + client_nums.append(len(infos)) + return client_nums + + def get_num_current_neighbours(self): + ''' + Returns a list of numbers of neighbours of each ap + ''' + neighbours = [] + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + infos = device.get_current_neighbours(interface) + neighbours.append(len(infos)) + return neighbours + + def get_num_neighbours(self): + ''' + Returns a list of numbers of neighbours of each ap + ''' + neighbours = [] + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + try: + infos = device.get_neighbours(interface) + neighbours.append(len(infos)) + except AttributeError: + if self.scenarios > 1: + print("Device module does not support get_neighbours, but there are multiple scenarios!") + neighbours.append(1) + return neighbours + + def get_bandwidth(self): + ''' + Returns a list of the bandwidth of all transmitted data from one + controlled device to a client. 
The data is structured as follows: + { + 'MAC_of_client1' : { + 'mac' : 'MAC_of_client1', + 'bandwidth': bandwidth to the client, + 'node': { + 'hostname': 'hostname of my AP node', + 'uuid': 'uuid of my AP node' + }, + 'device': { + 'name': 'device name of the AP's physical interface', + 'uuid': 'uuid of the device', + }, + 'interface': 'name of the interface' + } + } + Notice: new devices have bandwidth 0! + ''' + bandwidth = {} + for node in self.get_nodes(): + for device in node.get_devices(): + if type(device.my_control_flow) is not list: + device.my_control_flow = [] + for i in range(self.scenarios): + device.my_control_flow.append([]) + + for flow in device.my_control_flow[self.currentScenario]: + flow['old'] = True + + for interface in device.get_interfaces(): + infos = device.get_info_of_connected_devices(interface) + + for mac in infos: + values = infos[mac] + newTxBytes = int(values['tx bytes'][0]) + + flow = [d for d in device.my_control_flow[self.currentScenario] if d['mac address'] == mac] + if len(flow) > 0: + flow = flow[0] + dif = datetime.datetime.now() - flow['last update'] + tmpBandwidth = (newTxBytes - flow['tx bytes'] ) / (dif.total_seconds() + dif.microseconds / 1000000.0) + if(self.simulation and self.simulationsteptime): + #print("calculate bandwidth: " + str(newTxBytes - flow['tx bytes']) + " Bytes in " + str(self.simulationsteptime)) + tmpBandwidth = (newTxBytes - flow['tx bytes'] ) / (self.simulationsteptime) + bandwidth[mac] = { + 'bandwidth':(tmpBandwidth), + 'node': {'hostname': node.hostname, 'uuid': node.uuid}, + 'device': {'name': device.name, 'uuid': device.uuid}, + 'interface': interface} + flow['tx bytes'] = newTxBytes + flow['last update'] = datetime.datetime.now() + flow['old'] = False + else : + device.my_control_flow[self.currentScenario].append({'mac address' : mac, 'tx bytes' : newTxBytes, 'last update' : datetime.datetime.now(), 'old' : False}) + bandwidth[mac] = { + 'mac' : mac, + 'bandwidth': 0, + 'node': {'hostname': 
node.hostname, 'uuid': node.uuid}, + 'device': {'name': device.name, 'uuid': device.uuid}, + 'interface': interface} + + for flow in device.my_control_flow[self.currentScenario]: + if flow['old']: + device.my_control_flow[self.currentScenario].remove(flow) + return bandwidth + + def _get_raw_clientlist(self): + ''' + Returns a list of the bandwidth of all transmitted data from one + controlled device to a client. The data is structured as follows: + { + 'MAC_of_client1' : { + 'mac' : 'MAC_of_client1', + 'node': { + 'hostname': 'hostname of my AP node', + 'uuid': 'uuid of my AP node' + }, + 'device': { + 'name': 'device name of the AP's physical interface', + 'uuid': 'uuid of the device', + }, + 'interface': 'name of the interface' + } + } + Notice: new devices have bandwidth 0! + ''' + clientlist = {} + for node in self.get_nodes(): + for device in node.get_devices(): + if type(device.my_control_flow) is not list: + device.my_control_flow = [] + for i in range(self.scenarios): + device.my_control_flow.append([]) + + for flow in device.my_control_flow[self.currentScenario]: + flow['old'] = True + + for interface in device.get_interfaces(): + infos = device.get_info_of_connected_devices(interface) + + for mac in infos: + clientlist[mac] = { + 'node': {'hostname': node.hostname, 'uuid': node.uuid}, + 'device': {'name': device.name, 'uuid': device.uuid}, + 'interface': interface} + return clientlist + + def get_interfaces(self): + ''' + Returns a data structure of all available interfaces in the system + It is structured as follows: + [ + { + 'hostname' : 'hostname of node1', + 'uuid' : 'uuid of node1', + 'devices' : [ + { + 'name' : 'name of device1', + 'uuid' : 'uuid of device1', + 'interfaces' : [ + 'name of iface1', 'name of iface2' + ] + }, + ], + ... + }, + ... + ] + + fills self.aporder. Map index in uniflex to index in order list + fills self.observationOrder. 
Map index in agent to index in uniflex list + ''' + orphanApId = 0 + if self.aporder: + orphanApId = len(self.aporder) + + self.actionOrder = [] + interfaces = [] + for node in self.get_nodes(): + nodeinfo = {'hostname': node.hostname, 'uuid': node.uuid} + devices = [] + for device in node.get_devices(): + devinfo = {'name': device.name, 'uuid': device.uuid} + interfaces_tmp = [] + for interface in device.get_interfaces(): + interfaces_tmp.append(interface) + if self.aporder: + try: + mac = device.getHwAddr(interface) + indexInOrder = self.aporder.index(mac) + self.actionOrder.append(indexInOrder) + except ValueError: + print("Device is unknown:" + mac) + self.actionOrder.append(orphanApId) + orphanApId += 1 + else: + self.actionOrder.append(orphanApId) + orphanApId += 1 + + devinfo['interfaces'] = interfaces_tmp + devices.append(devinfo) + nodeinfo['devices'] = devices + interfaces.append(nodeinfo) + + self.observationOrder = [] + print(self.actionOrder) + for i in range(0, len(self.actionOrder)): + self.observationOrder.append(self.actionOrder.index(i)) + return interfaces + + def get_channels(self): + ''' + Collects and returns a list of the channel to interface mapping + [ + {'channel number' : 'number of the channel', + 'channel width' : 'width of the channel', + 'node': { + 'hostname': 'hostname of my AP node', + 'uuid': 'uuid of my AP node' + }, + 'device': { + 'name': 'device name of the AP's physical interface', + 'uuid': 'uuid of the device', + }, + 'interface': 'name of the interface' + ] + ''' + channel_mapping = [] + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + chnum = device.get_channel(interface) + chw = device.get_channel_width(interface) + + channel_mapping.append({ + 'channel number' : chnum, + 'channel width' : chw, + 'device' : {'name': device.name, 'uuid': device.uuid}, + 'node' : {'hostname': node.hostname, 'uuid': node.uuid}, + 'interface' : interface}) + return 
channel_mapping + + def simulate_flows(self): + ''' + Simulate packet counters on simulated APs + ''' + + flows = [] + + #collect state(channels and bandwidth) of all devices + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + chnum = device.get_channel(interface) + chw = device.get_channel_width(interface) + infos = device.get_info_of_connected_devices(interface) + mac = device.getHwAddr() + + flows.append({'mac address' : mac, 'channel number' : chnum, 'channel width' : chw, 'iface': interface}) + print("simulate for " + str(self.currentScenario)) + # simulate packet counter on AP modules + for node in self.get_nodes(): + for device in node.get_devices(): + for interface in device.get_interfaces(): + device.set_packet_counter(flows, interface, self.simulationsteptime, self.currentScenario) + + @modules.on_event(PeriodicEvaluationTimeEvent) + def periodic_evaluation(self, event): + # go over collected samples, etc.... + # make some decisions, etc... 
+ print("Periodic Evaluation") + print("My nodes: ", [node.hostname for node in self.get_nodes()]) + self.timer.start(self.timeInterval) + + if len(self.get_nodes()) == 0: + return + self.reset() + self.execute_action([1]) + print(self.get_observation()) + + + + ''' + OpenAI Gym Uniflex env API + ''' + + + def reset(self): + print("reset") + self.registeredClients = self._create_interface_list() + self.observationSpace = self.get_observationSpace() + self.actionSpace = self.get_actionSpace() + self.actionSet = [] + self.currentScenario = 0 + + self.observations = [] + for obs in range(self.scenarios): + obsElem = [] + neighbours_nums = self.get_num_neighbours() + for i in range(0, len(neighbours_nums)): + obsElem.append([0, 0]) + self.observations.append(obsElem) + + interfaces = self.get_interfaces() + + # set a start channel for each interface: + channel = 1 + for node in interfaces: + for device in node['devices']: + for iface in device['interfaces']: + self.set_channel( + node['uuid'], device['uuid'], iface, channel, None) + channel += 6 + if channel > 14: + channel = 1 + + # fill obsersavion buffer + for i in range(self.scenarios): + self.get_observation() + # clear bandwidth counter + if(self.simulation): + self.simulate_flows() + self.get_bandwidth() + + self.currentScenario = self.scenarios -1 + return + + def execute_action(self, action): + ''' + Map scalar action to channel vector + channel value = (action/numberOfChannels^AP_id) mod numberOfChannels + ''' + for index, interface in enumerate(self._create_interface_list()): + apindex = self.actionOrder[index] + ifaceaction = action[apindex] + #self.log.info(str(index) + "List-AP is " + str(apindex) + " registered AP, gets channel " + str(self.availableChannels[int(ifaceaction)]) + ", UUID: " +str(interface['device'])) + #ifaceaction = int(action / (pow(len(self.availableChannels),apindex))) + #ifaceaction = ifaceaction % len(self.availableChannels) + self.set_channel(interface['node'], interface['device'], 
interface['iface'], + self.availableChannels[int(ifaceaction)], None) + return + + def render(): + return + + def get_observationSpace(self): + ''' + Returns observation space for open AI gym + Observation space is a matrix of number of APs * 2 + First column represents the number of clients per ap, + the second column the numer of neighbouring aps + the maximum is 10 + ''' + #maxValues = [len(self.availableChannels) for i in self._create_interface_list()] + return spaces.Box(low=0, high=numpy.iinfo(numpy.uint32).max, shape=(len(self._create_interface_list()),2), dtype=numpy.uint32) + #return spaces.MultiDiscrete(maxValues) + #spaces.Box(low=0, high=10000000, shape=(len(self.observationSpace),), dtype=numpy.float32) + + def get_actionSpace(self): + ''' + Returns action space for open AI gym + result is a Discrete scalar space + dimension is NumberOfChannels^NumberOfAPs + ''' + interfaceList = self._create_interface_list(); + if(len(interfaceList) > 0): + self.log.info("UUIDs of the action space") + for key, interface in enumerate(interfaceList): + self.log.info(str(key) + ":" + interface['device']) + if len(interfaceList) == 0: + return spaces.MultiDiscrete([]) + maxValues = [len(self.availableChannels) for i in self._create_interface_list()] + return spaces.MultiDiscrete(maxValues) + #([ 5, 2, 2 ])(pow(len(self.availableChannels), len(interfaceList))) + + def get_observation(self): + ''' + Returns vector with state (channel) of each AP + ''' + client_nums = self.get_num_clients() + neighbours_nums = self.get_num_neighbours() + resultUniflexOrder = [] + for i in range(0, len(neighbours_nums)): + resultUniflexOrder.append([client_nums[i], neighbours_nums[i]]) + #switch order of values in list + result = [] + + for i in range(0, len(resultUniflexOrder)): + if i >= len(self.observationOrder): + break + result.append(resultUniflexOrder[self.observationOrder[i]]) + + if self.mode == "training": + #store obsersavion for next time with this scenario + 
self.observations[self.currentScenario] = result + #load obsersavion for next scenario + self.currentScenario += 1 + if self.currentScenario >= self.scenarios: + self.currentScenario = 0 + result = self.observations[self.currentScenario] + return result + + # game over if there is a new interface + def get_gameOver(self): + ''' + Test if topology changes + Bases on information, which client is registered at which AP + ''' + clients = self._create_interface_list() + clientHash = [i['node'] + i['device'] + i['iface'] for i in clients] # i['mac'] + observationSpaceHash = [i['node'] + i['device'] + i['iface'] for i in self.registeredClients] #i['mac'] + return not len(set(clientHash).symmetric_difference(set(observationSpaceHash))) == 0 + + def get_reward(self): + ''' + Calculate reward for the current state + reward = sum (sqrt(throughput of client)) + ''' + # for simulation + if(self.simulation): + self.simulate_flows() + + bandwidthList = self.get_bandwidth() + #self.log.info("Bandwidth: " + str(bandwidthList)) + #bandwidth = sorted(bandwidth, key=lambda k: k['mac']) + reward = 0 + for key in bandwidthList: + item = bandwidthList[key] + if item['bandwidth'] < 0: + print("Bandwidth has invalid value: " + str(item['bandwidth'])) + print(bandwidthList) + continue + reward += sqrt(item['bandwidth']) + return reward + + + + def _get_bandwidth_by_client(self, bandwidthList, clientData): + ''' + extracts bandwidth of client from bandwidth list + :param bandwidthList: List of all clients, the AP they are associated with and their bandwidth + :param clientData: data of the client. 
+ ''' + for mac, client in bandwidthList.items(): + if (mac == clientData['mac']) and (client['node']['uuid'] == clientData['node']) and (client['device']['uuid'] == clientData['device']) and (client['interface'] == clientData['iface']): + return client['bandwidth'] + return None + + def _create_client_list(self): + ''' + create linear client list + result is list of dictionarys with attribute: mac, node, device, iface + ''' + clientList = [] + clients = self._get_raw_clientlist() + for mac, client in clients.items(): + clientList.append({'mac': mac, 'node': client['node']['uuid'], + 'device': client['device']['uuid'], 'iface': client['interface']}) + clientList = sorted(clientList, key=lambda k: k['mac']) + return clientList + + def _create_interface_list(self): + ''' + create linear ap list + result is list of dictionarys with attribute: node, device, iface + ''' + interfaceList = [] + interfaces = self.get_interfaces() + for node in interfaces: + for device in node['devices']: + for iface in device['interfaces']: + interfaceList.append({'node': node['uuid'], 'device': device['uuid'], 'iface': iface}) + return interfaceList diff --git a/openAI_RRM/common.py b/openAI_RRM/common.py new file mode 100755 index 0000000..370d25f --- /dev/null +++ b/openAI_RRM/common.py @@ -0,0 +1,28 @@ +from uniflex.core import events + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class AveragedSpectrumScanSampleEvent(events.EventBase): + def __init__(self, avg): + super().__init__() + self.avg = avg + + +class StartMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class StopMyFilterEvent(events.EventBase): + def __init__(self): + super().__init__() + + +class ChangeWindowSizeEvent(events.EventBase): + def __init__(self, value): + super().__init__() + self.window = value diff --git a/openAI_RRM/config_master.yaml 
b/openAI_RRM/config_master.yaml new file mode 100644 index 0000000..c5e8f1d --- /dev/null +++ b/openAI_RRM/config_master.yaml @@ -0,0 +1,31 @@ +## UniFlex Agent config file + +config: + name: "Global_Controller" + info: 'agent hosts global controller' + iface: 'lo' + sub: "tcp://192.168.10.157:8990" + pub: "tcp://192.168.10.157:8989" + +broker: + xpub: "tcp://192.168.10.157:8990" + xsub: "tcp://192.168.10.157:8989" + +control_applications: + myController: + file : channel_controller.py + class_name : UniflexChannelController + openAIGymController: True + kwargs : { + 'availableChannels' : [1,5] + } + +modules: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoveryMasterModule + kwargs: {"iface":"lo", + "groupName":"uniflex_1234", + "sub":"tcp://192.168.10.157:8990", + "pub":"tcp://192.168.10.157:8989" + } diff --git a/openAI_RRM/config_master_simulation.yaml b/openAI_RRM/config_master_simulation.yaml new file mode 100644 index 0000000..cad4e10 --- /dev/null +++ b/openAI_RRM/config_master_simulation.yaml @@ -0,0 +1,36 @@ +## UniFlex Agent config file + +config: + name: "Global_Controller" + info: 'agent hosts global controller' + iface: 'lo' + sub: "tcp://127.0.0.1:8990" + pub: "tcp://127.0.0.1:8989" + +#broker: +# xpub: "tcp://127.0.0.1:8990" +# xsub: "tcp://127.0.0.1:8989" + +control_applications: + myController: + file : channel_controller.py + class_name : UniflexChannelController + openAIGymController: True + kwargs : { + 'availableChannels' : [1,5], + 'steptime' : 1, + 'simulation': True, + 'mode': 'training', # training or working + 'scenarios': 1, + 'order': ['aa:aa:aa:aa:aa:01', 'aa:aa:aa:aa:aa:02','aa:aa:aa:aa:aa:03', 'aa:aa:aa:aa:aa:04', 'aa:aa:aa:aa:aa:05','aa:aa:aa:aa:aa:06', 'aa:aa:aa:aa:aa:07','aa:aa:aa:aa:aa:08'] + } + +modules: + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoveryMasterModule + kwargs: {"iface":"lo", + "groupName":"uniflex_1234", + "sub":"tcp://127.0.0.1:8990", + 
"pub":"tcp://127.0.0.1:8989" + } diff --git a/openAI_RRM/config_slave.yaml b/openAI_RRM/config_slave.yaml new file mode 100644 index 0000000..0488bb6 --- /dev/null +++ b/openAI_RRM/config_slave.yaml @@ -0,0 +1,32 @@ +## UniFlex Agent config file + +config: + name: 'HC node' + info: 'filter runs on local node' + iface: 'lo' + iface: 'lo' + sub: "tcp://192.168.10.157:8990" + pub: "tcp://192.168.10.157:8989" + +#broker: +# xpub: "tcp://192.168.10.157:8990" +# xsub: "tcp://192.168.10.157:8989" + +control_applications: + myFilter: + file : my_filter.py + class_name : MyAvgFilter + kwargs : {"window": 5} + + discovery: + module : uniflex_app_discovery_pyre + class_name : PyreDiscoverySlaveModule + kwargs: {"iface":"lo", "groupName":"uniflex_1234"} + +modules: + simple: + module : uniflex_module_wifi + class_name : WifiModule + devices : ['phy0'] + kwargs : {} + diff --git a/openAI_RRM/my_filter.py b/openAI_RRM/my_filter.py new file mode 100755 index 0000000..59d59a2 --- /dev/null +++ b/openAI_RRM/my_filter.py @@ -0,0 +1,53 @@ +import logging +from uniflex.core import modules +from sbi.radio_device.events import SpectralScanSampleEvent +from common import AveragedSpectrumScanSampleEvent +from common import ChangeWindowSizeEvent + +__author__ = "Piotr Gawlowicz" +__copyright__ = "Copyright (c) 2016, Technische Universität Berlin" +__version__ = "0.1.0" +__email__ = "{gawlowicz}@tkn.tu-berlin.de" + + +class MyAvgFilter(modules.ControlApplication): + def __init__(self, window): + super(MyAvgFilter, self).__init__() + self.log = logging.getLogger('MyFilter') + self.window = window + self.samples = [] + + @modules.on_event(ChangeWindowSizeEvent) + def change_window_size(self, event): + self.log.info("New window size: {}".format(event.window)) + self.window = event.window + + def change_window_size_func(self, newWindow): + self.log.info("New window size: {}".format(newWindow)) + self.window = newWindow + + def get_window_size(self): + return self.window + + 
@modules.on_event(SpectralScanSampleEvent) + def serve_spectral_scan_sample(self, event): + sample = event.sample + node = event.node + device = event.device + self.log.debug("New SpectralScan Sample:{} from node {}, device: {}" + .format(sample, node, device)) + + self.samples.append(sample) + + if len(self.samples) == self.window: + s = sum(self.samples) + self.samples.pop(0) + avg = s / self.window + self.log.debug("Calculated average: {}".format(avg)) + event = AveragedSpectrumScanSampleEvent(avg) + self.send_event(event) + + def add_two(self, value): + value1 = value + 2 + value2 = value * 2 + return [value1, value2] diff --git a/openAI_RRM/readme.txt b/openAI_RRM/readme.txt new file mode 100644 index 0000000..e8944b0 --- /dev/null +++ b/openAI_RRM/readme.txt @@ -0,0 +1,20 @@ +# Start environment if Uniflex is installed in some +source ~/Uniflex/dev/bin/activate + +# 2a. Run control program in master node: +uniflex-broker +# 2b. Run control program in master node: +python3 rl_agent.py --config ./config_master.yaml +# or +python3 rl_agent.py --config ./config_master_simulation.yaml +# you can choose rl_agent_multi.py, thompson_agent.py or thompson_agent2.py, too + +# 2c. 
Run modules in slave node: +#Linux WiFi AP +uniflex-agent --config ./config_slave.yaml +#Simulation +uniflex-agent --config ./SimulationSlavesConfig/##Name of Experiment##/config_slave.yaml +uniflex-agent --config ./SimulationSlavesConfig/##Name of Experiment##/config_slave2.yaml +# and so on + +# For debugging mode run with -v option diff --git a/openAI_RRM/rl_agent.py b/openAI_RRM/rl_agent.py new file mode 100644 index 0000000..9f5c9e6 --- /dev/null +++ b/openAI_RRM/rl_agent.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import gym +import UniFlexGym +import tensorflow as tf +import tensorflow.contrib.slim as slim +import numpy as np +from tensorflow import keras +import argparse +import logging +import time +import csv +import os +from math import * +from scipy.optimize import fsolve +import pickle +import datetime + +AVGTIME_ONEVALUE_RAND = 60 +RANDVALUE_FIRST_EPISODE = 0.7 +REWARD_INIT = 0.00001 +SORT_VALUES = True + +sortedIndecies = [] +ac_space = [] +scenarios = 5 +currentScenario = 0 + +lastreward = np.zeros(scenarios) +minreward = np.zeros(scenarios) +maxreward = np.zeros(scenarios) +lastaction = np.zeros(scenarios) + +def normalize_state(state, ob_space, s_size): + global sortedIndecies + state = np.array(state) + #sort states + index = np.arange(state.shape[0]) + index = index.reshape((-1,1)) + state = np.concatenate((state, index), axis=1) + #sort input and output if configured + if SORT_VALUES: + state = np.sort(state.view('i8,i8,i8'), order=['f0', 'f1'], axis=0).view(np.int) + #print("state" + str(state)) + sortedIndecies = state[:,-1] + #print(sortedIndecies) + state = np.delete(state, -1, axis=1) + + state = np.reshape(state, [1, s_size]) + # obspacehigh = np.reshape(ob_space.high, [1, s_size]) + state = state - 1 #*2 / obspacehigh - 1 + + return state + +def guess_random_numbers_in_firstEpisode(a_size): + return AVGTIME_ONEVALUE_RAND * a_size * scenarios#**2 + +def guess_steps(a_size): + return 
guess_random_numbers_in_firstEpisode(a_size) / RANDVALUE_FIRST_EPISODE + +def guess_epsilon_decay(steps, a_size): + func = lambda epsilon_decay: guess_random_numbers_in_firstEpisode(a_size) - (1-epsilon_decay**(steps + 1)) / (1 - epsilon_decay) + return fsolve(func, 0.9999999999)[0] + +def map_action(mappedAction): + action = np.zeros(len(ac_space.nvec)) + for index in range(len(ac_space.nvec)): + # filter action by the index + ifaceaction = int(mappedAction / (pow(ac_space.nvec[0] ,index))) + ifaceaction = ifaceaction % ac_space.nvec[0] + #print("ifaceaction at " + str(index) + " is " + str(ifaceaction)) + #print("Find " + str(index) + "in sorted indecies" + str(sortedIndecies)+ "at" + str(np.where(sortedIndecies == index))) + #action[np.where(sortedIndecies == index)[0]] = ifaceaction + action[sortedIndecies[index]] = ifaceaction + return action + +def reset_rewards(): + global maxreward + global minreward; + for i in range(scenarios): + maxreward[i] = REWARD_INIT + minreward[i] = np.inf + return + +def normalize_reward(reward, rewardpow, action): + global maxreward + global minreward; + global lastreward; + global currentScenario; + + orig = reward + + minreward[currentScenario] = min(reward, minreward[currentScenario]) + reward -= minreward[currentScenario] + + maxreward[currentScenario] = max(reward, maxreward[currentScenario]) + reward /= maxreward[currentScenario] + + print("reward:" + str(orig) + ", minreward:" + str(minreward[currentScenario]) + ", maxreward:" +str(maxreward[currentScenario]) + ", at scenario" + str(currentScenario)) + + #set reward to 1.0 if it is first value + if maxreward[currentScenario] == REWARD_INIT: + reward = 1.0 + + reward = pow(reward, rewardpow) + + #hysteresis + if action != lastaction[currentScenario] and abs(reward - lastreward[currentScenario]) < 0.1: + reward *= 0.9 + lastaction[currentScenario] = action + lastreward[currentScenario] = reward + + return reward + +parser = argparse.ArgumentParser(description='Uniflex 
reader') +parser.add_argument('--config', help='path to the uniflex config file', default=None, required=True) +parser.add_argument('--output', help='path to a csv file for agent output data', default=None) +parser.add_argument('--plot', help='activate plotting', default=None) +parser.add_argument('--steptime', help='interval between two steps', default=1) +#parser.add_argument('--steps', help='number of steps per episode. If not set, the agents runs infinitly long', default=None) +parser.add_argument('--episodes', help='number of episodes in this execution. If not set, the agents runs infinitly long', default=None) +parser.add_argument('--startepisode', help='The episode we start with', default=1) +parser.add_argument('--trainingfile', help='file to load and store training data', default=None) +parser.add_argument('--cpus', help='Numbers of cpus for this process', default=1) + +args = parser.parse_args() +if not args.config: + print("No config file specified!") + os._exit(1) +if not args.output: + print("No output file specified! - Skip data") +if not args.trainingfile: + print("No training file specified! 
- Start with unlearned agent") + +if args.plot: + import matplotlib.pyplot as plt + +print("Start at episode " + str(args.startepisode)) + +#create uniflex environment, steptime is 10sec +env = gym.make('uniflex-v0') +#env.configure() +env.start_controller(steptime=float(args.steptime), config=args.config) + +epsilon_max = 1.0 # exploration rate +epsilon_min = 0.01 +#epsilon_decay = 0.99 + +numChannels = 2 + +while True: + + state = env.reset() + currentScenario = 0 + + n = 0 + ac_space = env.action_space + ob_space = env.observation_space + + print("reset agent") + print("Observation space: ", ob_space, ob_space.dtype) + print("Action space: ", ac_space, ac_space.nvec) + + tmps_size = ob_space.shape + s_size = tmps_size[0] * tmps_size[1] + #s_size = list(map(lambda x: x * ob_space.high, s_size)) + a_size = pow(ac_space.nvec[0], ac_space.nvec.shape[0]) + + if a_size == 0: + print("there is no vaild AP - sleep 2 seconds") + time.sleep(2) + continue + + print("observation_space size:" + str(s_size)) + + state = normalize_state(state, ob_space, s_size) + + model = keras.Sequential() + model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='sigmoid')) + model.add(keras.layers.Dense(5, activation='relu')) + model.add(keras.layers.Dense(a_size, activation='softmax')) + model.compile(optimizer=tf.train.AdamOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + + config = tf.ConfigProto() + config.intra_op_parallelism_threads = int(args.cpus) + config.inter_op_parallelism_threads = int(args.cpus) + tf.Session(config=config) + + if args.trainingfile and not os.path.isfile(args.trainingfile): + try: + model.load_weights(args.trainingfile) + print("Load model") + except ValueError: + print("Spaces does not match") + except tf.errors.NotFoundError: + print("File not found. 
Skip loading") + + try: + with open(args.trainingfile + '.var', 'rb') as f: # Python 3: open(..., 'wb') + lastreward, minreward, maxreward, lastaction = pickle.load(f) + print("Load variables of last run") + except ValueError: + print("File format is wrong") + except FileNotFoundError: + print("File not found. Skip loading") + + print("State (Observation) of System" + str(state)) + try: + state = np.reshape(state, [1, s_size]) + except ValueError: + continue + rewardsum = 0 + + steps = guess_steps(a_size) + epsilon_decay = guess_epsilon_decay(steps, a_size) + print("Initialize agent. Exploration rate is " + str(epsilon_decay) + + ", an episode has at most " + str(steps) + " steps") + + rewardpow = int(log(a_size, 2)) + + episode = 1 + reset_rewards() + + while episode < int(args.startepisode): + epsilon_max *= 0.999 + epsilon_max = max(pow(epsilon_max, 3), epsilon_min) + episode += 1 + + # Schleife für Episoden + while True: + print("start episode") + + run = 0 + runs = [] + rewards = [] + actions = [] + + epsilon = epsilon_max + epsilon_max *= 0.999 + epsilon_max = max(pow(epsilon_max, 3), epsilon_min) + done = False + + aps = int(log(a_size, numChannels)) + + #for i in range(0, aps): + # actions.append([]) + + state = env.reset() + state_orig = state + currentScenario = 0 + state = normalize_state(state, ob_space, s_size) + + while not done: + # Choose action + ts = time.time() + print("\nnew step at " + datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S,%f')) + print ("Run: " + str(run) + ", Episode: " + str(episode)) + print("Observation:" + str(state_orig)) + + if np.random.rand(1) < epsilon: + action = np.random.randint(a_size) + else: + action = np.argmax(model.predict(state)[0]) + + actionvector = map_action(action) + + print("Action:" +str(action) + ", Actionvector" + str(actionvector)) + + # Step + next_state, reward, done, _ = env.step(actionvector) + + reward = normalize_reward(reward, rewardpow, action) + + + if done: + # print("episode: 
{}/{}, time: {}, rew: {}, eps: {:.2}" + # .format(e, total_episodes, time, rewardsum, epsilon)) + reset_rewards() + print("setting changes") + break + + state_orig = next_state + + next_state = normalize_state(next_state, ob_space, s_size) + + # Train + target = reward + if not done: + target = (reward)# + 0.95 * np.amax(model.predict(next_state)[0])) + + print("Scaled reward: " + str(target)) + + target_f = model.predict(state) + print("agent learning" + str(target_f)) + target_f[0][action] = target + print("agent new learning" + str(target_f)) + model.fit(state, target_f, epochs=1, verbose=0) + + #rewardsum += reward + if epsilon > epsilon_min: epsilon *= epsilon_decay + + #rewards.append(reward) + + if args.output: + with open(args.output, 'a') as csvFile: + writer = csv.writer(csvFile) + writer.writerow([reward, action, episode,currentScenario]) + csvFile.close() + + #for ap in range(0, aps): + # ifaceaction = int(action / (pow(numChannels, ap))) + # ifaceaction = ifaceaction % numChannels + # actions[ap].append(ifaceaction) + + print ("Reward: " + str(reward)) + print ("GameOver: " + str(done)) + #print ("State: " + str(state)) + #print ("Channel selection:" + str(action)) + + state = next_state + + if args.plot: + plt.subplot(211) + plt.plot(run, reward, 'bo') # Additional point + plt.ylabel('reward') + plt.subplot(212) + #for ap in range(0, aps): + # plt.plot(actions[ap]) + plt.plot(run, action, 'bo') # Additional point + plt.ylabel('action') + plt.xlabel('step') + plt.pause(0.05) + + currentScenario += 1 + if currentScenario >= scenarios: + currentScenario = 0 + + run += 1 + + # next episode if enough steps, if enough episodes -> exit + if steps <= run: + if args.trainingfile: + model.save_weights(args.trainingfile) + with open(args.trainingfile + '.var', 'wb') as f: # Python 3: open(..., 'wb') + pickle.dump([lastreward, minreward, maxreward, lastaction], f) + if args.episodes and int(args.episodes) <= episode: + os._exit(1) + else: + break + + episode += 1 
diff --git a/openAI_RRM/rl_agent_multi.py b/openAI_RRM/rl_agent_multi.py new file mode 100644 index 0000000..d6ebdae --- /dev/null +++ b/openAI_RRM/rl_agent_multi.py @@ -0,0 +1,403 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import gym +import UniFlexGym +import tensorflow as tf +import tensorflow.contrib.slim as slim +import numpy as np +from tensorflow import keras +import argparse +import logging +import time +import csv +import os +import json +from math import * +from scipy.optimize import fsolve +import pickle +import datetime +from functools import reduce + +AVGTIME_ONEVALUE_RAND = 5 +RANDVALUE_FIRST_EPISODE = 0.9 +REWARD_INIT = 0.00001 +SORT_VALUES = False +SCENARIOS = 1 +EPSILON_MAX_DECAY = 0.999999999999 #0.95 +EPSILON_MIN = 0.01 +ACTIVATE_OBSERVER = False + +sortedIndecies = [] +ac_space = [] +currentScenario = 0 + +def normalize_state(state, ob_space, s_size): + global sortedIndecies + state = np.array(state) + #sort states + index = np.arange(state.shape[0]) + index = index.reshape((-1,1)) + state = np.concatenate((state, index), axis=1) + #sort input and output if configured + if SORT_VALUES: + state = np.sort(state.view('i8,i8,i8'), order=['f0', 'f1'], axis=0).view(np.int) + #print("state" + str(state)) + sortedIndecies = state[:,-1] + #print(sortedIndecies) + state = np.delete(state, -1, axis=1) + + state = np.reshape(state, [1, s_size]) + # obspacehigh = np.reshape(ob_space.high, [1, s_size]) + # state = state - 1 #*2 / obspacehigh - 1 + + return state + +def guess_random_numbers_in_firstEpisode(a_size): + return AVGTIME_ONEVALUE_RAND * a_size * SCENARIOS#**2 + +def guess_steps(a_size): + stepidea = guess_random_numbers_in_firstEpisode(a_size) / RANDVALUE_FIRST_EPISODE + #scale to multiple of scenario + stepidea = int(stepidea / SCENARIOS) * SCENARIOS + return stepidea + +def guess_epsilon_decay(steps, a_size): + func = lambda epsilon_decay: guess_random_numbers_in_firstEpisode(a_size) - (1-epsilon_decay**(steps )) / (1 - epsilon_decay) 
+ return fsolve(func, 0.9999999999)[0] + +def map_action(mappedAction): + action = np.zeros(len(ac_space.nvec)) + for index in range(len(ac_space.nvec)): + # filter action by the index + ifaceaction = int(mappedAction / (pow(ac_space.nvec[0] ,index))) + ifaceaction = ifaceaction % ac_space.nvec[0] + #print("ifaceaction at " + str(index) + " is " + str(ifaceaction)) + #print("Find " + str(index) + "in sorted indecies" + str(sortedIndecies)+ "at" + str(np.where(sortedIndecies == index))) + #action[np.where(sortedIndecies == index)[0]] = ifaceaction + action[sortedIndecies[index]] = ifaceaction + return action + +def reset_rewards(): + global maxreward + global minreward; + for i in range(SCENARIOS): + maxreward[i] = REWARD_INIT + minreward[i] = np.inf + return + +def normalize_reward(reward, rewardpow, action): + global maxreward + global minreward; + global lastreward; + global currentScenario; + + orig = reward + + minreward[currentScenario] = min(reward, minreward[currentScenario]) + reward -= minreward[currentScenario] + + maxreward[currentScenario] = max(reward, maxreward[currentScenario]) + reward /= maxreward[currentScenario] + + print("reward:" + str(orig) + ", minreward:" + str(minreward[currentScenario]) + ", maxreward:" +str(maxreward[currentScenario]) + ", at scenario" + str(currentScenario)) + + #set reward to 1.0 if it is first value + if maxreward[currentScenario] == REWARD_INIT: + reward = 1.0 #/ (2**rewardpow) + + reward = pow(reward, rewardpow) + + #hysteresis + if action != lastaction[currentScenario] and abs(reward - lastreward[currentScenario]) < 0.1: + reward *= 0.9 + lastaction[currentScenario] = action + lastreward[currentScenario] = reward + + return reward + +lastreward = np.zeros(SCENARIOS) +minreward = np.zeros(SCENARIOS) +maxreward = np.zeros(SCENARIOS) +lastaction = np.zeros(SCENARIOS) + + +parser = argparse.ArgumentParser(description='Uniflex reader') +parser.add_argument('--config', help='path to the uniflex config file', default=None, 
required=True) +parser.add_argument('--output', help='path to a csv file for agent output data', default=None) +parser.add_argument('--plot', help='activate plotting', default=None) +parser.add_argument('--steptime', help='interval between two steps', default=1) +#parser.add_argument('--steps', help='number of steps per episode. If not set, the agents runs infinitly long', default=None) +parser.add_argument('--episodes', help='number of episodes in this execution. If not set, the agents runs infinitly long', default=None) +parser.add_argument('--startepisode', help='The episode we start with', default=1) +parser.add_argument('--trainingfile', help='file to load and store training data', default=None) +parser.add_argument('--cpus', help='Numbers of cpus for this process', default=1) + +args = parser.parse_args() +if not args.config: + print("No config file specified!") + os._exit(1) +if not args.output: + print("No output file specified! - Skip data") +if not args.trainingfile: + print("No training file specified! 
- Start with unlearned agent") + +if args.plot: + import matplotlib.pyplot as plt + +print("Start at episode " + str(args.startepisode)) + +#create uniflex environment, steptime is 10sec +env = gym.make('uniflex-v0') +#env.configure() +env.start_controller(steptime=float(args.steptime), config=args.config) + +epsilon_max = 1.0 # exploration rate +#epsilon_decay = 0.99 +#epsilon_decay = 0.995 + +time_history = [] +rew_history = [] + +numChannels = 2 + +observerData = [] +observerCounter = 0 + +while True: + + state = env.reset() + currentScenario = 0 + + n = 0 + ac_space = env.action_space + ob_space = env.observation_space + + print("reset agent") + print("Observation space: ", ob_space, ob_space.dtype) + print("Action space: ", ac_space, ac_space.nvec) + + tmps_size = ob_space.shape + s_size = tmps_size[0] * tmps_size[1] + #s_size = list(map(lambda x: x * ob_space.high, s_size)) + a_size = 0 + if len(ac_space.nvec) > 0: + a_size = int(pow(ac_space.nvec[0], ac_space.nvec.shape[0])) + + if a_size == 0: + print("there is no vaild AP - sleep 2 seconds") + time.sleep(2) + continue + + print("observation_space size:" + str(s_size)) + + state = normalize_state(state, ob_space, s_size) + + model = keras.Sequential() + model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu')) + #model.add(keras.layers.Dense(5, activation='relu')) + model.add(keras.layers.Dense(a_size, activation='softmax')) + model.compile(optimizer=tf.train.AdamOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + + config = tf.ConfigProto() + config.intra_op_parallelism_threads = int(args.cpus) + config.inter_op_parallelism_threads = int(args.cpus) + tf.Session(config=config) + + print("State (Observation) of System" + str(state)) + try: + state = np.reshape(state, [1, s_size]) + except ValueError: + continue + rewardsum = 0 + + steps = guess_steps(a_size) + epsilon_decay = guess_epsilon_decay(steps, a_size) + print("Initialize agent. 
Exploration rate is " + str(epsilon_decay) + + ", an episode has at most " + str(steps) + " steps") + + rewardpow = int(log(a_size, 2)) + + episode = 1 + reset_rewards() + + if args.trainingfile and not os.path.isfile(args.trainingfile): + try: + model.load_weights(args.trainingfile) + print("Load model") + except ValueError: + print("Spaces does not match") + except tf.errors.NotFoundError: + print("File not found. Skip loading") + + try: + with open(args.trainingfile + '.var', 'r') as f: # Python 3: open(..., 'wb') + temp = json.loads(f.read()) + lastreward = np.array(temp['lastreward']) + minreward = np.array(temp['minreward']) + maxreward = np.array(temp['maxreward']) + lastaction = np.array(temp['lastaction']) + print("Load reward values of last run") + print("lastreward: " + str(lastreward)) + print("minreward: " + str(minreward)) + print("maxreward: " + str(maxreward)) + print("lastaction: " + str(lastaction)) + except ValueError as e: + print("File format is wrong" + str(e)) + except IOError as e: + print("File not found. 
Skip loading" + str(e)) + + + while episode < int(args.startepisode): + epsilon_max *= EPSILON_MAX_DECAY + epsilon_max = max(pow(epsilon_max, 3), EPSILON_MIN) # max(epsilon_max, EPSILON_MIN)# + episode += 1 + + # Schleife für Episoden + while True: + print("start episode") + + run = 0 + runs = [] + rewards = [] + actions = [] + + epsilon = epsilon_max + epsilon_max *= EPSILON_MAX_DECAY + epsilon_max = max(pow(epsilon_max, 3), EPSILON_MIN) #max(epsilon_max, EPSILON_MIN) + done = False + + aps = int(log(a_size, numChannels)) + + #for i in range(0, aps): + # actions.append([]) + + state = env.reset() + state_orig = state + currentScenario = 0 + state = normalize_state(state, ob_space, s_size) + + while not done: + # Choose action + ts = time.time() + print("\nnew step at " + datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S,%f')) + print ("Run: " + str(run) + ", Episode: " + str(episode) + ", Scenario: " + str(currentScenario)) + print("Observation:" + str(state_orig)) + + if np.random.rand(1) < epsilon: + action = np.random.randint(a_size) + else: + action = np.argmax(model.predict(state)[0]) + + actionvector = map_action(action) + + print("Action:" +str(action) + ", Actionvector" + str(actionvector)) + + # Step + next_state, reward, done, _ = env.step(actionvector) + + reward = normalize_reward(reward, rewardpow, action) + + + if done: + # print("episode: {}/{}, time: {}, rew: {}, eps: {:.2}" + # .format(e, total_episodes, time, rewardsum, epsilon)) + reset_rewards() + print("setting changes") + break + + state_orig = next_state + + next_state = normalize_state(next_state, ob_space, s_size) + + # Train + target = reward + if not done: + target = (reward)# + 0.95 * np.amax(model.predict(next_state)[0])) + + print("Scaled reward: " + str(target)) + + target_f = model.predict(state) + print("agent learning" + str(target_f)) + target_f[0][action] = target + print("agent new learning" + str(target_f)) + history = model.fit(state, target_f, epochs=1, 
verbose=0) + + # observer: Observe states in neural network + # if there is no change of the loss function within 10 steps and epsilon_max < 0.5 + # then stop the execution. It detects changes within 0.01 + if ACTIVATE_OBSERVER: + observerData.append(history.history['loss'][0]) + observerCounter += 1 + if(observerCounter > 10): + observerCounter = 10 + observerData.pop(0) + avg = reduce(lambda x, y: x + y, observerData) / len(observerData) + indata = list(map(lambda x: abs(x-avg) < 0.01, observerData)) + complete = reduce(lambda x, y: x and y, indata) + if complete and epsilon_max < 0.5: + print("Accuracy: " + str(history.history['acc'][0])) + print("Network is trained - exit") + os._exit(0) + + #rewardsum += reward + if epsilon > EPSILON_MIN: epsilon *= epsilon_decay + + #rewards.append(reward) + + if args.output: + with open(args.output, 'a') as csvFile: + writer = csv.writer(csvFile) + writer.writerow([reward, action, episode,currentScenario]) + csvFile.close() + + #for ap in range(0, aps): + # ifaceaction = int(action / (pow(numChannels, ap))) + # ifaceaction = ifaceaction % numChannels + # actions[ap].append(ifaceaction) + + print ("Reward: " + str(reward)) + print ("GameOver: " + str(done)) + #print ("State: " + str(state)) + #print ("Channel selection:" + str(action)) + + state = next_state + + if args.plot: + plt.subplot(211) + plt.plot(run, reward, 'bo') # Additional point + plt.ylabel('reward') + plt.subplot(212) + #for ap in range(0, aps): + # plt.plot(actions[ap]) + plt.plot(run, action, 'bo') # Additional point + plt.ylabel('action') + plt.xlabel('step') + plt.pause(0.05) + + currentScenario += 1 + if currentScenario >= SCENARIOS: + currentScenario = 0 + + run += 1 + + # next episode if enough steps, if enough episodes -> exit + # store model and internal states on change of episode + if steps <= run: + if args.trainingfile: + model.save_weights(args.trainingfile) + with open(args.trainingfile + '.var', 'w') as f: # Python 3: open(..., 'wb') + 
f.write(json.dumps({ + 'lastreward' : lastreward.tolist(), + 'minreward' : minreward.tolist(), + 'maxreward' : maxreward.tolist(), + 'lastaction' : lastaction.tolist() + })) + if args.episodes and int(args.episodes) <= episode: + os._exit(1) + else: + break + + episode += 1 diff --git a/openAI_RRM/rl_agent_multi_evalnetwork.py b/openAI_RRM/rl_agent_multi_evalnetwork.py new file mode 100644 index 0000000..d5cfe68 --- /dev/null +++ b/openAI_RRM/rl_agent_multi_evalnetwork.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +##TODO: Insert path to neuronal network + +import gym +import UniFlexGym +import tensorflow as tf +import tensorflow.contrib.slim as slim +import numpy as np +from tensorflow import keras +import argparse +import logging +import time +import csv +import os +from math import * +from scipy.optimize import fsolve +from gym import spaces + +sortedIndecies = [] +ac_space = [] +BANDWITH_ON_CHANNEL = 54e6 +numChannels = 2 +SORT_VALUES = True +topologies = [[1,2,1], [2,2,2], [1,0,1]] +N_test = int(1000) +aps = 3 +maxclients = 100 + +def zeros(anz): + return np.zeros(anz) + +def ones(anz): + return np.ones(anz) + +def calculate_all_best_action(clients, aps): + aps_sort = sorted(aps) + #if string topology + if aps_sort == [1,1,2]: + #set ap in the middle to 1, all other to 0 + result = [zeros(3), ones(3)] + result[0][aps.index(2)] = 1 + result[1][aps.index(2)] = 0 + return result + + # if island topology + if aps_sort == [0,1,1]: + #set on of the neighouring aps to 1, all other to 0 + result = [] + group = np.where(np.array(aps) == 1) + for elem in group[0]: + myresult = zeros(3) + myresult[elem] = 1 + result.append(myresult) + myresult = ones(3) + myresult[elem] = 0 + result.append(myresult) + return result + + #if all aps can hear all other + if aps_sort == [2,2,2]: + result = [] + #get ap with most clients + clients_sort = sorted(clients) + ap_most = np.where(clients == clients_sort[2]) + for elem in ap_most[0]: + myresult = zeros(3) + 
myresult[elem] = 1 + result.append(myresult) + myresult = ones(3) + myresult[elem] = 0 + result.append(myresult) + return result + # there is no topology + print("notopo") + return [zeros(3)] + +#test data consist on a vector of observations and a label vector of all valid +# action to this observations +def generate_testdata(number, dimension, maxclients, topoplogies, sortValues): + data = [] + labels = [] + for i in range(number): + for topology in topologies: + index = np.array(range(dimension), dtype=np.int16) + clients = np.random.randint(maxclients, size=dimension) + mydata = np.vstack((clients, topology, index)).transpose() + labeldata = mydata + #if sortValues: + # labeldata = np.sort(labeldata.view('i4,i4'), order=['f0', 'f1'], axis=0).view(np.int) + clients = labeldata[:,0] + topology = labeldata[:,1].tolist() + #mydata = np.reshape(mydata, [1, 2*dimension]) + mydata = np.delete(mydata, 2, axis=1) + data.append(mydata) + #labels.append(channelvectors_to_label(calculate_all_best_action(clients, topology))) + labels.append(calculate_all_best_action(clients, topology)) + return [data, labels] + +def normalize_state(state, ob_space, s_size): + global sortedIndecies + state = np.array(state) + + #sort states + index = np.arange(state.shape[0]) + index = index.reshape((-1,1)) + state = np.concatenate((state, index), axis=1) + # + if SORT_VALUES: + state = np.sort(state.view('i8,i8,i8'), order=['f0', 'f1'], axis=0).view(np.int) + sortedIndecies = state[:,-1] + state = np.delete(state, -1, axis=1) + state = np.reshape(state, [1, s_size]) + obspacehigh = np.reshape(ob_space.high, [1, s_size]) + #state = state *2 / obspacehigh - 1 + state = state -1 + + return state + +def map_action(mappedAction): + action = np.zeros(len(ac_space.nvec)) + for index in range(len(ac_space.nvec)): + # filter action by the index + ifaceaction = int(mappedAction / (pow(ac_space.nvec[0] ,index))) + ifaceaction = ifaceaction % ac_space.nvec[0] + #print("ifaceaction at " + str(index) + " 
# =====================================================================
# NOTE(review): this chunk of the file is a whitespace-mangled git diff
# fusing the tail of one Python script with two complete scripts
# (openAI_RRM/thompson_agent.py and openAI_RRM/thompson_agent2.py).
# The '+' diff markers are stripped and conventional formatting is
# restored below; original file boundaries are marked with banners.
# =====================================================================

# ---------------------------------------------------------------------
# File 1 (tail only): neural-network evaluation script.  Its head —
# imports (np, tf, keras, gym.spaces), generate_testdata(),
# normalize_state(), sortedIndecies, N_test, aps, maxclients,
# topologies, SORT_VALUES, BANDWITH_ON_CHANNEL — lies before this
# chunk and is untouched here.
# ---------------------------------------------------------------------

# NOTE(review): the opening lines of this map_action fell outside the
# visible chunk; the header and loop frame are reconstructed from the
# visible tail (`action[sortedIndecies[index]] = ifaceaction`,
# `return action`) and the sibling map_action in thompson_agent.py —
# TODO confirm against the original file head.
def map_action(mappedAction):
    """Decode a scalar action id into a per-AP channel vector.

    Digit `index` of `mappedAction` in base ``ac_space.nvec[0]`` is
    written at position ``sortedIndecies[index]`` (the observation rows
    were sorted before training, so digits follow the sorted order).
    """
    action = np.zeros(len(ac_space.nvec))
    for index in range(len(ac_space.nvec)):
        ifaceaction = int(mappedAction / pow(ac_space.nvec[0], index)) % ac_space.nvec[0]
        action[sortedIndecies[index]] = ifaceaction
    return action


def _matches_valid(actionvector, valid_actions):
    """Return True if `actionvector` equals (element-wise) any entry of
    `valid_actions`.  Extracted from three duplicated inline loops."""
    for candidate in valid_actions:
        if all(a == b for a, b in zip(actionvector, candidate)):
            return True
    return False


def eval(clients):  # NOTE(review): shadows the builtin eval(); name kept for interface compatibility
    """Predict a channel assignment for every scenario and report the
    per-scenario outcome plus an overall error percentage.

    Each scenario dict has keys 'clients' (clients per AP), 'aps'
    (per-AP neighbour info) and 'valid' (acceptable channel vectors).
    """
    errorcounter_cli = 0
    counter = 0
    errorlog = ""

    for client in clients:
        ap = client['aps']
        # one (clients, aps) row per AP
        state_cli = np.array([client['clients'], ap]).transpose()
        state_cli_norm = normalize_state(state_cli.tolist(), ob_space, s_size)
        action = np.argmax(model.predict(state_cli_norm)[0])
        actionvector = map_action(action)

        success_cli = _matches_valid(actionvector, client['valid'])

        output = ("[Cli, Ap]: Cli:" + str(client['clients']) + ", AP:" + str(ap)
                  + ", Action:" + str(action) + ", Actionvector" + str(actionvector)
                  + ", " + str(success_cli) + " SortedID: " + str(sortedIndecies))
        print(output)
        counter += 1

        if not success_cli:
            errorcounter_cli += 1
            errorlog += output + "\n"

    # bug fix: the original divided by zero when `clients` was empty
    if counter:
        print("Errors in [Cli,Ap]:" + str(errorcounter_cli) + "/" + str(counter)
              + "(" + str(errorcounter_cli / counter * 100) + "%)")
    print(errorlog)


def calculate_reward(clients_p_ap, action):
    """Proportional-fair style reward of a channel assignment.

    Every AP shares BANDWITH_ON_CHANNEL equally with all APs on the
    same channel; an AP with n clients contributes n * sqrt(share / n).
    """
    reward = 0
    for ap in range(len(action)):
        channel = action[ap]
        # number of APs (including this one) on the same channel
        same_chan = sum(1 for act in action if act == channel)
        ap_bandwidth = BANDWITH_ON_CHANNEL / same_chan
        # bug fix: the original raised ZeroDivisionError for an AP with
        # zero clients; lim n->0 of n*sqrt(B/n) is 0, so it contributes 0
        if clients_p_ap[ap]:
            reward += clients_p_ap[ap] * sqrt(ap_bandwidth / clients_p_ap[ap])
    return reward


def get_best_reward(client, ap):
    """Reward of the action the model predicts for (client, ap).

    NOTE(review): despite the name this is the reward of the *predicted*
    action, not the optimum over all actions — confirm intent.
    """
    state_cli = np.array([client, ap]).transpose()
    state_cli_norm = normalize_state(state_cli.tolist(), ob_space, s_size)
    action = np.argmax(model.predict(state_cli_norm)[0])
    actionvector = map_action(action)
    return calculate_reward(client, actionvector)


def eval_handover(client, new_clients):
    """Print the model's decision for the current scenario, then for
    each hypothetical post-handover scenario in `new_clients`."""
    print("Current state:")
    ap = client['aps']
    state_cli = np.array([client['clients'], ap]).transpose()
    state_cli_norm = normalize_state(state_cli.tolist(), ob_space, s_size)
    action = np.argmax(model.predict(state_cli_norm)[0])
    actionvector = map_action(action)
    success_cli = _matches_valid(actionvector, client['valid'])
    reward = get_best_reward(client['clients'], ap)
    print("Cli:" + str(client['clients']) + ", AP:" + str(ap) + ", Action:" + str(action)
          + ", Actionvector" + str(actionvector) + ", " + str(success_cli)
          + ", reward:" + str(reward))

    print("Handover simulation")
    for new_client in new_clients:
        ap = new_client['aps']
        state_cli = np.array([new_client['clients'], ap]).transpose()
        state_cli_norm = normalize_state(state_cli.tolist(), ob_space, s_size)
        action = np.argmax(model.predict(state_cli_norm)[0])
        actionvector = map_action(action)
        reward = calculate_reward(new_client['clients'], actionvector)
        success_cli = _matches_valid(actionvector, new_client['valid'])
        print("Cli:" + str(new_client['clients']) + ", AP:" + str(ap) + ", Action:" + str(action)
              + ", Actionvector" + str(actionvector) + ", " + str(success_cli)
              + ", reward:" + str(reward))


# --- evaluation script entry (module level) --------------------------
ac_space = spaces.MultiDiscrete([2, 2, 2])
ob_space = spaces.Box(low=0, high=6, shape=(ac_space.nvec.shape[0], 2), dtype=np.uint32)
trainingfile = "path-to-neuronal-network.train"  # weights produced by the trainer

# generate random test data
[test_data, test_labels] = generate_testdata(N_test, aps, maxclients, topologies, SORT_VALUES)

clients = [{'clients': elem[:, 0], 'aps': elem[:, 1], 'valid': label}
           for elem, label in zip(test_data, test_labels)]

# hand-picked scenarios with known-good channel assignments
special = [
    {'clients': [10, 10, 1000], 'aps': [2, 2, 2], 'valid': [[1, 1, 0], [0, 0, 1]]},
    {'clients': [1000, 10, 10], 'aps': [1, 2, 1], 'valid': [[1, 0, 1], [0, 1, 0]]},
    {'clients': [1000, 4, 2], 'aps': [1, 0, 1],
     'valid': [[1, 0, 0], [0, 0, 1], [1, 1, 0], [0, 1, 1]]},
]

print("Observation space: ", ob_space, ob_space.dtype)
print("Action space: ", ac_space, ac_space.nvec)

tmps_size = ob_space.shape
s_size = tmps_size[0] * tmps_size[1]                    # flattened observation size
a_size = pow(ac_space.nvec[0], ac_space.nvec.shape[0])  # number of discrete actions

print("observation_space size:" + str(s_size))
print("Data: Trained Data of different settings with sorting agent. Experiment 3")

model = keras.Sequential()
model.add(keras.layers.Dense(s_size, input_shape=(s_size,), activation='relu'))
model.add(keras.layers.Dense(a_size, activation='softmax'))
model.compile(optimizer=tf.train.AdamOptimizer(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.load_weights(trainingfile)

print("\n3000 zufällige Szenarien:")
eval(clients)

# bug fix: the original printed "\Spezielle..." — broken "\S" escape,
# clearly intended as "\n" (matches the sibling banner above)
print("\nSpezielle Szenarien:")
eval(special)


# ---------------------------------------------------------------------
# File 2: openAI_RRM/thompson_agent.py — Thompson-sampling channel
# selection agent driving the UniFlex gym environment.
# ---------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import UniFlexGym
import numpy as np
import argparse
import logging
import time
import csv
import os
from math import *


parser = argparse.ArgumentParser(description='Uniflex reader')
parser.add_argument('--config', help='path to the uniflex config file', default=None, required=True)
parser.add_argument('--output', help='path to a csv file for agent output data', default=None)
parser.add_argument('--plot', help='activate plotting', default=None)
parser.add_argument('--steptime', help='interval between two steps', default=1)
parser.add_argument('--steps', help='number of steps in this execution. If not set, the agents runs infinitly long', default=None)

args = parser.parse_args()
if not args.config:
    # defensive only: unreachable because --config is required=True
    print("No config file specified!")
    os._exit(1)
if not args.output:
    print("No output file specified! - Skip data")

if args.plot:
    import matplotlib.pyplot as plt

ac_space = []  # replaced by the real action space after env.reset()


def map_action(mappedAction):
    """Decode a scalar action id into a per-interface channel vector
    (base ``ac_space.nvec[0]`` digits, least significant first)."""
    action = np.zeros(len(ac_space.nvec))
    for index in range(len(ac_space.nvec)):
        # extract digit `index` of the action id
        ifaceaction = int(mappedAction / (pow(ac_space.nvec[0], index)))
        ifaceaction = ifaceaction % ac_space.nvec[0]
        action[index] = ifaceaction
    return action


# create uniflex environment
env = gym.make('uniflex-v0')
env.start_controller(steptime=float(args.steptime), config=args.config)

numChannels = 2
episode = 1

while True:
    run = 0
    state = env.reset()
    ac_space = env.action_space
    ob_space = env.observation_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.nvec)

    a_size = 0
    if len(ac_space.nvec) > 0:
        a_size = int(pow(ac_space.nvec[0], ac_space.nvec.shape[0]))

    avg = [0] * a_size   # running reward estimate per action
    num = [0] * a_size   # pull count per action
    maxreward = 1        # normaliser for avg; grows with observed rewards
    lastreward = 0
    lastaction = 0
    done = False

    if a_size == 0:
        # message fix: the original said "vaild"/"10 seconds" but slept 2
        print("there is no valid AP - sleep 2 seconds")
        time.sleep(2)
        continue

    aps = int(log(a_size, numChannels))  # number of APs (not used below)

    while not done:
        # Thompson sampling: one draw per action from a normal whose
        # variance shrinks as 1/(num+1) with the pull count
        randval = [np.random.normal(avg[i] / maxreward, 1 / (pow(num[i], 1) + 1), 1)
                   for i in range(a_size)]
        # take index of highest sampled value
        action = np.argmax(randval)

        # execute step
        actionVector = map_action(action)
        next_state, reward, done, _ = env.step(actionVector)

        # hysteresis: damp the reward of a switch that did not clearly help
        if action != lastaction and abs(reward - lastreward) < 0.1:
            reward = reward * 0.75
        lastaction = action
        lastreward = reward

        # update running average for further sampling
        # NOTE(review): denominator num+2 (not num+1) biases the mean
        # low — looks like a deliberate pessimistic prior, confirm
        avg[action] = (avg[action] * num[action] + reward) / (num[action] + 2)
        num[action] += 1
        maxreward = np.maximum(maxreward, reward)

        # statistics — `with` closes the file (redundant close() removed)
        if args.output:
            with open(args.output, 'a') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow([reward, action, episode])

        print("Reward: " + str(reward))
        print("GameOver: " + str(done))
        print("Next Channels: " + str(next_state))
        print("Channel selection:" + str(action))
        print("Average:" + str(avg))
        print("next step")

        if args.plot:
            plt.subplot(211)
            plt.plot(run, reward, 'bo')  # additional point
            plt.ylabel('reward')
            plt.subplot(212)
            plt.plot(run, action, 'bo')  # additional point
            plt.ylabel('action')
            plt.xlabel('step')
            plt.pause(0.05)

        run += 1

        if args.steps and int(args.steps) <= run:
            # hard exit: the uniflex controller threads would otherwise
            # keep the process alive after the step budget is spent
            os._exit(1)

    episode += 1


# ---------------------------------------------------------------------
# File 3: openAI_RRM/thompson_agent2.py — identical to thompson_agent.py
# except the sampling variance shrinks quadratically (pow(num, 2)).
# NOTE(review): the two agents should be unified behind a --power flag;
# kept as separate duplicated scripts to preserve the repository layout.
# ---------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import gym
import UniFlexGym
import numpy as np
import argparse
import logging
import time
import csv
import os
from math import *


parser = argparse.ArgumentParser(description='Uniflex reader')
parser.add_argument('--config', help='path to the uniflex config file', default=None, required=True)
parser.add_argument('--output', help='path to a csv file for agent output data', default=None)
parser.add_argument('--plot', help='activate plotting', default=None)
parser.add_argument('--steptime', help='interval between two steps', default=1)
parser.add_argument('--steps', help='number of steps in this execution. If not set, the agents runs infinitly long', default=None)

args = parser.parse_args()
if not args.config:
    # defensive only: unreachable because --config is required=True
    print("No config file specified!")
    os._exit(1)
if not args.output:
    print("No output file specified! - Skip data")

if args.plot:
    import matplotlib.pyplot as plt

ac_space = []  # replaced by the real action space after env.reset()


def map_action(mappedAction):
    """Decode a scalar action id into a per-interface channel vector
    (base ``ac_space.nvec[0]`` digits, least significant first)."""
    action = np.zeros(len(ac_space.nvec))
    for index in range(len(ac_space.nvec)):
        # extract digit `index` of the action id
        ifaceaction = int(mappedAction / (pow(ac_space.nvec[0], index)))
        ifaceaction = ifaceaction % ac_space.nvec[0]
        action[index] = ifaceaction
    return action


# create uniflex environment
env = gym.make('uniflex-v0')
env.start_controller(steptime=float(args.steptime), config=args.config)

numChannels = 2
episode = 1

while True:
    run = 0
    state = env.reset()
    ac_space = env.action_space
    ob_space = env.observation_space
    print("Observation space: ", ob_space, ob_space.dtype)
    print("Action space: ", ac_space, ac_space.nvec)

    a_size = 0
    if len(ac_space.nvec) > 0:
        a_size = int(pow(ac_space.nvec[0], ac_space.nvec.shape[0]))

    avg = [0] * a_size   # running reward estimate per action
    num = [0] * a_size   # pull count per action
    maxreward = 1        # normaliser for avg; grows with observed rewards
    lastreward = 0
    lastaction = 0
    done = False

    if a_size == 0:
        # message fix: the original said "vaild"/"10 seconds" but slept 2
        print("there is no valid AP - sleep 2 seconds")
        time.sleep(2)
        continue

    aps = int(log(a_size, numChannels))  # number of APs (not used below)

    while not done:
        # Thompson sampling — the ONLY difference to thompson_agent.py:
        # variance shrinks as 1/(num**2 + 1), i.e. quadratically
        randval = [np.random.normal(avg[i] / maxreward, 1 / (pow(num[i], 2) + 1), 1)
                   for i in range(a_size)]
        # take index of highest sampled value
        action = np.argmax(randval)

        # execute step
        actionVector = map_action(action)
        next_state, reward, done, _ = env.step(actionVector)

        # hysteresis: damp the reward of a switch that did not clearly help
        if action != lastaction and abs(reward - lastreward) < 0.1:
            reward = reward * 0.75
        lastaction = action
        lastreward = reward

        # update running average for further sampling
        # NOTE(review): denominator num+2 (not num+1) biases the mean
        # low — looks like a deliberate pessimistic prior, confirm
        avg[action] = (avg[action] * num[action] + reward) / (num[action] + 2)
        num[action] += 1
        maxreward = np.maximum(maxreward, reward)

        # statistics — `with` closes the file (redundant close() removed)
        if args.output:
            with open(args.output, 'a') as csvFile:
                writer = csv.writer(csvFile)
                writer.writerow([reward, action, episode])

        print("Reward: " + str(reward))
        print("GameOver: " + str(done))
        print("Next Channels: " + str(next_state))
        print("Channel selection:" + str(action))
        print("Average:" + str(avg))
        print("next step")

        if args.plot:
            plt.subplot(211)
            plt.plot(run, reward, 'bo')  # additional point
            plt.ylabel('reward')
            plt.subplot(212)
            plt.plot(run, action, 'bo')  # additional point
            plt.ylabel('action')
            plt.xlabel('step')
            plt.pause(0.05)

        run += 1

        if args.steps and int(args.steps) <= run:
            # hard exit: the uniflex controller threads would otherwise
            # keep the process alive after the step budget is spent
            os._exit(1)

    episode += 1