-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path_mdp_centralized_agent.py
More file actions
189 lines (143 loc) · 7.49 KB
/
_mdp_centralized_agent.py
File metadata and controls
189 lines (143 loc) · 7.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import os
os.environ['OPENBLAS_NUM_THREADS'] = '1'
import numpy as np
import math
import random
import copy
from typing import Optional
import gymnasium
from gymnasium import spaces
from gymnasium.spaces import Box, Tuple
from gymnasium.spaces import MultiDiscrete
class CentralizedSystem(gymnasium.Env):
    """Centralized (joint) MDP over a set of local agents.

    The joint observation interleaves each agent's Age-of-Information (AoI)
    and energy level: [aoi_0, e_0, aoi_1, e_1, ...], with AoI bounded in
    [1, M_i] and energy in [-B_i, B_i] per agent.  The joint action assigns
    one value in {0, 1, 2} to every agent (MultiDiscrete).
    """

    def __init__(self, list_agents):
        """Build the joint observation/action spaces from the local agents.

        Parameters
        ----------
        list_agents : list
            Local agent objects exposing ``M``, ``B``,
            ``compute_state_index(aoi, energy)``, ``step(state, action,
            training=...)``, ``congestion_penalty_parameters`` and
            ``max_episodes_steps``.
        """
        super().__init__()
        self.agents = list_agents
        self.n_agents = len(list_agents)
        print(self.n_agents)  # NOTE(review): debug print kept from original
        self.t = 0  # step counter within the current episode
        # Bounds of the interleaved observation vector: even positions hold
        # AoI (in [1, M]), odd positions hold energy (in [-B, B]).
        low, high = [], []
        for agent in self.agents:
            low.extend([1, -agent.B])
            high.extend([agent.M, agent.B])
        self.observation_space = Box(low=np.array(low), high=np.array(high), dtype=np.int64)
        # Implicit global state: every agent starts with AoI = 1, energy = 0.
        self._agents_aoi = [1 for _ in range(self.n_agents)]
        self._agents_energy = [0 for _ in range(self.n_agents)]
        # One discrete action in {0, 1, 2} per agent.
        self.action_space = spaces.multi_discrete.MultiDiscrete([3] * self.n_agents)

    def _get_obs(self):
        """Return the flat observation list [aoi_0, e_0, aoi_1, e_1, ...]."""
        observation = []
        for aoi, energy in zip(self._agents_aoi, self._agents_energy):
            observation.append(aoi)
            observation.append(energy)
        return observation

    def _local_state(self, i):
        """Local state dict {'x', 'e', 'index'} of agent i from the implicit global state."""
        aoi = self._agents_aoi[i]
        energy = self._agents_energy[i]
        return dict({'x': aoi, 'e': energy,
                     'index': self.agents[i].compute_state_index(aoi, energy)})

    def make_state_dictionary(self, state=None):
        """Return the list of per-agent local state dicts.

        The ``state`` argument is ignored (kept for interface compatibility:
        the original immediately overwrote it).
        """
        return [self._local_state(i) for i in range(self.n_agents)]

    def global_reward(self, global_action, state=None, vector_rewards=True, LP=False):
        """Compute the global reward of ``global_action``.

        If ``state`` is None it is derived from the implicit global state in
        ``self._agents_aoi`` / ``self._agents_energy`` (keys 'Agent 0', ...).
        When ``LP`` is True the supplied state dictionary is assumed to be
        1-indexed ('Agent 1', ...).

        Returns ``(total, per_agent_components)`` when ``vector_rewards`` is
        True, otherwise just ``total``.
        """
        if state is None:
            state = {'Agent {}'.format(i): self._local_state(i)
                     for i in range(self.n_agents)}
        total = 0
        components = np.zeros((self.n_agents,))
        for i in range(self.n_agents):
            agent_state = state['Agent {}'.format(i + 1 if LP else i)]
            if agent_state['e'] < 0:
                if global_action[i] == 0:
                    # Penalty equals |energy deficit|: how many energy units
                    # are missing to make the pending processing feasible.
                    r = agent_state['x'] - agent_state['e']
                else:
                    # Any non-idle action is infeasible with negative energy.
                    r = 1000
            elif global_action[i] == 0 and (agent_state['x'] >= self.agents[i].M
                                            or agent_state['e'] >= self.agents[i].B):
                # Idling while AoI or energy is already saturated is penalized.
                r = agent_state['x'] + 1000
            elif global_action[i] <= 1 and agent_state['e'] >= 0:
                r = agent_state['x']
            elif global_action[i] == 2:
                # Congestion cost grows with the number of agents that choose
                # the shared action; the (x - 1) excludes agent i itself from
                # the count passed in (np.count_nonzero includes agent i).
                params = self.agents[i].congestion_penalty_parameters
                cost_other_agents = lambda x: params['multiplier'] * (x - 1) ** params['exponent']
                n_congested = np.count_nonzero(global_action == 2)
                r = agent_state['x'] + cost_other_agents(n_congested)
            else:
                # BUG FIX: original printed the undefined name `action`,
                # raising NameError instead of the intended diagnostic.
                print(agent_state, global_action[i])
                raise ValueError("Error in reward function")
            total += r
            components[i] = r
        if vector_rewards:
            return total, components
        return total

    def cost_function(self, state, action):
        """Unit cost for choosing the shared resource (action 2), else 0."""
        return 1 if action == 2 else 0

    def step(self, action, vector_rewards=False):
        """Advance the joint system by one time step.

        Each agent transitions through its own ``step()``; the global reward
        is computed on the PRE-transition state, and only afterwards is the
        implicit global state updated.

        Returns ``(observation, reward, terminated, truncated, info)`` where
        ``info`` carries the per-agent reward components (or None when
        ``vector_rewards`` is False).
        """
        self.t += 1
        next_states = []
        for i, agent in enumerate(self.agents):
            # Local transition only; the reward is handled globally below.
            next_local, _, _, _ = agent.step(self._local_state(i), action[i], training=False)
            next_states.append(next_local)
        reward = self.global_reward(action, vector_rewards=vector_rewards)
        if vector_rewards:
            reward, components = reward[0], reward[1]
        else:
            components = None
        # Commit the transition to the implicit global state.
        for i, s in enumerate(next_states):
            self._agents_aoi[i] = s['x']
            self._agents_energy[i] = s['e']
        # Episode length is taken from the first agent's horizon.
        episode_ended = self.t >= self.agents[0].max_episodes_steps
        observation = self._get_obs()
        return observation, reward, episode_ended, False, {'global_reward_components': components}

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Reset time and the implicit global state (AoI=1, energy=0 for all)."""
        super().reset(seed=seed)
        self.t = 0
        self._agents_aoi = [1 for _ in range(self.n_agents)]
        self._agents_energy = [0 for _ in range(self.n_agents)]
        return self._get_obs(), {}