malib.utils.typing module
- malib.utils.typing.ActionSpaceType
For task categorization and status tagging
- class malib.utils.typing.AgentInvolveInfo(training_handler: str, trainable_pairs: Dict[str, Tuple[str, Dict[str, Any]]], populations: Dict[str, Sequence[Tuple[str, Dict[str, Any]]]], env_id: Optional[str] = None, meta_parameter_desc_dict: Optional[Dict[str, malib.utils.typing.MetaParameterDescription]] = None)[source]
Bases:
object
AgentInvolveInfo describes the trainable pairs, populations, environment id and the meta parameter descriptions.
- env_id: str = None
environment id
- classmethod gen_template(agent_ids: List[str], observation_space: gym.spaces.space.Space, action_space: gym.spaces.space.Space)[source]
- meta_parameter_desc_dict: Dict[str, malib.utils.typing.MetaParameterDescription] = None
meta parameter description
- populations: Dict[str, Sequence[Tuple[str, Dict[str, Any]]]]
describe the policy population of agents
- trainable_pairs: Dict[str, Tuple[str, Dict[str, Any]]]
describes the environment agent ids and their bound policy configurations
- training_handler: str
- class malib.utils.typing.BColors[source]
Bases:
object
- BOLD = '\x1b[1m'
- ENDC = '\x1b[0m'
- FAIL = '\x1b[91m'
- HEADER = '\x1b[95m'
- OKBLUE = '\x1b[94m'
- OKCYAN = '\x1b[96m'
- OKGREEN = '\x1b[92m'
- UNDERLINE = '\x1b[4m'
- WARNING = '\x1b[93m'
- class malib.utils.typing.BatchMetaInfo(episode_id: str, created_time: float, meta_policy_id: str = None, policy_id: str = None, env_id: Any = None, policy_type: Any = None)[source]
Bases:
object
- created_time: float
- env_id: Any = None
- episode_id: str
- meta_policy_id: str = None
- policy_id: str = None
- policy_type: Any = None
- class malib.utils.typing.BehaviorMode(value)[source]
Bases:
enum.IntEnum
Behavior mode, indicates environment agent behavior
- EXPLOITATION = 1
Trigger exploitation mode
- EXPLORATION = 0
Trigger exploration mode
- class malib.utils.typing.BufferDescription(env_id: str, agent_id: Union[str, List[str]], policy_id: Union[str, List[str]], batch_size: int = 0, sample_mode: str = '', indices: List[int] = None, data: Any = None, data_shapes: Dict[str, Tuple] = None, sample_start_size: int = 0, capacity: int = 1000, identify: str = None)[source]
Bases:
object
- agent_id: Union[str, List[str]]
- batch_size: int = 0
- capacity: int = 1000
- data: Any = None
- data_shapes: Dict[str, Tuple] = None
- env_id: str
- identify: str = None
- indices: List[int] = None
- policy_id: Union[str, List[str]]
- sample_mode: str = ''
- sample_start_size: int = 0
- class malib.utils.typing.EvaluateResult[source]
Bases:
object
- AVE_REWARD = 'average_reward'
- CONVERGED = 'converged'
- REACHED_MAX_ITERATION = 'reached_max_iteration'
- class malib.utils.typing.ExperimentManagerTableName[source]
Bases:
object
- key: int = 0
- nid: int = 0
- primary: str = ''
- secondary: str = ''
- tag: str = ''
- class malib.utils.typing.MetaParameterDescription(meta_pid: str, parameter_desc_dict: Dict[str, malib.utils.typing.ParameterDescription], timestamp: float = 1645173917.4664972, identify: str = 'MetaParameterDescription')[source]
Bases:
object
- identify: str = 'MetaParameterDescription'
- meta_pid: str
- parameter_desc_dict: Dict[str, malib.utils.typing.ParameterDescription]
- timestamp: float = 1645173917.4664972
- class malib.utils.typing.MetricEntry(value: Any, agg: str = 'mean', tag: str = '', log: bool = True)[source]
Bases:
object
- class malib.utils.typing.MetricType[source]
Bases:
object
- LIVE_STEP = 'live_step'
Agent live step
- REACH_MAX_STEP = 'reach_max_step'
Whether the max step has been reached or not
- REWARD = 'reward'
Reward
- class malib.utils.typing.Paradigm(value)[source]
Bases:
enum.Enum
An enumeration.
- MARL = 'marl'
- META_GAME = 'meta_game'
- malib.utils.typing.Parameter = typing.Any
Description:
- class malib.utils.typing.ParameterDescription(time_stamp: float, identify: str, env_id: str, id: str, type: str = 'parameter', lock: bool = False, description: Any = None, data: Any = None, parallel_num: int = 1, version: int = -1)[source]
Bases:
object
- data: Any = None
- description: Any = None
- env_id: str
- id: str
- identify: str
- lock: bool = False
- parallel_num: int = 1
- time_stamp: float
- type: str = 'parameter'
- version: int = -1
- class malib.utils.typing.RolloutDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, fragment_length: int, num_episodes: int, episode_seg: int, terminate_mode: str, mode: str, callback: Union[str, Callable] = 'sequential', stopper: str = 'none', stopper_config: Dict[str, Any] = <factory>, policy_distribution: Dict[str, Dict[str, float]] = None, time_stamp: float = 1645173917.4686654)[source]
Bases:
object
- agent_involve_info: malib.utils.typing.AgentInvolveInfo
- callback: Union[str, Callable] = 'sequential'
- episode_seg: int
- fragment_length: int
- mode: str
- num_episodes: int
- policy_distribution: Dict[str, Dict[str, float]] = None
- stopper: str = 'none'
- stopper_config: Dict[str, Any]
- terminate_mode: str
- time_stamp: float = 1645173917.4686654
- class malib.utils.typing.RolloutFeedback(worker_idx: str, agent_involve_info: malib.utils.typing.AgentInvolveInfo, statistics: Dict[str, Any], policy_combination: Optional[Dict[str, str]] = None)[source]
Bases:
object
RolloutFeedback for rollout tasks
- agent_involve_info: malib.utils.typing.AgentInvolveInfo
agent involve info describes the trainable pairs, populations, environment id and the meta parameter descriptions
- policy_combination: Dict[str, str] = None
- statistics: Dict[str, Any]
- worker_idx: str
id of rollout worker
- class malib.utils.typing.SimulationDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, policy_combinations: List[Dict[str, Tuple[str, Dict[str, Any]]]], num_episodes: int, callback: Union[str, Callable] = 'sequential', max_episode_length: int = None, time_stamp: float = 1645173917.4693458)[source]
Bases:
object
- agent_involve_info: malib.utils.typing.AgentInvolveInfo
- callback: Union[str, Callable] = 'sequential'
- max_episode_length: int = None
- num_episodes: int
- policy_combinations: List[Dict[str, Tuple[str, Dict[str, Any]]]]
- time_stamp: float = 1645173917.4693458
- class malib.utils.typing.StandardTransition(obs, new_obs, actions, rewards, dones)
Bases:
tuple
Create new instance of StandardTransition(obs, new_obs, actions, rewards, dones)
- property actions
Alias for field number 2
- property dones
Alias for field number 4
- property new_obs
Alias for field number 1
- property obs
Alias for field number 0
- property rewards
Alias for field number 3
- class malib.utils.typing.Status(value)[source]
Bases:
enum.Enum
An enumeration.
- EXCEED = 'exceed'
- FAILED = 'failed'
- IDLE = 'idle'
- IN_PROGRESS = 'in progress'
- LOCKED = 'locked'
- NORMAL = 'normal'
- SUCCESS = 'success'
- TERMINATE = 'terminate'
- WAITING = 'waiting'
- class malib.utils.typing.TaskDescription(task_type: malib.utils.typing.TaskType, content: Union[malib.utils.typing.TrainingDescription, malib.utils.typing.RolloutDescription, malib.utils.typing.SimulationDescription], state_id: Any, timestamp: Optional[float] = None, source_task_id: Optional[str] = None, identify: Optional[str] = None)[source]
Bases:
object
TaskDescription is a general description of Training, Rollout and Simulation tasks.
- content: Union[malib.utils.typing.TrainingDescription, malib.utils.typing.RolloutDescription, malib.utils.typing.SimulationDescription]
content is a detailed task description entity
- identify: str = None
- source_task_id: str = None
- state_id: Any
- task_type: malib.utils.typing.TaskType
task type used to identify which task description will be used
- timestamp: float = None
- class malib.utils.typing.TaskRequest(task_type: malib.utils.typing.TaskType, content: Any, state_id: str, timestamp: Optional[float] = None, identify: Optional[str] = None, computing_mode: str = 'bulk_sync')[source]
Bases:
object
TaskRequest is a description of a request for the next task, issued by a handler.
- computing_mode: str = 'bulk_sync'
- content: Any
content is the feedback of the current handler, which requests the next task
- static from_task_desc(task_desc: malib.utils.typing.TaskDescription, **kwargs) malib.utils.typing.TaskRequest[source]
- identify: str = None
- state_id: str
- task_type: malib.utils.typing.TaskType
defines the requested task type
- timestamp: float = None
- class malib.utils.typing.TaskType(value)[source]
Bases:
enum.Enum
An enumeration.
- ADD_POLICY = 'add_policy'
- ADD_WORKER = 'add_worker'
- ASYNC_LEARNING = 'async_learning'
- CHECK_ADD = 'check_add'
- EVALUATE = 'evaluate'
- EVALUATE_WRITE_BACK = 'evaluate_write_back'
- INIT = 'initialization'
- LOAD_MODEL = 'load_model'
- NO = 'no'
- OPTIMIZE = 'optimization'
- PULL_PARAMETER = 'pull_parameter'
- PUSH_PARAMETER = 'push_parameter'
- PUSH_SAMPLES = 'push_samples'
- ROLLOUT = 'rollout'
- ROLLOUT_EVALUATE = 'evaluate_for_rollouts'
- SAMPLE_BATCH = 'sample_batch'
- SAVE_MODEL = 'save_model'
- SIMULATION = 'simulation'
- TERMINATE = 'terminate'
- TRAINING_EVALUATE = 'evaluate_for_training'
- UPDATE_PARAMETER = 'update_PARAMETER'
- UPDATE_PAYOFFTABLE = 'update_payofftable'
- UPDATE_POPULATION = 'update_population'
- class malib.utils.typing.TrainingDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, stopper: str = 'none', stopper_config: Dict[str, Any] = <factory>, policy_distribution: Dict[str, Dict[str, float]] = None, update_interval: int = 1, batch_size: int = 64, mode: str = 'step', time_stamp: float = 1645173917.46807)[source]
Bases:
object
- agent_involve_info: malib.utils.typing.AgentInvolveInfo
- batch_size: int = 64
- mode: str = 'step'
- policy_distribution: Dict[str, Dict[str, float]] = None
- stopper: str = 'none'
- stopper_config: Dict[str, Any]
- time_stamp: float = 1645173917.46807
- update_interval: int = 1
- class malib.utils.typing.TrainingFeedback(agent_involve_info: malib.utils.typing.AgentInvolveInfo, statistics: Dict[str, Any])[source]
Bases:
object
- agent_involve_info: malib.utils.typing.AgentInvolveInfo
- statistics: Dict[str, Any]