malib.utils.typing module

malib.utils.typing.ActionSpaceType

For task categorization and status tagging

class malib.utils.typing.AgentInvolveInfo(training_handler: str, trainable_pairs: Dict[str, Tuple[str, Dict[str, Any]]], populations: Dict[str, Sequence[Tuple[str, Dict[str, Any]]]], env_id: Optional[str] = None, meta_parameter_desc_dict: Optional[Dict[str, malib.utils.typing.MetaParameterDescription]] = None)[source]

Bases: object

AgentInvolveInfo describes the trainable pairs, populations, environment id and the meta parameter descriptions.

env_id: str = None

environment id

classmethod gen_template(agent_ids: List[str], observation_space: gym.spaces.space.Space, action_space: gym.spaces.space.Space)[source]
meta_parameter_desc_dict: Dict[str, malib.utils.typing.MetaParameterDescription] = None

meta parameter description

populations: Dict[str, Sequence[Tuple[str, Dict[str, Any]]]]

describes the policy population of agents

trainable_pairs: Dict[str, Tuple[str, Dict[str, Any]]]

describes the environment agent ids and their bound policy configurations

training_handler: str
class malib.utils.typing.BColors[source]

Bases: object

BOLD = '\x1b[1m'
ENDC = '\x1b[0m'
FAIL = '\x1b[91m'
HEADER = '\x1b[95m'
OKBLUE = '\x1b[94m'
OKCYAN = '\x1b[96m'
OKGREEN = '\x1b[92m'
UNDERLINE = '\x1b[4m'
WARNING = '\x1b[93m'
class malib.utils.typing.BatchMetaInfo(episode_id: str, created_time: float, meta_policy_id: str = None, policy_id: str = None, env_id: Any = None, policy_type: Any = None)[source]

Bases: object

created_time: float
env_id: Any = None
episode_id: str
meta_policy_id: str = None
policy_id: str = None
policy_type: Any = None
class malib.utils.typing.BehaviorMode(value)[source]

Bases: enum.IntEnum

Behavior mode, indicates environment agent behavior

EXPLOITATION = 1

Trigger exploitation mode

EXPLORATION = 0

Trigger exploration mode

class malib.utils.typing.BufferDescription(env_id: str, agent_id: Union[str, List[str]], policy_id: Union[str, List[str]], batch_size: int = 0, sample_mode: str = '', indices: List[int] = None, data: Any = None, data_shapes: Dict[str, Tuple] = None, sample_start_size: int = 0, capacity: int = 1000, identify: str = None)[source]

Bases: object

agent_id: Union[str, List[str]]
batch_size: int = 0
capacity: int = 1000
data: Any = None
data_shapes: Dict[str, Tuple] = None
env_id: str
identify: str = None
indices: List[int] = None
policy_id: Union[str, List[str]]
sample_mode: str = ''
sample_start_size: int = 0
class malib.utils.typing.EvaluateResult[source]

Bases: object

AVE_REWARD = 'average_reward'
CONVERGED = 'converged'
REACHED_MAX_ITERATION = 'reached_max_iteration'
static default_result()[source]
class malib.utils.typing.EventReportStatus[source]

Bases: object

END = 'end'
START = 'start'
class malib.utils.typing.ExperimentManagerTableName[source]

Bases: object

key: int = 0
nid: int = 0
primary: str = ''
secondary: str = ''
tag: str = ''
class malib.utils.typing.MetaParameterDescription(meta_pid: str, parameter_desc_dict: Dict[str, malib.utils.typing.ParameterDescription], timestamp: float = 1645173917.4664972, identify: str = 'MetaParameterDescription')[source]

Bases: object

classmethod gen_template(**kwargs)[source]
identify: str = 'MetaParameterDescription'
meta_pid: str
parameter_desc_dict: Dict[str, malib.utils.typing.ParameterDescription]
timestamp: float = 1645173917.4664972
class malib.utils.typing.MetricEntry(value: Any, agg: str = 'mean', tag: str = '', log: bool = True)[source]

Bases: object

cleaned_data()[source]

Return values

class malib.utils.typing.MetricType[source]

Bases: object

LIVE_STEP = 'live_step'

Agent live step

REACH_MAX_STEP = 'reach_max_step'

Whether the max step has been reached or not

REWARD = 'reward'

Reward

class malib.utils.typing.Paradigm(value)[source]

Bases: enum.Enum

An enumeration.

MARL = 'marl'
META_GAME = 'meta_game'
malib.utils.typing.Parameter = typing.Any

Description:

class malib.utils.typing.ParameterDescription(time_stamp: float, identify: str, env_id: str, id: str, type: str = 'parameter', lock: bool = False, description: Any = None, data: Any = None, parallel_num: int = 1, version: int = -1)[source]

Bases: object

class Type[source]

Bases: object

GRADIENT = 'gradient'
PARAMETER = 'parameter'
data: Any = None
description: Any = None
env_id: str
classmethod gen_template(**kwargs)[source]
id: str
identify: str
lock: bool = False
parallel_num: int = 1
time_stamp: float
type: str = 'parameter'
version: int = -1
class malib.utils.typing.RolloutDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, fragment_length: int, num_episodes: int, episode_seg: int, terminate_mode: str, mode: str, callback: Union[str, Callable] = 'sequential', stopper: str = 'none', stopper_config: Dict[str, Any] = <factory>, policy_distribution: Dict[str, Dict[str, float]] = None, time_stamp: float = 1645173917.4686654)[source]

Bases: object

agent_involve_info: malib.utils.typing.AgentInvolveInfo
callback: Union[str, Callable] = 'sequential'
episode_seg: int
fragment_length: int
classmethod gen_template(**template_attr_kwargs)[source]
mode: str
num_episodes: int
policy_distribution: Dict[str, Dict[str, float]] = None
stopper: str = 'none'
stopper_config: Dict[str, Any]
terminate_mode: str
time_stamp: float = 1645173917.4686654
class malib.utils.typing.RolloutFeedback(worker_idx: str, agent_involve_info: malib.utils.typing.AgentInvolveInfo, statistics: Dict[str, Any], policy_combination: Optional[Dict[str, str]] = None)[source]

Bases: object

RolloutFeedback for rollout tasks

agent_involve_info: malib.utils.typing.AgentInvolveInfo

agent involve info describes the trainable pairs, populations, environment id and the meta parameter descriptions

policy_combination: Dict[str, str] = None
statistics: Dict[str, Any]
worker_idx: str

id of rollout worker

class malib.utils.typing.SimulationDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, policy_combinations: List[Dict[str, Tuple[str, Dict[str, Any]]]], num_episodes: int, callback: Union[str, Callable] = 'sequential', max_episode_length: int = None, time_stamp: float = 1645173917.4693458)[source]

Bases: object

agent_involve_info: malib.utils.typing.AgentInvolveInfo
callback: Union[str, Callable] = 'sequential'
classmethod gen_template(**kwargs)[source]
max_episode_length: int = None
num_episodes: int
policy_combinations: List[Dict[str, Tuple[str, Dict[str, Any]]]]
time_stamp: float = 1645173917.4693458
class malib.utils.typing.StandardTransition(obs, new_obs, actions, rewards, dones)

Bases: tuple

Create new instance of StandardTransition(obs, new_obs, actions, rewards, dones)

property actions

Alias for field number 2

property dones

Alias for field number 4

property new_obs

Alias for field number 1

property obs

Alias for field number 0

property rewards

Alias for field number 3

class malib.utils.typing.Status(value)[source]

Bases: enum.Enum

An enumeration.

EXCEED = 'exceed'
FAILED = 'failed'
IDLE = 'idle'
IN_PROGRESS = 'in progress'
LOCKED = 'locked'
NORMAL = 'normal'
SUCCESS = 'success'
TERMINATE = 'terminate'
WAITING = 'waiting'
class malib.utils.typing.TaskDescription(task_type: malib.utils.typing.TaskType, content: Union[malib.utils.typing.TrainingDescription, malib.utils.typing.RolloutDescription, malib.utils.typing.SimulationDescription], state_id: Any, timestamp: Optional[float] = None, source_task_id: Optional[str] = None, identify: Optional[str] = None)[source]

Bases: object

TaskDescription is a general description of Training, Rollout and Simulation tasks.

content: Union[malib.utils.typing.TrainingDescription, malib.utils.typing.RolloutDescription, malib.utils.typing.SimulationDescription]

content is a detailed task description entity

classmethod gen_template(**template_attr_kwargs)[source]
identify: str = None
source_task_id: str = None
state_id: Any
task_type: malib.utils.typing.TaskType

task type used to identify which task description will be used

timestamp: float = None
class malib.utils.typing.TaskRequest(task_type: malib.utils.typing.TaskType, content: Any, state_id: str, timestamp: Optional[float] = None, identify: Optional[str] = None, computing_mode: str = 'bulk_sync')[source]

Bases: object

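TaskRequest is a description of a task request: it carries the requested task type and the feedback of the handler that requests the next task.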

computing_mode: str = 'bulk_sync'
content: Any

content is the feedback of the current handler, which requests the next task

static from_task_desc(task_desc: malib.utils.typing.TaskDescription, **kwargs) malib.utils.typing.TaskRequest[source]
identify: str = None
state_id: str
task_type: malib.utils.typing.TaskType

defines the requested task type

timestamp: float = None
class malib.utils.typing.TaskType(value)[source]

Bases: enum.Enum

An enumeration.

ADD_POLICY = 'add_policy'
ADD_WORKER = 'add_worker'
ASYNC_LEARNING = 'async_learning'
CHECK_ADD = 'check_add'
EVALUATE = 'evaluate'
EVALUATE_WRITE_BACK = 'evaluate_write_back'
INIT = 'initialization'
LOAD_MODEL = 'load_model'
NO = 'no'
OPTIMIZE = 'optimization'
PULL_PARAMETER = 'pull_parameter'
PUSH_PARAMETER = 'push_parameter'
PUSH_SAMPLES = 'push_samples'
ROLLOUT = 'rollout'
ROLLOUT_EVALUATE = 'evaluate_for_rollouts'
SAMPLE_BATCH = 'sample_batch'
SAVE_MODEL = 'save_model'
SIMULATION = 'simulation'
TERMINATE = 'terminate'
TRAINING_EVALUATE = 'evaluate_for_training'
UPDATE_PARAMETER = 'update_PARAMETER'
UPDATE_PAYOFFTABLE = 'update_payofftable'
UPDATE_POPULATION = 'update_population'
class malib.utils.typing.TrainingDescription(agent_involve_info: malib.utils.typing.AgentInvolveInfo, stopper: str = 'none', stopper_config: Dict[str, Any] = <factory>, policy_distribution: Dict[str, Dict[str, float]] = None, update_interval: int = 1, batch_size: int = 64, mode: str = 'step', time_stamp: float = 1645173917.46807)[source]

Bases: object

agent_involve_info: malib.utils.typing.AgentInvolveInfo
batch_size: int = 64
classmethod gen_template(**template_attr_kwargs)[source]
mode: str = 'step'
policy_distribution: Dict[str, Dict[str, float]] = None
stopper: str = 'none'
stopper_config: Dict[str, Any]
time_stamp: float = 1645173917.46807
update_interval: int = 1
class malib.utils.typing.TrainingFeedback(agent_involve_info: malib.utils.typing.AgentInvolveInfo, statistics: Dict[str, Any])[source]

Bases: object

agent_involve_info: malib.utils.typing.AgentInvolveInfo
statistics: Dict[str, Any]
class malib.utils.typing.TrainingMetric[source]

Bases: object

LOSS = 'loss'