Source code for SuperSonic.policy_definition.reward

import numpy as np

class reward_function:
    """A reward function that reports the quality of the actions taken so far.

    It provides candidate reward-transition methods (a relative measure
    against a baseline, ``tan``/``tanh`` transforms, a step function and a
    weighted improvement) to compute the reward based on the metric given by
    the measurement interface.
    """

    def __init__(self):
        """Construct and initialize reward-transition method of different tasks."""
        self.rew_fun = "tanh"

    def get_rew(self, input, baseline=1, weight=1, reward_function="usr_define"):
        """Get the reward using a specific reward-transition method.

        The call also records ``baseline`` and ``input`` on the instance as
        ``self.baseline`` and ``self.current``.

        Supported values of ``reward_function``:

        * ``"usr_define"``: return ``input`` unchanged.
        * ``"relative_measure"``: return ``input / baseline`` (``baseline``
          must be non-zero).
        * ``"tan"``: return ``np.tan(input)``.
        * ``"tanh"``: return ``np.tanh(input)``.
        * ``"func"``: return ``0`` if ``input < baseline``, otherwise ``1``.
        * ``"weight"``: return ``(baseline - input) * weight`` if
          ``input < baseline``, otherwise ``0.0``.

        :param input: Input of the transition function, e.g. runtime, speedup or hamming distance.
        :param baseline: Baseline value the input is compared against (e.g. to calculate speedup).
        :param weight: Weight that scales the improvement in the ``"weight"`` method.
        :param reward_function: Name of the reward-transition method to apply.
        :return: The computed reward.
        :raises ValueError: If ``reward_function`` is not one of the supported names.
        """
        self.baseline = baseline
        self.current = input

        # The reward is returned straight from the matching branch instead of
        # being stored in a module-level global, so an unknown method name can
        # never hand back a stale reward from an earlier call.
        if reward_function == "usr_define":
            return self.current

        if reward_function == "relative_measure":
            return self.current / self.baseline

        if reward_function == "tan":
            return np.tan(self.current)

        if reward_function == "tanh":
            return np.tanh(self.current)

        if reward_function == "func":
            return 0 if self.current < self.baseline else 1

        if reward_function == "weight":
            if self.current < self.baseline:
                # Reward is proportional to how far the input is below the baseline.
                return (self.baseline - self.current) * weight
            return 0.0

        raise ValueError(
            "unknown reward_function {!r}; expected one of 'usr_define', "
            "'relative_measure', 'tan', 'tanh', 'func', 'weight'".format(
                reward_function
            )
        )