Source code for SuperSonic.policy_definition.policy_define

from itertools import product
import os
import numpy as np
import sklearn.model_selection


class SuperOptimizer:
    """:class:`SuperOptimizer` includes candidate functions (or models) for representing the
    environment state, objective functions for computing the reward, and the set of possible
    actions that can be taken from a given state. The compiler developer first defines the
    optimization problem by creating an RL policy interface; the definition includes a list
    of client RL components for the meta-optimizer to search over.
    """

    def __init__(
        self,
        StateFunctions=["Word2vec", "Doc2vec", "Bert"],
        RewardFunctions=["relative_measure", "tan", "func", "weight"],
        # A3C, ARS and ES need num_workers > 0
        RLAlgorithms=[
            "MCTS", "PPO", "APPO", "A2C", "DQN", "QLearning", "MARWIL",
            "PG", "SimpleQ", "A3C", "ARS", "ES", "BC",
        ],
        ActionFunctions=["init"],
        datapath="",
    ):
        """Construct and initialize the parameters of the policy definition.

        :param StateFunctions: State functions. The SuperSonic RL components include
            pre-trained observation functions such as Word2Vec and Doc2Vec.
        :param RewardFunctions: Reward functions. Candidate reward functions such as
            RelativeMeasure and tanh compute the reward from the metric reported by the
            measurement interface.
        :param RLAlgorithms: RL algorithms. SuperSonic currently supports 23 RL algorithms
            from RLlib, covering a wide range of established RL algorithms.
        :param ActionFunctions: Action functions. Define a discrete set of actions or
            transformations that can be applied to a program, such as passes in a compiler.
        :param datapath: Path where the benchmarks are saved.
        """
        self.StateFunctions = StateFunctions
        self.RewardFunctions = RewardFunctions
        self.RLAlgorithms = RLAlgorithms
        self.ActionFunctions = ActionFunctions
        self.datapath = datapath
    def PolicyDefined(self):
        """Each component can be chosen from a pool of SuperSonic built-in candidate
        methods, and the combination of these components results in a large policy
        search space.

        :return policy_all: All policy strategies (one dict per candidate policy).
        :return policy_amount: The largest valid index into policy_all.
        """
        self.policy = {
            "StatList": self.StateFunctions,
            "ActList": self.ActionFunctions,
            "RewList": self.RewardFunctions,
            "AlgList": self.RLAlgorithms,
        }
        # Cartesian product of all component choices, one dict per candidate policy.
        self.policy_all = [
            dict(zip(self.policy, v)) for v in product(*self.policy.values())
        ]
        self.policy_amount = len(self.policy_all) - 1
        return self.policy_all, self.policy_amount
    def cross_valid(self):
        """Split the dataset into train/valid sets, by default using 3-fold cross validation."""
        data_list = []
        # Collect the top-level benchmark entries under datapath
        # (files if present, otherwise sub-directories).
        for root, dirs, files in os.walk(self.datapath):
            if files == []:
                for i in dirs:
                    data_list.append(root + "/" + i)
            else:
                for i in files:
                    data_list.append(root + "/" + i)
            break
        data_list = np.array(data_list)
        kfolder = sklearn.model_selection.KFold(
            n_splits=3, shuffle=False, random_state=None
        )
        # Return the first train/valid split produced by the 3-fold splitter.
        for index, (train, test) in enumerate(kfolder.split(data_list)):
            TrainDataset = data_list[train]
            TestDataset = data_list[test]
            Dataset = [TrainDataset, TestDataset]
            return Dataset
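
The snippet below is a usage sketch rather than part of the module: it shows one way to drive SuperOptimizer to enumerate candidate policies and to split a benchmark directory. The "tasks/benchmarks" path is a hypothetical placeholder for the datapath argument.

if __name__ == "__main__":
    # Instantiate with the default component pools; only datapath is supplied.
    # "tasks/benchmarks" is a placeholder and must point at an existing directory.
    optimizer = SuperOptimizer(datapath="tasks/benchmarks")

    # Enumerate the Cartesian product of state, reward, action and RL-algorithm
    # choices. With the default lists above this yields 3 * 4 * 1 * 13 = 156
    # candidate policies; policy_amount is the largest valid index into policy_all.
    policy_all, policy_amount = optimizer.PolicyDefined()
    print(policy_amount + 1, "candidate policies, e.g.", policy_all[0])

    # Split the collected benchmarks into train/valid sets via 3-fold cross validation.
    train_set, test_set = optimizer.cross_valid()
    print(len(train_set), "training benchmarks,", len(test_set), "validation benchmarks")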