# Source code for SuperSonic.utils.environments.Stoke_env

import gym
import numpy as np
import grpc
import threading
import sqlite3
import time
from gym.spaces import Discrete, Dict, Box
from SuperSonic.service import schedule_pb2
from SuperSonic.service import schedule_pb2_grpc
from concurrent import futures
from copy import deepcopy
_ONE_DAY_IN_SECONDS = 60 * 60 * 24  # one day in seconds (timeout value); not referenced in the visible part of this module
# Module-level mutexes shared by ScheduleServicer.GetStokeMsg and stoke_rl.step.
# `lock` is acquired at the start of step() and released by GetStokeMsg();
# `lock_s` is acquired by GetStokeMsg() and released by step().
lock = threading.Lock()
lock_s = threading.Lock()

# Module-level state exchanged between the gRPC handler and the environment.
state_code = ""  # last `request.code` stored by GetStokeMsg()
Action = 2  # action sent back in MsgStokeResponse; overwritten by every step()
state_reward = 1000.0  # last `request.cost` stored by GetStokeMsg()

class ScheduleServicer(schedule_pb2_grpc.ScheduleServiceServicer):
    """gRPC servicer that trades the latest STOKE code/cost for an action."""

    def GetStokeMsg(self, request, context):
        """Store the reported code and cost, then reply with the current action.

        :param request: message exposing ``code`` and ``cost`` fields.
        :param context: gRPC call context (not used here).
        :return: a ``MsgStokeResponse`` carrying the module-level ``Action``.
        """
        global state_code, state_reward

        # Blocks while lock_s is held; stoke_rl.step() is the side that
        # releases it once it has consumed the previous code/cost pair.
        lock_s.acquire()
        state_code = request.code
        state_reward = request.cost

        # step() acquires `lock` on entry; releasing it here lets a step()
        # call that is blocked on that acquire proceed with the new state.
        step_holds_lock = lock.locked()
        if step_holds_lock:
            lock.release()

        reply = schedule_pb2.MsgStokeResponse(action=Action)
        return reply

class stoke_rl:
    """Reinforcement-learning environment wrapper around STOKE.

    STOKE is a stochastic optimizer and program synthesizer for the x86-64
    instruction set. This classical compiler optimization task finds a valid
    code sequence to maximize the performance of a loop-free sequence of
    instructions. Superoptimization is an expensive optimization technique as
    the number of possible configurations grows exponentially as the
    instruction count to be optimized increases.

    Source:
        This environment corresponds to the version of STOKE described by
        StanfordPL (https://github.com/StanfordPL/stoke).
        Paper: https://raw.githubusercontent.com/StanfordPL/stoke/develop/docs/papers/asplos13.pdf

    Observation:
        Type: Box(100)
        Optimized code is converted to vectors by different embedding
        approaches, e.g. Word2vec, Doc2vec, CodeBert ...

    Actions:
        Type: Discrete(9)

        ===  ============  ====================================================
        Num  Action        Description
        ===  ============  ====================================================
        0    add_nops      Adds one extra nop instruction into the rewrite.
        1    delete        Deletes one instruction at random.
        2    instruction   Replaces an instruction with another one chosen at
                           random.
        3    opcode        Replaces an instruction's opcode with a new one
                           that takes operands of the same type.
        4    operand       Replaces an operand of one instruction with
                           another.
        5    rotate        Formerly "resize". Moves an instruction from one
                           basic block to another, and shifts all the
                           instructions in between.
        6    local_swap    Takes two instructions in the same basic block and
                           swaps them.
        7    global_swap   Takes two instructions in the entire program and
                           swaps them.
        8    weighted      Selects from among several other transforms at
                           random.
        ===  ============  ====================================================

    Reward:
        In all cases, lower cost is better. The value of correctness is
        combined with other values we want to optimize for.

        ===========  ======================================================
        Name         Description
        ===========  ======================================================
        binsize      The size (in bytes) of the assembled rewrite using the
                     x64asm library.
        correctness  How "correct" the rewrite's output appears. Very
                     configurable.
        size         The number of instructions in the assembled rewrite.
        latency      A poor-man's estimate of the rewrite latency, in clock
                     cycles, based on the per-opcode latency table in
                     src/cost/tables.
        measured     An estimate of running time by counting the number of
                     instructions actually executed on the testcases. Good
                     for loops and algorithmic improvements.
        sseavx       Returns '1' if both avx and sse instructions are used
                     (this is usually bad!), and '0' otherwise. Often used
                     with a multiplier like correctness + 1000*sseavx.
        nongoal      Returns '1' if the code (after minimization) is found
                     to be equivalent to one in --non_goal. Can also be used
                     with a multiplier.
        ===========  ======================================================

    Starting State:
        All observations are assigned a uniform random value in [-1..1].
    """

    # Size of the discrete action space; also the length of the action mask.
    _NUM_ACTIONS = 9

    def __init__(self, env_config):
        """Define the reinforcement learning environment.

        Builds the wrapped ``Stoke-v0`` gym environment and starts the gRPC
        server that serves ``ScheduleServicer``.

        :param env_config: mapping read with ``.get`` for the keys
            "state_function", "action_function", "reward_function",
            "sql_path" (SQLite file used for result logging) and "target"
            (address the gRPC server is bound to).
        """
        self.env = gym.make(
            "Stoke-v0",
            state_function=env_config.get("state_function"),
            action_function=env_config.get("action_function"),
            reward_function=env_config.get("reward_function"),
        )
        self.sql_path = env_config.get("sql_path")
        self.action_space = Discrete(self._NUM_ACTIONS)
        self.observation_space = Dict(
            {
                "obs": self.env.observation_space,
                "action_mask": Box(low=0, high=1, shape=(self.action_space.n,)),
            }
        )
        self.running_reward = 0
        self.tstart = time.time()

        # Start the gRPC server through which code/cost updates arrive.
        self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        schedule_pb2_grpc.add_ScheduleServiceServicer_to_server(
            ScheduleServicer(), self.server
        )
        self.server.add_insecure_port(env_config.get("target"))
        self.server.start()

    def _all_actions_mask(self):
        """Return a mask of ones marking every action as allowed."""
        return np.array([1] * self._NUM_ACTIONS)

    def _record_result(self, code, reward):
        """Best-effort insert of one (TIME, RESULT, REWARD) row into STOKE.

        Any failure is printed and swallowed so that logging problems never
        interrupt training. The connection is closed even when the insert or
        the commit raises.

        :param code: code text for this step; "nop\\n" lines are stripped
            before it is stored.
        :param reward: reward returned by the wrapped environment.
        """
        sql = "INSERT INTO STOKE (TIME, RESULT, REWARD) VALUES (?, ?, ?)"
        try:
            conn = sqlite3.connect(self.sql_path)
            try:
                conn.execute(sql, (time.time(), code.replace("nop\n", ""), reward))
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            print(e)

    def reset(self):
        """Reset the RL environment.

        :return: dict with the wrapped environment's initial "obs" and an
            all-ones "action_mask".
        """
        self.running_reward = 0
        return {
            "obs": self.env.reset(),
            "action_mask": self._all_actions_mask(),
        }

    def step(self, action):
        """Take a step.

        Acquires the module-level ``lock`` (released by the gRPC handler),
        publishes ``action`` through the module-level ``Action``, steps the
        wrapped environment with the latest code/cost, releases ``lock_s`` so
        the handler can store the next message, and logs the result.

        :param action: the action to apply.
        :return: a tuple ``(state, score, done, info)`` where ``state`` is a
            dict with "obs" and "action_mask", and ``score`` is the
            accumulated reward when ``done`` is true and 0 otherwise.
        """
        global Action
        lock.acquire()
        Action = action
        print("action", action)

        # Snapshot the shared state once: after lock_s is released below the
        # gRPC thread may overwrite it, and the logged code must be the one
        # that produced the logged reward.
        code, cost = state_code, state_reward
        obs, rew, done, info = self.env.step(action, code, cost)
        self.running_reward += rew
        score = self.running_reward if done else 0

        if lock_s.locked():
            lock_s.release()

        self._record_result(code, rew)
        return (
            {"obs": obs, "action_mask": self._all_actions_mask()},
            score,
            done,
            info,
        )

    def set_state(self, state):
        """Restore the environment from a snapshot made by ``get_state``.

        :param state: pair ``(environment, running_reward)``.
        :return: dict with "obs" built from the restored environment's
            ``unwrapped.state`` and an all-ones "action_mask".
        """
        self.env = deepcopy(state[0])
        self.running_reward = state[1]
        obs = np.array(list(self.env.unwrapped.state))
        return {"obs": obs, "action_mask": self._all_actions_mask()}

    def get_state(self):
        """Return the actor state.

        :return: pair of a deep copy of the wrapped environment and the
            current running reward.
        """
        return deepcopy(self.env), self.running_reward