# Python function returns only index

user3551261

I'm currently reading some code related to Markov chains, and I have no idea how part of it works.

``````
transition_probabilities = [
[[0.7, 0.3, 0.0], [1.0, 0.0, 0.0], [0.8, 0.2, 0.0]], # in s0, if action a0 then proba 0.7 to state s0 and 0.3 to state s1, etc.
[[0.0, 1.0, 0.0], None, [0.0, 0.0, 1.0]],
[None, [0.8, 0.1, 0.1], None],
]

rewards = [  # rewards[s][a][s2]: reward for moving from state s to state s2 via action a
    [[+10, 0, 0], [0, 0, 0], [0, 0, 0]],
    [[0, 0, 0], [0, 0, 0], [0, 0, -50]],
    [[0, 0, 0], [+40, 0, 0], [0, 0, 0]],
]

possible_actions = [[0, 1, 2], [0, 2], [1]]  # valid actions per state; state 2 only allows action 1 (its other entries in transition_probabilities are None)

def policy_fire(state):  # pick the action for `state` (0, 1 or 2) from a fixed lookup list
    return [0, 2, 1][state]  # state 0 -> action 0, state 1 -> action 2, state 2 -> action 1

def policy_random(state):
    """Return a randomly chosen action among those listed for `state`.

    The candidates come from the module-level `possible_actions` table,
    indexed by `state`; the draw is delegated to `rnd.choice`.
    """
    return rnd.choice(possible_actions[state])

def policy_safe(state):
    """Return the action for `state` (0, 1 or 2) from a fixed lookup list.

    The literal list is indexed by the state: state 0 -> action 0,
    state 1 -> action 0, state 2 -> action 1.
    """
    return [0, 0, 1][state]

class MDPEnvironment(object):
    """Small simulator that walks through the MDP defined by the module tables.

    It keeps the current state and the running sum of rewards, and reads the
    module-level `transition_probabilities` and `rewards` tables on each step.
    """

    def __init__(self, start_state=0):
        """Remember `start_state` and put the environment into it."""
        self.start_state = start_state
        self.reset()

    def reset(self):
        """Zero the accumulated reward and return to the start state."""
        self.total_rewards = 0
        self.state = self.start_state

    def step(self, action):
        """Apply `action` in the current state and return `(state, reward)`.

        The next state is drawn from the three states 0..2 using the
        probabilities `transition_probabilities[self.state][action]`; the
        reward is `rewards[self.state][action][next_state]`.
        """
        # NOTE(review): assumes `rnd` is `numpy.random`. With numpy, a `None`
        # entry in the table (an action not available in this state) would be
        # passed as p=None and sampled uniformly instead of failing - confirm
        # callers only pass valid actions.
        next_state = rnd.choice(range(3), p=transition_probabilities[self.state][action])
        reward = rewards[self.state][action][next_state]
        self.state = next_state
        self.total_rewards += reward
        return self.state, reward

def run_episode(policy, n_steps, start_state=0, display=True):
    """Run one episode of `n_steps` steps under `policy` and return the total reward.

    Args:
        policy: callable taking the current state and returning an action.
        n_steps: number of environment steps to perform.
        start_state: state in which the episode begins.
        display: when true, print the first 10 visited states (with any
            non-zero reward in parentheses), then "...", then the total.

    Returns:
        The sum of rewards collected during the episode.
    """
    # Pass start_state through; previously the parameter was accepted but the
    # environment was always created in its default state.
    env = MDPEnvironment(start_state)
    if display:
        print("States (+rewards):", end=" ")
    for step in range(n_steps):
        if display:
            if step == 10:
                print("...", end=" ")
            elif step < 10:
                print(env.state, end=" ")
        action = policy(env.state)
        # The new state is tracked on env itself; only the reward is needed here.
        _, reward = env.step(action)
        if display and step < 10 and reward:
            print("({})".format(reward), end=" ")
    if display:
        print("Total rewards =", env.total_rewards)
    return env.total_rewards

# Evaluate each policy over 1000 episodes of 100 steps and print summary statistics.
for policy in (policy_fire, policy_random, policy_safe):
    print(policy.__name__)
    # Only the first five episodes of each policy are printed in detail.
    all_totals = [
        run_episode(policy, n_steps=100, display=(episode < 5))
        for episode in range(1000)
    ]
    # `{:.1f}` for std: the original `{:1f}` set a minimum width of 1, not a precision.
    print("Summary: mean={:.1f}, std={:.1f}, min={}, max={}".format(
        np.mean(all_totals), np.std(all_totals), np.min(all_totals), np.max(all_totals)))
    print()
``````

On line 16, there is a function that seems to return only an index. I've never seen anything like this. As far as I know, an index has to be applied to some object (an l-value) so that it can subscript an element of an array or similar container. Could you please explain what's going on?

David Scarlett

Lines 15-16:

``````
def policy_fire(state):
    return [0, 2, 1][state]
``````

This function assumes `state` will be an integer between `0` and `2`, and uses that value to index the list `[0, 2, 1]` and return the resulting value. So e.g. `policy_fire(1)` will return `2`.

This function is called in function `run_episode`, with `env.state` as the parameter, where `env = MDPEnvironment()`.

Collected from the Internet

edited at