-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathbeta_function.py
More file actions
executable file
·40 lines (25 loc) · 1.1 KB
/
beta_function.py
File metadata and controls
executable file
·40 lines (25 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
Reference paper: <Implicit Quantile Networks for Distributional Reinforcement Learning>
"""
import torch
from typing import Union
# Registry of distortion functions, looked up by name.
# 'uniform' is the identity mapping: the input is returned unchanged.
beta_function_map = {'uniform': lambda x: x}
# For beta functions, concavity corresponds to risk-averse policies and convexity to risk-seeking policies.
# For CPW, eta = 0.71 most closely matches human subjects.
# This function is locally concave for small values of τ and becomes locally convex for larger values of τ.
def cpw(x: Union[torch.Tensor, float], eta: float = 0.71) -> Union[torch.Tensor, float]:
    """Apply the CPW distortion to ``x`` using exponent ``eta``.

    Evaluates ``x**eta / (x**eta + (1 - x)**eta) ** (1 / eta)``.

    Args:
        x: Value(s) to distort; a float or a tensor.
        eta: Exponent of the weighting; defaults to 0.71.

    Returns:
        The distorted value(s), of the same kind as ``x``.
    """
    weighted = x ** eta
    complement = (1 - x) ** eta
    normaliser = (weighted + complement) ** (1 / eta)
    return weighted / normaliser
# Make cpw available through the registry under the key 'CPW'.
beta_function_map.update({'CPW': cpw})
# CVaR is risk-averse
def CVaR(x: Union[torch.Tensor, float], eta: float = 0.71) -> Union[torch.Tensor, float]:
    """Scale ``x`` by ``eta``.

    Args:
        x: Value(s) to distort; a float or a tensor.
        eta: Scaling factor; must not exceed 1.0. Defaults to 0.71.

    Returns:
        ``x * eta``.

    Raises:
        AssertionError: If ``eta`` is greater than 1.0 (only while
            assertions are enabled).
    """
    assert eta <= 1.0
    scaled = x * eta
    return scaled
# Make CVaR available through the registry under the key 'CVaR'.
beta_function_map.update({'CVaR': CVaR})
# risk-averse (eta < 0) or risk-seeking (eta > 0)
def Pow(x: Union[torch.Tensor, float], eta: float = 0.0) -> Union[torch.Tensor, float]:
    """Apply the power-law distortion to ``x``.

    For ``eta >= 0`` this returns ``x ** (1 / (1 + eta))``; for ``eta < 0``
    it returns ``1 - (1 - x) ** (1 / (1 - eta))``. In both branches the
    exponent equals ``1 / (1 + |eta|)``, so ``eta == 0`` is the identity.

    Args:
        x: Value(s) to distort; a float or a tensor.
        eta: Distortion parameter; negative values select the second
            branch. Defaults to 0.0.

    Returns:
        The distorted value(s), of the same kind as ``x``.
    """
    if eta >= 0:
        return x ** (1 / (1 + eta))
    # The parentheses around (1 - eta) are essential: ``1 / 1 - eta`` would
    # evaluate to ``1 - eta`` and flip the direction of the distortion.
    return 1 - (1 - x) ** (1 / (1 - eta))
# Make Pow available through the registry under the key 'Pow'.
beta_function_map.update({'Pow': Pow})