Python:如何循环代码,以便它一个接一个地从csv文件中提取列?

西雷

我需要帮助的问题是:如何循环代码,以便它一个接一个地从csv文件中提取列?我的excel文件中,r和m当时每个都有1列(具有相同数量的单元格)。我希望代码以与现在相同的方式执行计算,然后跳转到csv m和r中的第二列并执行相同的计算-我需要能够对所有列重复此过程(我有两个文件中大约1300列)。您能建议如何解决吗?

编码

import math
import numpy

"""
Note - for some of the metrics the absolute value is returns. This is because if the risk (loss) is higher we want to
discount the expected excess return from the portfolio by a higher amount. Therefore risk should be positive.
"""


def vol(returns):
    # Return the standard deviation of returns
    return numpy.std(returns)


def beta(returns, market):
    # Create a matrix of [returns, market]
    m = numpy.matrix([returns, market])
    # Return the covariance of m divided by the standard deviation of the market returns
    return numpy.cov(m)[0][1] / numpy.std(market)


def lpm(returns, threshold, order):
    # This method returns a lower partial moment of the returns
    # Create an array he same length as returns containing the minimum return threshold
    threshold_array = numpy.empty(len(returns))
    threshold_array.fill(threshold)
    # Calculate the difference between the threshold and the returns
    diff = threshold_array - returns
    # Set the minimum of each to 0
    diff = diff.clip(min=0)
    # Return the sum of the different to the power of order
    return numpy.sum(diff ** order) / len(returns)


def hpm(returns, threshold, order):
    # This method returns a higher partial moment of the returns
    # Create an array he same length as returns containing the minimum return threshold
    threshold_array = numpy.empty(len(returns))
    threshold_array.fill(threshold)
    # Calculate the difference between the returns and the threshold
    diff = returns - threshold_array
    # Set the minimum of each to 0
    diff = diff.clip(min=0)
    # Return the sum of the different to the power of order
    return numpy.sum(diff ** order) / len(returns)


def var(returns, alpha):
    # This method calculates the historical simulation var of the returns
    sorted_returns = numpy.sort(returns)
    # Calculate the index associated with alpha
    index = int(alpha * len(sorted_returns))
    # VaR should be positive
    return abs(sorted_returns[index])


def cvar(returns, alpha):
    # This method calculates the condition VaR of the returns
    sorted_returns = numpy.sort(returns)
    # Calculate the index associated with alpha
    index = int(alpha * len(sorted_returns))
    # Calculate the total VaR beyond alpha
    sum_var = sorted_returns[0]
    for i in range(1, index):
        sum_var += sorted_returns[i]
    # Return the average VaR
    # CVaR should be positive
    return abs(sum_var / index)


def prices(returns, base):
    # Converts returns into prices
    s = [base]
    for i in range(len(returns)):
        s.append(base * (1 + returns[i]))
    return numpy.array(s)


def dd(returns, tau):
    # Returns the draw-down given time period tau
    values = prices(returns, 100)
    pos = len(values) - 1
    pre = pos - tau
    drawdown = float('+inf')
    # Find the maximum drawdown given tau
    while pre >= 0:
        dd_i = (values[pos] / values[pre]) - 1
        if dd_i < drawdown:
            drawdown = dd_i
        pos, pre = pos - 1, pre - 1
    # Drawdown should be positive
    return abs(drawdown)


def max_dd(returns):
    # Returns the maximum draw-down for any tau in (0, T) where T is the length of the return series
    max_drawdown = float('-inf')
    for i in range(0, len(returns)):
        drawdown_i = dd(returns, i)
        if drawdown_i > max_drawdown:
            max_drawdown = drawdown_i
    # Max draw-down should be positive
    return abs(max_drawdown)


def average_dd(returns, periods):
    # Returns the average maximum drawdown over n periods
    drawdowns = []
    for i in range(0, len(returns)):
        drawdown_i = dd(returns, i)
        drawdowns.append(drawdown_i)
    drawdowns = sorted(drawdowns)
    total_dd = abs(drawdowns[0])
    for i in range(1, periods):
        total_dd += abs(drawdowns[i])
    return total_dd / periods


def average_dd_squared(returns, periods):
    # Returns the average maximum drawdown squared over n periods
    drawdowns = []
    for i in range(0, len(returns)):
        drawdown_i = math.pow(dd(returns, i), 2.0)
        drawdowns.append(drawdown_i)
    drawdowns = sorted(drawdowns)
    total_dd = abs(drawdowns[0])
    for i in range(1, periods):
        total_dd += abs(drawdowns[i])
    return total_dd / periods


def treynor_ratio(er, returns, market, rf):
    return (er - rf) / beta(returns, market)


def sharpe_ratio(er, returns, rf):
    return (er - rf) / vol(returns)


def information_ratio(returns, benchmark):
    diff = returns - benchmark
    return numpy.mean(diff) / vol(diff)


def modigliani_ratio(er, returns, benchmark, rf):
    np_rf = numpy.empty(len(returns))
    np_rf.fill(rf)
    rdiff = returns - np_rf
    bdiff = benchmark - np_rf
    return (er - rf) * (vol(rdiff) / vol(bdiff)) + rf


def excess_var(er, returns, rf, alpha):
    return (er - rf) / var(returns, alpha)


def conditional_sharpe_ratio(er, returns, rf, alpha):
    return (er - rf) / cvar(returns, alpha)


def omega_ratio(er, returns, rf, target=0):
    return (er - rf) / lpm(returns, target, 1)


def sortino_ratio(er, returns, rf, target=0):
    return (er - rf) / math.sqrt(lpm(returns, target, 2))


def kappa_three_ratio(er, returns, rf, target=0):
    return (er - rf) / math.pow(lpm(returns, target, 3), float(1/3))


def gain_loss_ratio(returns, target=0):
    return hpm(returns, target, 1) / lpm(returns, target, 1)


def upside_potential_ratio(returns, target=0):
    return hpm(returns, target, 1) / math.sqrt(lpm(returns, target, 2))


def calmar_ratio(er, returns, rf):
    return (er - rf) / max_dd(returns)


def sterling_ration(er, returns, rf, periods):
    return (er - rf) / average_dd(returns, periods)


def burke_ratio(er, returns, rf, periods):
    return (er - rf) / math.sqrt(average_dd_squared(returns, periods))


def test_risk_metrics(r, m):
    print("vol =", vol(r))
    print("beta =", beta(r, m))
    print("hpm(0.0)_1 =", hpm(r, 0.0, 1))
    print("lpm(0.0)_1 =", lpm(r, 0.0, 1))
    print("VaR(0.05) =", var(r, 0.05))
    print("CVaR(0.05) =", cvar(r, 0.05))
    print("Drawdown(5) =", dd(r, 5))
    print("Max Drawdown =", max_dd(r))


def test_risk_adjusted_metrics(r, m):
    # Returns from the portfolio (r) and market (m)
    # Expected return
    e = numpy.mean(r)
    # Risk free rate
    f = 0.06
    # Risk-adjusted return based on Volatility
    print("Treynor Ratio =", treynor_ratio(e, r, m, f))
    print("Sharpe Ratio =", sharpe_ratio(e, r, f))
    print("Information Ratio =", information_r
          atio(r, m))
    # Risk-adjusted return based on Value at Risk
    print("Excess VaR =", excess_var(e, r, f, 0.05))
    print("Conditional Sharpe Ratio =", conditional_sharpe_ratio(e, r, f, 0.05))
    # Risk-adjusted return based on Lower Partial Moments
    print("Omega Ratio =", omega_ratio(e, r, f))
    print("Sortino Ratio =", sortino_ratio(e, r, f))
    print("Kappa 3 Ratio =", kappa_three_ratio(e, r, f))
    print("Gain Loss Ratio =", gain_loss_ratio(r))
    print("Upside Potential Ratio =", upside_potential_ratio(r))
    # Risk-adjusted return based on Drawdown risk
    print("Calmar Ratio =", calmar_ratio(e, r, f))
    print("Sterling Ratio =", sterling_ration(e, r, f, 5))
    print("Burke Ratio =", burke_ratio(e, r, f, 5))


if __name__ == "__main__":
    import csv

    # load r
    with open(r'C:\Users\Lenovo\Documents\r.csv') as csvfile:  # change your filename here
        r = numpy.array([float(x[0]) for x in csv.reader(csvfile)])

    # load m
    with open(r'C:\Users\Lenovo\Documents\m.csv') as csvfile:  # change your filename here
        m = numpy.array([float(x[0]) for x in csv.reader(csvfile)])

    test_risk_metrics(r, m)
    test_risk_adjusted_metrics(r, m)
Scratch'N'Purr

由于您提到每列的长度可能有所不同,因此我提出了一种解决方案,其中逐行而不是逐列读取randm文件。之所以这样,是因为用变长列进行迭代会带来问题,但更重要的是,这还意味着我们必须将整个CSV加载到内存中,然后在列上进行迭代。当我们逐行读取时,我们会使用较少的内存,而不必担心每行中元素长度的变化。

由于我们是逐行阅读的,因此我们不再需要依赖于csv软件包。我们可以简单地将文件加载为文本文件,并用空格,逗号或您认为合适的其他标点符号分隔值。出于本示例的目的,我将使用逗号分隔值。

假设我们的r_values文件位于下面,其中文件中的每一行代表要馈送到您的函数的值的数组:

1.22,3.33,3.24,0.32,0.13
2.42,35.43,2.43,87.77,0.98,0.32,32.43,9.56,74.32,2.32
8.78,0.23,64.61,7.23,8.77,76.77

我们的m_values文件是:

4.23,7.56,98.65,4.87,9.32
3.34,9.45,0.32,86.44,9.45,3.53,0.65,0.43,1.43,65.54
3.34,89.54,8.43,7.54,83.2,8.43

现在在我们的代码__name__ == '__main__'块中,我们加载文件,并遍历各行,同时将它们传递给test_risk_metricsandtest_risk_adjusted_metrics函数:

if __name__ == "__main__":
    with open(r'C:\path\to\r_values.csv') as r_file, \
         open(r'C:\path\to\m_values.csv') as m_file:
        for r, m in zip(r_file, m_file):
            # since our lines are separated by commas, we use `split` function
            # we also cast our values as float
            r = numpy.array([float(x) for x in r.split(',')])
            m = numpy.array([float(x) for x in m.split(',')])

            # diagnostic check
            print(r)  # comment out
            print(m)  # comment out

            # pass to `test_risk_metrics` and `test_risk_adjusted_metrics`
            test_risk_metrics(r, m)
            test_risk_adjusted_metrics(r, m)

最后,这是输出:

[1.22 3.33 3.24 0.32 0.13]
[ 4.23  7.56 98.65  4.87  9.32]
vol = 1.3866996790942157
beta = 0.9980359303098474
hpm(0.0)_1 = 1.6480000000000001
lpm(0.0)_1 = 0.0
VaR(0.05) = 0.13
test.py:68: RuntimeWarning: divide by zero encountered in double_scalars
  return abs(sum_var / index)
CVaR(0.05) = inf
Drawdown(5) = 0.1299999999999999
Max Drawdown = 0.7390300230946882
Treynor Ratio = 1.591125080543938
Sharpe Ratio = 1.145165044703315
Information Ratio = -0.6443354312329719
Excess VaR = 12.215384615384616
Conditional Sharpe Ratio = 0.0
test.py:162: RuntimeWarning: divide by zero encountered in double_scalars
  return (er - rf) / lpm(returns, target, 1)
Omega Ratio = inf
test.py:166: RuntimeWarning: divide by zero encountered in double_scalars
  return (er - rf) / math.sqrt(lpm(returns, target, 2))
Sortino Ratio = inf
test.py:170: RuntimeWarning: divide by zero encountered in double_scalars
  return (er - rf) / math.pow(lpm(returns, target, 3), float(1/3))
Kappa 3 Ratio = inf
test.py:174: RuntimeWarning: divide by zero encountered in double_scalars
  return hpm(returns, target, 1) / lpm(returns, target, 1)
Gain Loss Ratio = inf
test.py:178: RuntimeWarning: divide by zero encountered in double_scalars
  return hpm(returns, target, 1) / math.sqrt(lpm(returns, target, 2))
Upside Potential Ratio = inf
Calmar Ratio = 2.1487625
Sterling Ratio = 2.993751401271527
Burke Ratio = 2.647015918149671
[ 2.42 35.43  2.43 87.77  0.98  0.32 32.43  9.56 74.32  2.32]
[ 3.34  9.45  0.32 86.44  9.45  3.53  0.65  0.43  1.43 65.54]
vol = 30.812687581579116
beta = 14.103506402406339
hpm(0.0)_1 = 24.798
lpm(0.0)_1 = 0.0
VaR(0.05) = 0.32
CVaR(0.05) = inf
Drawdown(5) = 0.6140350877192983
Max Drawdown = 0.9851301115241635
Treynor Ratio = 1.7540318906636725
Sharpe Ratio = 0.8028510961435648
Information Ratio = 0.20592426973227423
Excess VaR = 77.30624999999999
Conditional Sharpe Ratio = 0.0
Omega Ratio = inf
Sortino Ratio = inf
Kappa 3 Ratio = inf
Gain Loss Ratio = inf
Upside Potential Ratio = inf
Calmar Ratio = 25.111403773584907
Sterling Ratio = 78.07671376290729
Burke Ratio = 50.392183664218216
[ 8.78  0.23 64.61  7.23  8.77 76.77]
[ 3.34 89.54  8.43  7.54 83.2   8.43]
vol = 30.714112074998287
beta = -18.831320000339733
hpm(0.0)_1 = 27.731666666666666
lpm(0.0)_1 = 0.0
VaR(0.05) = 0.23
CVaR(0.05) = inf
Drawdown(5) = 6.9519427402863
Max Drawdown = 6.9519427402863
Treynor Ratio = -1.4694491233842049
Sharpe Ratio = 0.9009430778626281
Information Ratio = -0.09563177846201822
Excess VaR = 120.31159420289855
Conditional Sharpe Ratio = 0.0
Omega Ratio = inf
Sortino Ratio = inf
Kappa 3 Ratio = inf
Gain Loss Ratio = inf
Upside Potential Ratio = inf
Calmar Ratio = 3.9804221209001316
Sterling Ratio = 73.39338628531124
Burke Ratio = 50.28169156965575

本文收集自互联网,转载请注明来源。

如有侵权,请联系 [email protected] 删除。

编辑于
0

我来说两句

0 条评论
登录 后参与评论

相关文章

如何从csv文件中提取一个numpy数组?

如何使用python代码将excel数据从列中提取到一个用逗号分隔的.txt文件?

python从两个csv文件中提取列并将其合并为一个新的csv文件

如何从多个CSV文件中提取一行到一个新文件

完成后,一个接一个地复制列表中提到的文件

如何创建一个函数管道,以便函数一个接一个地运行?

如何从一个文件中提取列并移动到另一个文件而不打印列?

R:如何在多个csv中提取列,然后在一个文件夹中写入多个csv

如何使用Powershell从多个csv文件中提取一个特定的列(没有标题,说第2列)?

如何从许多HTML文件中提取表到一个csv文件中?

如何从tar.xz中提取一个文件

如何从内部存储中提取一个zip文件?

从 Json 文件中提取一个值(python)

如何在循环中一个接一个地运行python脚本序列?

如何从 csv 文件中提取最后一个交易日?

如何从不同的文件中提取特定的列并在一个文件中输出?

如何在一个 FOR 循环中从 JSON 多字典中提取数据 - Python

如何拆分 PHP 代码,以便它显示一个文本文件的结果,一个文本文件的结果一个下一个?

如何用python和美丽的汤从html代码中提取一个小时

如何使用python从另一个文件中的多个文件中提取数据?

如何强制2个Python子进程一个接一个地运行

如何一个接一个地部署多个 serverless.yml 文件(顺序)

如何从json文件一个接一个地获取多个json数据?

如何在一个文件夹中一个接一个地运行多个python文件

如何从python上的PDF文件中提取一个单词的多个实例?

python如何计算从另一个输入文件中提取的出现次数

如何在bash中一个接一个地打印列?

如何使用AWK将某些列从一个CSV文件提取到另一个?

如何从多个文件中提取一列,然后将这些列粘贴到一个文件中?