当前正在尝试为看起来像这样的数据集计算比率:
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using Polly;
using RestSharp;
using System;
using System.Collections.Generic;
using System.Net;
namespace FundsAFE.Graphite
{
/// <summary>
/// Executes one REST request through a Polly wait-and-retry policy.
/// A response is treated as a failure (and retried) when its status code is in
/// <c>invalidStatusCodes</c> or when its body is not a parseable JSON object.
/// </summary>
public class RequestExecutor
{
    private static readonly NLog.Logger logger = NLog.LogManager.GetCurrentClassLogger();

    // Status codes that mark a response as failed and trigger a retry.
    private static readonly List<HttpStatusCode> invalidStatusCodes = new List<HttpStatusCode> {
        HttpStatusCode.BadGateway,
        HttpStatusCode.Unauthorized,
        HttpStatusCode.InternalServerError,
        HttpStatusCode.RequestTimeout,
        HttpStatusCode.BadRequest,
        HttpStatusCode.Forbidden,
        HttpStatusCode.GatewayTimeout
    };

    private readonly IRestClient client;
    private readonly IRestRequest request;

    /// <summary>
    /// The most recent response that was classified as a failure by <see cref="Execute"/>.
    /// It is not cleared when a later call succeeds.
    /// </summary>
    public IRestResponse LastErrorResponse { get; set; }

    /// <summary>Creates an executor bound to a single client/request pair.</summary>
    /// <param name="client">Client used to build the URI and send the request.</param>
    /// <param name="request">Request that is (re)sent on every attempt.</param>
    public RequestExecutor(IRestClient client, IRestRequest request)
    {
        this.client = client;
        this.request = request;
    }

    /// <summary>
    /// Sends the request, retrying failed responses up to <paramref name="retryCount"/> times
    /// with a fixed pause between attempts.
    /// </summary>
    /// <param name="retryCount">Maximum number of retries after the first attempt.</param>
    /// <param name="delay">Pause between attempts, in milliseconds.</param>
    /// <returns>
    /// The first response that passes validation; otherwise the response of the final failed
    /// attempt. May be null only if no failed response was ever observed (for example when the
    /// client threw on the first attempt).
    /// </returns>
    public IRestResponse Execute(int retryCount, int delay)
    {
        // The policy depends on the call's arguments, so it is built per call.
        Policy<IRestResponse> retryPolicy = Policy
            .HandleResult<IRestResponse>(resp => invalidStatusCodes.Contains(resp.StatusCode) || !IsValidJson(resp))
            .WaitAndRetry(retryCount, i => TimeSpan.FromMilliseconds(delay), (result, timeSpan, currentRetryCount, context) =>
            {
                // NOTE(review): a status code of 0 here most likely means no HTTP response was
                // received at all (transport-level failure). Such a response has empty content,
                // fails IsValidJson and is therefore retried — confirm against RestSharp's
                // ResponseStatus / ErrorException for the failing calls.
                logger.Error($"Request failed with {result.Result.StatusCode}. Waiting {timeSpan} before next retry. Retry attempt {currentRetryCount}");
                LastErrorResponse = result.Result;
            });

        var policyResponse = retryPolicy.ExecuteAndCapture(() =>
        {
            logger.Debug(client.BuildUri(request).ToString());
            return client.Execute(request);
        });

        if (policyResponse.Result != null)
        {
            return policyResponse.Result;
        }

        // When retries are exhausted the onRetry callback is NOT invoked for the last attempt;
        // its response is only available through FinalHandledResult. Record it so that both the
        // return value and LastErrorResponse reflect the attempt that actually ended the run
        // (this also covers retryCount == 0, where onRetry never fires).
        if (policyResponse.FinalHandledResult != null)
        {
            LastErrorResponse = policyResponse.FinalHandledResult;
        }

        return LastErrorResponse;
    }

    /// <summary>
    /// Checks whether the response body parses as a JSON object via <c>JObject.Parse</c>.
    /// </summary>
    /// <param name="response">Response whose <c>Content</c> is inspected.</param>
    /// <returns>
    /// False for a null response, null or empty content, or content that makes
    /// <c>JObject.Parse</c> throw <see cref="JsonReaderException"/>; true otherwise.
    /// </returns>
    public static bool IsValidJson(IRestResponse response)
    {
        // Missing or empty bodies are treated as invalid rather than throwing.
        if (response == null || string.IsNullOrEmpty(response.Content))
        {
            return false;
        }

        try
        {
            JObject.Parse(response.Content);
            return true;
        }
        catch (JsonReaderException)
        {
            // Malformed JSON.
            return false;
        }
    }
}
}
using Microsoft.VisualStudio.TestTools.UnitTesting;
using FundsAFE.Graphite;
using Moq;
using RestSharp;
using System.Net;
using FluentAssertions;
using System;
using FluentAssertions.Extensions;
namespace FundsAFE.Test.Moq
{
/// <summary>
/// Tests for <see cref="RequestExecutor"/> retry behaviour, using a mocked
/// <see cref="IRestClient"/> that always returns the same canned response.
/// </summary>
[TestClass]
public class MoqUnitTestRequest
{
    /// <summary>
    /// Builds a client mock whose Execute always returns a response with the given status code
    /// and content, and whose BuildUri returns a fixed fake URI.
    /// </summary>
    public Mock<IRestClient> CreateMockClientWithStatusCodeAndContent(HttpStatusCode code, string content)
    {
        Mock<IRestClient> mockClient = new Mock<IRestClient>();
        mockClient.Setup(c => c.Execute(It.IsAny<IRestRequest>())).Returns(
            new RestResponse
            {
                Content = content,
                StatusCode = code
            }
        );
        mockClient.Setup(c => c.BuildUri(It.IsAny<IRestRequest>())).Returns(
            new Uri("http://fake.fake")
        );
        return mockClient;
    }

    /// <summary>
    /// Every status code on the executor's failure list must exhaust all retries and be
    /// surfaced both as the returned response and as LastErrorResponse.
    /// </summary>
    [DataTestMethod]
    [DataRow(HttpStatusCode.BadGateway)]
    [DataRow(HttpStatusCode.Unauthorized)]
    [DataRow(HttpStatusCode.InternalServerError)]
    [DataRow(HttpStatusCode.RequestTimeout)]
    [DataRow(HttpStatusCode.BadRequest)]
    [DataRow(HttpStatusCode.Forbidden)]
    [DataRow(HttpStatusCode.GatewayTimeout)]
    public void TestBadStatusCodesAndRetry(HttpStatusCode httpStatusCode)
    {
        //Arrange
        Mock<IRestRequest> mockRequest = new Mock<IRestRequest>();
        Mock<IRestClient> mockClient = CreateMockClientWithStatusCodeAndContent(httpStatusCode, "fakecontent");
        RequestExecutor requestExecutor = new RequestExecutor(mockClient.Object, mockRequest.Object);
        int retries = 10;
        int delay = 50;
        int totalWaitTime = (retries * delay) - 10; //10ms error margin

        //Act and Verify
        // Use the same delay as the timing assertion below (was a stray literal 101).
        var response = requestExecutor.Execute(retryCount: retries, delay: delay);
        // Verified before the timing assertion, which runs Execute a second time.
        mockClient.Verify(x => x.Execute(It.IsAny<IRestRequest>()), Times.Exactly(retries + 1)); //1st failed attempt + 10 retries = 11

        //Assert
        requestExecutor.ExecutionTimeOf(re => re.Execute(retries, delay)).Should().BeGreaterOrEqualTo(totalWaitTime.Milliseconds());
        response.Should().NotBeNull();
        response.StatusCode.Should().Be(httpStatusCode);
        requestExecutor.LastErrorResponse.StatusCode.Should().Be(httpStatusCode);
    }

    /// <summary>
    /// A 200 OK response whose body is empty or malformed JSON must still be retried
    /// until the retry budget is exhausted.
    /// </summary>
    [DataTestMethod]
    //Empty content
    [DataRow("")]
    //Missing closing quote
    [DataRow("{\"fruit\": \"Apple,\"size\": \"Large\",\"color\": \"Red\"}")]
    //Missing opening square bracket (and no enclosing braces)
    [DataRow("\"q1\": {\"question\": \"Which one is correct team name in NBA?\",\"options\": \"New York Bulls\",\"Los Angeles Kings\",\"Golden State Warriros\",\"Huston Rocket\"],\"answer\": \"Huston Rocket\"}")]
    //Missing curly bracket
    [DataRow("\"sport\": {\"q1\": {\"question\": \"Which one is correct team name in NBA?\",\"options\": \"New York Bulls\",\"Los Angeles Kings\",\"Golden State Warriros\",\"Huston Rocket\"],\"answer\": \"Huston Rocket\"}")]
    public void TestBadContentRetries(string content)
    {
        //Arrange
        Mock<IRestRequest> mockRequest = new Mock<IRestRequest>();
        Mock<IRestClient> mockClient = CreateMockClientWithStatusCodeAndContent(HttpStatusCode.OK, content);
        RequestExecutor requestExecutor = new RequestExecutor(mockClient.Object, mockRequest.Object);
        int retries = 10;
        int delay = 50;
        int totalWaitTime = (retries * delay) - 10; //10ms error margin

        //Act and Verify
        var response = requestExecutor.Execute(retryCount: retries, delay: delay);
        // Verified before the timing assertion, which runs Execute a second time.
        mockClient.Verify(x => x.Execute(It.IsAny<IRestRequest>()), Times.Exactly(retries + 1)); //1st failed attempt + 10 retries = 11

        //Assert
        requestExecutor.ExecutionTimeOf(re => re.Execute(retries, delay)).Should().BeGreaterOrEqualTo(totalWaitTime.Milliseconds());
        response.Should().NotBeNull();
    }
}
}
此数据集是一个 pandas DataFrame。我的目标是计算两个国家之间双向迁移人数的比率。例如,从“foo”到“bar”的迁移人数与从“bar”到“foo”的迁移人数之比,在这种情况下为 123/222 ≈ 0.55。
此外,如果可能的话,例如,以以下方式将它们分组到单个数据集或多个子集中:
Country A | Country B | Migrants from A to B
foo bar 123
foo qux 221
bar qux 133
qux foo 312
bar foo 222
如何使用 pandas、numpy 等来完成?
曾经试图像这样对它们进行分组(尽管我什至无法开始合理化):
Country A | Country B | A to B ratio
foo bar 0.55
bar foo 1.80
foo qux 0.71
qux foo 1.41
.. .. ..
由于我缺乏 pandas 操作方面的知识,想不出可行的办法。即使是不够优雅的解决方法,任何建议都会有所帮助。谢谢!
答案 0 :(得分:3)
您可以将表与自身进行 merge(相当于 SQL 中的自连接):
# Normalise the column names so they can be used as merge keys.
df.columns = ['Country_A', 'Country_B', 'A_to_B']
# Self-join: pair every A->B row with its reverse B->A row.
# how='left' keeps each row of df in its original order and yields NaN where no
# reverse flow exists; the default inner join would drop those rows and renumber
# the result, so an index-aligned assignment back to df would hit the wrong rows.
# Assumes each (Country_A, Country_B) pair occurs at most once in df.
df1 = pd.merge(df, df, how='left', left_on=['Country_A', 'Country_B'], right_on=['Country_B', 'Country_A'])
# Assign by position (not by index label) so the result lines up row-for-row
# with df even when df does not carry a default 0..n-1 index.
df['ratio'] = (df1['A_to_B_x'] / df1['A_to_B_y']).to_numpy()
答案 1 :(得分:2)
使用 DataFrame.pivot_table 构建映射表,再按反向的国家对查找对应的值:
# Origin x destination matrix of migrant counts (rows: CountryA, columns: CountryB).
mapper=df.pivot_table(index='CountryA',columns='CountryB',values='MigrantsfromAtoB')
# DataFrame.lookup was deprecated in pandas 1.2 and removed in 2.0, so flatten the
# matrix into a Series keyed by (row label, column label) and reindex it instead.
# mapper.T.unstack() is keyed by (origin, destination); looking up
# (CountryB, CountryA) therefore fetches the reverse flow B -> A.
# Pairs with no reverse flow (or an unknown country) come back as NaN.
reverse_keys = pd.MultiIndex.from_arrays([df['CountryB'], df['CountryA']])
reverse_flow = mapper.T.unstack().reindex(reverse_keys).to_numpy()
df['ratio']=df['MigrantsfromAtoB']/reverse_flow
print(df)
CountryA CountryB MigrantsfromAtoB ratio
0 foo bar 123 0.554054
1 foo qux 221 0.708333
2 bar qux 133 NaN
3 qux foo 312 1.411765
4 bar foo 222 1.804878
答案 2 :(得分:1)
您可以循环执行此操作,但@ansev的回答更合乎逻辑:
df = pd.DataFrame({
    "A": ["foo", "foo", "bar", "qux", "bar", "qux"], "B": ["bar", "qux", "qux",
    "foo", "foo", "bar"], "AtoB": [123, 221, 133, 312, 222, 444]
})
# Index every flow by its (origin, destination) pair. Tuple keys avoid the
# collisions that concatenated strings allow (e.g. "ab"+"c" vs "a"+"bc").
flows = dict(zip(zip(df["A"], df["B"]), df["AtoB"]))
# For each row fetch the reverse flow B -> A; NaN when that pair is absent,
# instead of raising as a bare .item() on an empty selection would.
reverse = [flows.get((dest, orig), float("nan")) for orig, dest in zip(df["A"], df["B"])]
df["AtoB_Ratio"] = df["AtoB"] / pd.Series(reverse, index=df.index, dtype="float64")
输出:
A B AtoB AtoB_Ratio
0 foo bar 123 0.554054
1 foo qux 221 0.708333
2 bar qux 133 0.299550
3 qux foo 312 1.411765
4 bar foo 222 1.804878
5 qux bar 444 3.338346
答案 3 :(得分:1)
# Short column names: origin, destination, migrant count.
df.columns = ['A', 'B', 'AtoB']
# Origin (rows) x destination (columns) matrix of counts.
df1 = df.pivot_table(values='AtoB', index=['A'], columns=['B'])
# Element-wise ratio of each cell to its mirrored cell. Dividing the raw array by
# its transpose only lines up when the pivot is square with rows and columns in
# the same label order.
df2 = pd.DataFrame(
    df1.to_numpy() / df1.to_numpy().T,
    index=df1.index,
    columns=df1.columns,
)
df2
Out[1]:
B bar foo qux
A
bar NaN 1.804878 NaN
foo 0.554054 NaN 0.708333
qux NaN 1.411765 NaN
# Reshape the ratio matrix to long form: one row per (A, B) pair with its ratio.
# The melted variable column is named 'B' because melt defaults var_name to the
# columns' axis name, which the pivot set to 'B'.
df2 = df2.reset_index().melt(id_vars = ['A'],value_vars = df1.columns, value_name='ratio')
# Display the pairs with the self-pairs (A == B) removed. The result is not
# assigned, so df2 itself still contains those rows.
df2.drop(df2[df2['A']==df2['B']].index)
Out[2]:
A B ratio
1 foo bar 0.554054
2 qux bar NaN
3 bar foo 1.804878
5 qux foo 1.411765
6 bar qux NaN
7 foo qux 0.708333