我正在尝试按 X 列进行筛选,并找出每个 X 值在一周内缺失的所有日期。
例如,有如下 df(这里只是示例日期;实际的 df 中会包含整周的数据):
mean_date column X
2021-04-01 x_123
2021-04-01 y_324
2021-04-02 x_123
2021-04-03 x_123
我需要针对 X 列的每个取值,找出该周内所有缺失的日期,即:
result_df:
mean_date_missing column_X
2021-03-28 x_123
2021-03-29 x_123
2021-03-30 x_123
2021-03-31 x_123
..
2021-03-28 y_324
2021-03-29 y_324
2021-03-30 y_324
2021-03-31 y_324
2021-04-02 y_324
2021-04-03 y_324
答案 0 :(得分:0)
可能不是最优雅的方法,但我认为这可行:
首先,我会得到该周所有日期的列表,我们称之为 weekdays
#include <iostream>
#include <thread>
#include <vector>
#include <atomic>
#include <cstdlib>
#define _WINSOCK_DEPRECATED_NO_WARNINGS
#include <winsock2.h>
#include <Windows.h>
#include <ws2tcpip.h>
#pragma comment(lib, "Ws2_32.lib")
// Shutdown flag shared between threads: main() sets it to true and server()
// polls it at the top of every loop iteration. Direct-list-initialization is
// used because copy-initializing a std::atomic from a value is ill-formed
// before C++17 (the copy constructor is deleted).
std::atomic<bool> stopRunning{false};
void server()
{
int iResult;
sockaddr_in listen_address;
listen_address.sin_family = AF_INET;
listen_address.sin_port = htons(1000);
listen_address.sin_addr.S_un.S_addr = INADDR_ANY;
SOCKET listen_socket = socket(AF_INET, SOCK_STREAM, 0);
if (listen_socket == INVALID_SOCKET) {
iResult = WSAGetLastError();
std::cerr << "socket failed: " << iResult << "\n";
return;
}
u_long mode = 1;
if (ioctlsocket(listen_socket, FIONBIO, &mode) == SOCKET_ERROR) {
iResult = WSAGetLastError();
std::cerr << "ioctlsocket failed: " << iResult << "\n";
closesocket(listen_socket);
return;
}
if (bind(listen_socket, (sockaddr*)&listen_address, sizeof(listen_address)) == SOCKET_ERROR) {
iResult = WSAGetLastError();
std::cerr << "bind failed: " << iResult << "\n";
closesocket(listen_socket);
return;
}
if (listen(listen_socket, SOMAXCONN) == SOCKET_ERROR) {
iResult = WSAGetLastError();
std::cerr << "listen failed: " << iResult << "\n";
closesocket(listen_socket);
return;
}
std::vector<SOCKET> clients;
SOCKET client_socket;
sockaddr client_info;
int addrlen;
fd_set rfds;
timeval timeout;
char buffer[1024];
while (!stopRunning.load()) {
FD_ZERO(&rfds);
FD_SET(listen_socket, &rfds);
for(auto sckt : clients) {
FD_SET(sckt, &rfds);
}
timeout.tv_sec = 1;
timeout.tv_usec = 0;
iResult = select(0, &rfds, NULL, NULL, &timeout);
if (iResult < 0) {
iResult = WSAGetLastError();
std::cerr << "select failed: " << iResult << "\n";
break;
}
if (iResult == 0)
continue;
if (FD_ISSET(listen_socket, &rfds)) {
addrlen = sizeof(client_info);
client_socket = accept(listen_socket, (struct sockaddr*)&client_info, &addrlen);
if (client_socket == INVALID_SOCKET) {
iResult = WSAGetLastError();
std::cerr << "accept failed: " << iResult << "\n";
break;
}
if (ioctlsocket(client_socket, FIONBIO, &mode) == SOCKET_ERROR) {
iResult = WSAGetLastError();
std::cerr << "ioctlsocket failed: " << iResult << "\n";
closesocket(client_socket);
}
else {
std::cout << "New connection" << std::endl;
clients.push_back(client_socket);
}
}
for (size_t i = 0; i < clients.size();) {
client_socket = clients[i];
if (FD_ISSET(client_socket, &rfds)) {
iResult = recv(client_socket, buffer, sizeof(buffer), 0);
if (iResult == SOCKET_ERROR) {
iResult = WSAGetLastError();
std::cerr << "recv failed: " << iResult << "\n";
closesocket(client_socket);
clients.erase(clients.begin()+i);
continue;
}
if (iResult == 0) {
std::cout << "Closing connection" << std::endl;
closesocket(client_socket);
clients.erase(clients.begin()+i);
continue;
}
std::cout.write(buffer, iResult);
std::cout << std::endl;
}
++i;
}
}
//clean up
closesocket(listen_socket);
for (auto sckt : clients) {
closesocket(sckt);
}
}
/// Program entry point.
///
/// Initializes Winsock 2.2, runs server() on a worker thread, blocks on the
/// console "pause" command, then sets `stopRunning`, joins the worker and
/// releases Winsock.
///
/// @return 0 after a normal run; EXIT_FAILURE when WSAStartup() fails, so the
///         failure is visible to whoever launched the process.
int main() {
    WSADATA data;
    const int iResult = WSAStartup(MAKEWORD(2, 2), &data);
    if (iResult != 0) {
        // WSAStartup() returns the error code directly.
        std::cerr << "WSAStartup failed: " << iResult << "\n";
        return EXIT_FAILURE;
    }

    std::thread server_thread(server);

    // Blocks until the user presses a key in the console.
    std::system("pause");

    // Ask the server loop to exit and wait for it before tearing Winsock down.
    stopRunning = true;
    server_thread.join();

    WSACleanup();
    return 0;
}
您可以手动列出这些日期,也可以编写一个函数来生成(搜索一下就能找到现成的做法)。这样,您就会得到类似下面的内容:
<!-- Input event "test_feed" over dataset "input_data": the range runs from
     hoursInDay(0) instances before the current one up to the current one.
     The start expression previously lacked one closing parenthesis, which
     left the EL expression unbalanced. -->
<data-in name="test_feed" dataset="input_data">
  <start-instance>${coord:current(-(coord:hoursInDay(0)))}</start-instance>
  <end-instance>${coord:current(0)}</end-instance>
</data-in>
接下来,我将按 column X 列对 DataFrame 进行分组,因为您需要针对该列的每个可能取值分别找出缺失的日期。
weekdays = ["2021-04-01", "2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05"]
然后,迭代它以找到每组缺失的日期:
column X
现在,所有必要的信息都存储在 grouped = df.groupby(df["column x"])
中。您只需要从中制作一个 DataFrame:
# Build one [date, key] pair for every entry of `weekdays` that a group lacks.
# `grouped` and `weekdays` are expected to be defined before this snippet.
missing_list = []
for key, item in grouped:
    # Dates already present for this value of the grouping column.
    # NOTE(review): assumes mean_date holds strings in the same format as
    # `weekdays`; confirm, otherwise nothing will compare equal.
    existing_dates = item["mean_date"].to_list()
    # setdiff1d yields the sorted, unique values of `weekdays` that do not
    # appear in `existing_dates`.
    missing_dates = np.setdiff1d(weekdays, existing_dates)
    for date in missing_dates:
        missing_list.append([date, key])