我卡在了这样一个问题上:两个不同的数据帧中各有一列键(代码),我想找出这两列中共有的键,然后输出第三列:
// Pushes one input sample through a resampler transform and reads back how many
// bytes it produced. Every HRESULT is routed through winrt::check_hresult
// (C++/WinRT helper that throws on a failure code).

// Instantiate the component registered under CLSID_CResamplerMediaObject and
// hold it through its IMFTransform interface.
winrt::com_ptr<IMFTransform> Transform;
winrt::check_hresult(CoCreateInstance(CLSID_CResamplerMediaObject, nullptr, CLSCTX_ALL, IID_PPV_ARGS(Transform.put())));
// WAVEFORMATEX aggregates, in member order: wFormatTag, nChannels, nSamplesPerSec,
// nAvgBytesPerSec, nBlockAlign, wBitsPerSample (cbSize is left value-initialised).
// Input: PCM, 1 channel, 44100 Hz, 16 bits. Output: same layout at 48000 Hz.
WAVEFORMATEX InputWaveFormatEx { WAVE_FORMAT_PCM, 1, 44100, 44100 * 2, 2, 16 };
WAVEFORMATEX OutputWaveFormatEx { WAVE_FORMAT_PCM, 1, 48000, 48000 * 2, 2, 16 };
// Wrap each WAVEFORMATEX in an IMFMediaType.
winrt::com_ptr<IMFMediaType> InputMediaType;
winrt::check_hresult(MFCreateMediaType(InputMediaType.put()));
winrt::check_hresult(MFInitMediaTypeFromWaveFormatEx(InputMediaType.get(), &InputWaveFormatEx, sizeof InputWaveFormatEx));
winrt::com_ptr<IMFMediaType> OutputMediaType;
winrt::check_hresult(MFCreateMediaType(OutputMediaType.put()));
winrt::check_hresult(MFInitMediaTypeFromWaveFormatEx(OutputMediaType.get(), &OutputWaveFormatEx, sizeof OutputWaveFormatEx));
// Configure stream 0 on both sides of the transform: input type first, then output type.
winrt::check_hresult(Transform->SetInputType(0, InputMediaType.get(), 0));
winrt::check_hresult(Transform->SetOutputType(0, OutputMediaType.get(), 0));
MFT_OUTPUT_STREAM_INFO OutputStreamInfo { };
winrt::check_hresult(Transform->GetOutputStreamInfo(0, &OutputStreamInfo));
// _A is defined outside this snippet - presumably an assertion macro (TODO confirm).
// The condition checked is that the output stream does not report
// MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER.
_A(!(OutputStreamInfo.dwFlags & MFT_OUTPUT_STREAM_SINGLE_SAMPLE_PER_BUFFER));
// Input buffer sized to nAvgBytesPerSec of the input format (88200 bytes, i.e. one
// second of audio). Nothing in this snippet writes into the buffer; only its
// current length is set to the full size.
// NOTE(review): the payload is therefore whatever MFCreateMemoryBuffer hands
// back - confirm that is intended for this experiment.
DWORD const InputMediaBufferSize = InputWaveFormatEx.nAvgBytesPerSec;
winrt::com_ptr<IMFMediaBuffer> InputMediaBuffer;
winrt::check_hresult(MFCreateMemoryBuffer(InputMediaBufferSize, InputMediaBuffer.put()));
winrt::check_hresult(InputMediaBuffer->SetCurrentLength(InputMediaBufferSize));
// Wrap the buffer in a sample and submit it on input stream 0.
winrt::com_ptr<IMFSample> InputSample;
winrt::check_hresult(MFCreateSample(InputSample.put()));
winrt::check_hresult(InputSample->AddBuffer(InputMediaBuffer.get()));
winrt::check_hresult(Transform->ProcessInput(0, InputSample.get(), 0));
// Output buffer with capacity nAvgBytesPerSec of the output format (96000 bytes);
// its current length starts at 0.
DWORD const OutputMediaBufferCapacity = OutputWaveFormatEx.nAvgBytesPerSec;
winrt::com_ptr<IMFMediaBuffer> OutputMediaBuffer;
winrt::check_hresult(MFCreateMemoryBuffer(OutputMediaBufferCapacity, OutputMediaBuffer.put()));
winrt::check_hresult(OutputMediaBuffer->SetCurrentLength(0));
winrt::com_ptr<IMFSample> OutputSample;
winrt::check_hresult(MFCreateSample(OutputSample.put()));
winrt::check_hresult(OutputSample->AddBuffer(OutputMediaBuffer.get()));
// First two members of MFT_OUTPUT_DATA_BUFFER: stream id 0 and the caller-supplied
// sample; the remaining members are value-initialised.
MFT_OUTPUT_DATA_BUFFER OutputDataBuffer { 0, OutputSample.get() };
// Status is not initialised here; it is only passed as the out-parameter of ProcessOutput.
DWORD Status;
// Single ProcessOutput call requesting one output buffer.
winrt::check_hresult(Transform->ProcessOutput(0, 1, &OutputDataBuffer, &Status));
// Read back the current length of the output buffer after the call.
DWORD OutputMediaBufferSize = 0;
winrt::check_hresult(OutputMediaBuffer->GetCurrentLength(&OutputMediaBufferSize));
预期输出:
我已经做过一些调查。这个问题确实与我的情况相似:How to merge pandas on string contains?。但是该问题中的键只包含一个项目,而我的示例中两个键都各包含 2 个项目。
答案 0 :(得分:0)
假设您的代码始终以空格分隔。
您可以使用列表推导式(list comprehensions)来检查 `Code1` 列中的每个代码是否出现在 `code2` 列中。通过检索匹配代码的索引,我们可以得到一个 DataFrame,其中包含代码有重叠的行。
然后,我们可以更新原始数据帧以获取预期的输出。
# Find the rows of df1 and df2 whose space-separated code cells share at least
# one code, then copy the matching df2 columns onto the corresponding df1 rows.
# Expects df1 (with a 'Code1' column) and df2 (with a 'code2' column) to exist.


def _code_tokens(cell):
    """Return the set of whitespace-separated codes in ``cell``.

    Missing values (NaN/None) give an empty set: ``str(np.nan)`` is ``'nan'``,
    which would otherwise make two missing cells look like a shared code.
    """
    return set() if pd.isna(cell) else set(str(cell).split())


# Tokenise df2 once instead of re-splitting every cell for every df1 word
codes2 = [_code_tokens(cell) for cell in df2['code2']]

# Create a list of matching codes: for each df1 row, one boolean per df2 row
list_of_matches = df1['Code1'].apply(
    lambda cell: [not _code_tokens(cell).isdisjoint(other) for other in codes2])

# Get the (positional) indices of matching codes; the reshape keeps the matrix
# two-dimensional even when one of the frames is empty
match_matrix = np.array(list_of_matches.tolist(), dtype=bool).reshape(len(df1), len(df2))
i, j = np.where(match_matrix)

# Create a new dataframe with name and second name of rows with matching code
# And drop rows with NA, as they don't make sense.
# i and j are positions, so select with iloc, and label each row with the df1
# index it came from so that update() below writes to the right df1 row.
df3 = pd.DataFrame(np.column_stack([df1.iloc[i], df2.iloc[j]]),
                   columns=df1.columns.append(df2.columns),
                   index=df1.index[i]).dropna()
# update() cannot align on duplicate labels; when a df1 row matches several
# df2 rows keep the first match
df3 = df3[~df3.index.duplicated(keep='first')]

# Create columns in your original dataframe to be able to update the dataframe
df1['Second Name'] = np.nan
df1['code2'] = np.nan

# Update dataframe with matching rows (aligned on df1's index labels)
df1.update(df3)
输出
Name Code1 Second Name code2
0 John AAA OO Cohen AAA GGG
1 Michael BBB UU Smith UU HHH
2 Dan JJ NaN NaN
3 George NaN NaN NaN
4 Adam II Kas TT II