在 pandas 中将多行追加到单行

时间:2020-05-08 08:05:24

标签: pandas

这是对以下问题(question)的后续提问。

这是我的数据框:

Sub Dokumentenbefuellung()
    ' Fills a Word document with content listed on the worksheet "Inhalteeinfuegen".
    '
    ' For every data row (row 2 down to the last used row in column A) the macro
    ' searches the document for the text in column C formatted with the heading
    ' style "Überschrift <column B>", and inserts two paragraph marks followed by
    ' the text from column D after the match. The document is saved and Word is
    ' closed at the end. Application settings changed here are always restored,
    ' and Word is shut down even when a runtime error occurs.

    ' Word constants are declared locally because Word is late-bound
    ' (CreateObject), so the Word object library enums are not available.
    Const wdFindContinue As Long = 1     ' WdFindWrap: wrap around and keep searching
    Const wdNoProtection As Long = -1    ' WdProtectionType: document is not protected

    Dim oAppWD As Object, oDoc As Object
    Dim ws As Worksheet
    Dim Dokumente As String, Ueberschrift As String
    Dim a As Long, b As Long
    Dim gefunden As Boolean
    Dim errNum As Long, errSrc As String, errDesc As String

    Dokumente = "Source"

    ' Bail out before touching any application state if the document is missing.
    If Dir(Dokumente) = "" Then
        MsgBox "Die zu öffnende Dokumentdatei wurde nicht gefunden!", vbCritical, "Word-Datei öffnen"
        Exit Sub
    End If

    On Error GoTo CleanUp
    Application.ScreenUpdating = False
    Application.DisplayAlerts = False

    ' Use an explicit worksheet reference instead of Activate/ActiveWorkbook so
    ' the reads do not depend on which window happens to be active.
    Set ws = ThisWorkbook.Sheets("Inhalteeinfuegen")
    b = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row

    ' Start Word as a late-bound object.
    Set oAppWD = CreateObject("Word.Application")
    oAppWD.Visible = True
    ' Do not let Word open write-protected documents in reading mode.
    If oAppWD.Options.AllowReadingMode = True Then
        oAppWD.Options.AllowReadingMode = False
    End If

    ' Open the document once; it stays open for all rows.
    Set oDoc = oAppWD.Documents.Open(Dokumente)
    If oDoc.ProtectionType <> wdNoProtection Then
        oDoc.Unprotect
    End If

    For a = 2 To b
        Ueberschrift = "Überschrift" & " " & ws.Cells(a, 2).Value

        With oAppWD.Selection.Find
            .Forward = True
            .ClearFormatting
            .Style = Ueberschrift
            .MatchWholeWord = True
            .MatchCase = False
            .Wrap = wdFindContinue
            gefunden = .Execute(FindText:=ws.Cells(a, 3).Value)
        End With

        If gefunden Then
            oAppWD.Selection.InsertParagraphAfter
            oAppWD.Selection.InsertParagraphAfter
            oAppWD.Selection.InsertAfter Text:=ws.Cells(a, 4).Value
        Else
            ' Do not insert at an arbitrary cursor position when the heading is missing.
            Debug.Print "Dokumentenbefuellung: no match for row " & a
        End If
    Next a

    oDoc.Save         ' Save the document

CleanUp:
    ' Remember a pending error (On Error Resume Next below clears Err).
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description

    On Error Resume Next
    ' On the success path the document is already saved; on the error path
    ' partial changes are discarded.
    If Not oDoc Is Nothing Then oDoc.Close False   ' Close the document
    If Not oAppWD Is Nothing Then oAppWD.Quit      ' Close Word
    Set oDoc = Nothing
    Set oAppWD = Nothing
    Application.DisplayAlerts = True
    Application.ScreenUpdating = True
    On Error GoTo 0

    ' Surface the original error to the caller after cleanup.
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
End Sub

nan = ""
d = {'NAME': ['a','a','b','b','c','c','c','c','c','d','d','d','d','d','d'], 'col1': ['P100','P100','P100','P100','MS','MS','MS','MS','MS','MS','MS','MS','MS','MS','MS'], 'col2': ['CNMZ', 'CNMZ', 'COMX', 'COMX', '_NCTE', '_NCTE', '_NCTE', '_NCTE', '_NCTE', 'T1MF', 'T1MF', 'T1MF', 'T1MF', 'T1MF', 'T1MF'], 'stepNo': [1, 2, 1, 2, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6], 'col4': ['xyz', 'abc', 'pqr', 'gvt', 'mno', 'tru', 'ercm', 'lotr', 'ddlj', 'refv', 'ecv', 'ecv', 'ecv', 'ecv', 'ecv'], 'col5': ['PHL', 'PHL', 'BHL', 'ALT', 'MRS', 'MRS', 'TUL', 'MRS', 'FAT', 'PHL', 'PHL', 'JEN', 'FTW', 'AMB', 'KGP'], 'col6': ['CP', 'CO', 'CP', 'CO', 'CP', 'CO', 'CO', 'CO', 'RT', 'CO', 'CO', 'CO', 'CP', 'CO', 'CO'], 'col7': ['PHL', 'PHL', 'ALT', 'ALT', 'MRS', 'TUL', 'MRS', 'FAT', 'FAH', 'PHL', 'JEN', 'FTW', 'AMB', 'KGP', 'KGP'], 'col8': ['CO', 'CO', 'CO', 'CO', 'CO', 'CO', 'CO', 'RT', 'CP', 'CO', 'CO', 'CP', 'CO', 'CO', 'CO'], 'col9': ['SID', 'M/M', 'SID', 'U/D', 'AL LO', 'AL LO', 'AL LO', 'AL LO', 'AL LO', 'M/M', 'DCS', 'DCS', 'DCS', 'DCS', 'DCS'], 'col10': ['SID', 'M/M', 'SID', 'U/D', 'AL LO', '3 M', '3 M', 'M/M', 'AL LO', 'M/M', 'DCS', 'DCS', 'DCS', 'DCS', 'DCS'], 'col11': [nan, 'ATM', nan, 'PACK', 'AL LP', 'DCS', 'DCS', 'DAM', 'DAM', 'DCS', 'DCS', 'DCS', 'DCS', 'DCS', 'M/M'], 'col12': [nan, 'SID', nan, 'PACK', 'CAL LO', 'DCS', 'DCS', 'M/M', 'CAL LO', 'DCS', 'DCS', 'DCS', 'DCS', 'DCS', 'AL LO'], 'col13': ['abc', '-02-1_', '-1', '-13_', nan, nan, nan, 'T1_VT1.', nan, '-06', nan, nan, nan, nan, '-03_02-03'], 'col14': [nan, nan, nan, nan, '102/', '102/', '102/', nan, '101/', nan, '3405', '3102/', '3111/', '3102/', nan]}
df = pd.DataFrame(d)

我想把NAME值相同的行按stepNo的顺序合并打印到同一行中,并保留所有列。例如。

输出:

stepNo

NAME col1 col2 stepNo col4...........col14 NAME col1 col2 stepNo col4...........col14 NAME col1 col2 stepNo col4...........col14 NAME col1 col2 stepNo col4...........col14 a P100 CNMZ 1 xyz nan a P100 CNMZ 2 abc nan b P100 COMX 1 pqr nan b P100 COMX 2 gvt nan c MS _NCTE 1 mno 102/ c MS _NCTE 2 tru 102/ c MS _NCTE 3 ercm 102/ c MS _NCTE 4 lotr nan

由于NAME=c具有5行,因此输出中还会再多出1组列。

对于NAME=d(共6行),输出中将有6组列。

我希望示例输出具有足够的描述性。如果没有,您可以在开始时参考链接的问题以更好地理解。

这是建议的解决方案,但在上面的示例中失败了:

NAME=4

是否可以获得预期的输出?

1 个答案:

答案 0 :(得分:1)

# Reshape `df` from one row per (NAME, stepNo) into one row per NAME, with the
# full set of columns repeated once per step, side by side.

# Map every distinct NAME (in order of first appearance) to its output row index.
unique_names = df['NAME'].factorize()[1]
map_name = {name: row for row, name in enumerate(unique_names)}

n_cols = len(df.columns)
max_step = df.stepNo.max()

# Object-typed grid pre-filled with zeros; cells for steps a NAME does not have
# keep that zero.
df2 = np.zeros((df.NAME.nunique(), max_step * n_cols)).astype(object)

# Copy each (NAME, stepNo) group into the column slot belonging to its step.
# NOTE(review): the slot arithmetic assumes stepNo is a 1-based integer and that
# each (NAME, stepNo) pair occurs once -- confirm against the data.
for (name, step), rows in df.groupby(['NAME', 'stepNo']):
    start = (step - 1) * n_cols
    df2[map_name[name], start:start + n_cols] = rows.values

df2 = pd.DataFrame(df2)
# Repeat the original column labels once per step (labels are duplicated).
df2.columns = np.tile(df.columns, max_step)
# 'stepNo' selects every repeated stepNo column; cast them all to int.
df2['stepNo'] = df2['stepNo'].astype(int)