我有一个数据框,其架构(schema)如下:
root
|-- AppUsers: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Email: string (nullable = true)
| | |-- FirstName: string (nullable = true)
| | |-- LastName: string (nullable = true)
| | |-- UserName: string (nullable = true)
|-- BusinessLines: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Name: string (nullable = true)
|-- Campaigns: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- BusinessLineId: integer (nullable = true)
| | |-- Name: string (nullable = true)
| | |-- StartDate: date (nullable = true)
| | |-- EndDate: date (nullable = true)
| | |-- Imported: boolean (nullable = true)
| | |-- IsClosed: string (nullable = true)
|-- CampaignDomains: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- CampaignId: integer (nullable = true)
| | |-- DomainId: integer (nullable = true)
|-- CampaignDomainEntityComments: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- CampaignId: integer (nullable = true)
| | |-- DomainId: integer (nullable = true)
| | |-- EntityId: integer (nullable = true)
| | |-- Comment: string (nullable = true)
|-- CampaignEntities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- CampaignId: integer (nullable = true)
| | |-- EntityId: integer (nullable = true)
| | |-- ClosedDate: date (nullable = true)
| | |-- ClosedBy: string (nullable = true)
|-- CampaignDomainEntities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- DomainId: integer (nullable = true)
| | |-- CampaignId: integer (nullable = true)
| | |-- EntityId: integer (nullable = true)
| | |-- Status: string (nullable = true)
| | |-- ValidationDate: date (nullable = true)
| | |-- ValidatedBy: string (nullable = true)
|-- Domains: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- BusinessLineId: integer (nullable = true)
| | |-- Name: string (nullable = true)
| | |-- Order: integer (nullable = true)
| | |-- Enabled: boolean (nullable = true)
|-- Entities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- BasesClient: string (nullable = true)
| | |-- BusinessLineId: integer (nullable = true)
| | |-- Name: string (nullable = true)
| | |-- Pole: string (nullable = true)
| | |-- PoleCode: string (nullable = true)
| | |-- PoleLabel: string (nullable = true)
| | |-- Transactions: string (nullable = true)
| | |-- Enabled: boolean (nullable = true)
| | |-- ELRId: string (nullable = true)
| | |-- ELRDescription: string (nullable = true)
| | |-- UOId: string (nullable = true)
| | |-- UODescription: string (nullable = true)
|-- Groups: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- BusinessLine: integer (nullable = true)
| | |-- Name: string (nullable = true)
| | |-- Enabled: boolean (nullable = true)
| | |-- IsCampaign: boolean (nullable = true)
|-- GroupEntities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- EntityId: integer (nullable = true)
| | |-- GroupId: integer (nullable = true)
|-- Indicators: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- AccessLevel: string (nullable = true)
| | |-- CanBeCopied: boolean (nullable = true)
| | |-- Definition: string (nullable = true)
| | |-- ModeReporting: string (nullable = true)
| | |-- NameEN: string (nullable = true)
| | |-- NameFR: string (nullable = true)
| | |-- Order: integer (nullable = true)
| | |-- Perimeter: string (nullable = true)
| | |-- PeriodTypeEN: string (nullable = true)
| | |-- PeriodTypeFR: string (nullable = true)
| | |-- PeriodTypeId: integer (nullable = true)
| | |-- SubDomainId: integer (nullable = true)
| | |-- Type: string (nullable = true)
| | |-- Enabled: boolean (nullable = true)
| | |-- OversightIndicatorID: string (nullable = true)
|-- IndicatorEntities: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- EntityId: integer (nullable = true)
| | |-- IndicatorId: integer (nullable = true)
|-- SubDomains: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- Comment: string (nullable = true)
| | |-- Name: string (nullable = true)
| | |-- Order: integer (nullable = true)
| | |-- Enabled: boolean (nullable = true)
| | |-- DomainId: integer (nullable = true)
|-- SubIndicators: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- IndicatorId: integer (nullable = true)
| | |-- NameEN: string (nullable = true)
| | |-- NameFR: string (nullable = true)
| | |-- Order: integer (nullable = true)
| | |-- Type: string (nullable = true)
| | |-- Unit: string (nullable = true)
| | |-- ValueListNameId: integer (nullable = true)
| | |-- IsMandatory: boolean (nullable = true)
| | |-- IsGDPR: boolean (nullable = true)
| | |-- OversightSubIndicatorID: string (nullable = true)
|-- ValueLists: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- NameEN: string (nullable = true)
| | |-- NameFR: string (nullable = true)
| | |-- Value: integer (nullable = true)
| | |-- ValueListNameId: integer (nullable = true)
|-- ValueListNames: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- NameEN: string (nullable = true)
| | |-- NameFR: string (nullable = true)
|-- Comments: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- Id: integer (nullable = true)
| | |-- Code: string (nullable = true)
| | |-- Definition: string (nullable = true)
|-- CommentValues: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- CampaignId: integer (nullable = true)
| | |-- CommentId: integer (nullable = true)
| | |-- Value: string (nullable = true)
打印数据框:
+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+
|AppUsers |BusinessLines|Campaigns|CampaignDomains|CampaignDomainEntityComments|CampaignEntities |CampaignDomainEntities |Domains |Entities |Groups |GroupEntities|Indicators |IndicatorEntities|SubDomains |SubIndicators |ValueLists |ValueListNames |Comments |CommentValues |
+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+
|[[1,null,JEROEN,SOMERS,JEROEN.SOMERS]]|[[1,PRIV]] |null |[[1,2]] |[[122,1,9,add comments ]] |[[1,1,2018-08-24,null]]|[[1,11,1,Draft,null,null]]|[[1,1,1,INCIDENTS,1,true]]|[[1,0071300000,Outil central (FORCE),1,SGPB MONACO GESTION PRIVEE,PRIV,000423,PRIV Monaco,Outil central (FORCE),true,0071300000,SOCIETE GENERALE PRIVATE BANKING (MONACO),20664,PRIV/MON]]|[[1,1,null,SGPB GROUPE,true,false]]|[[1,1]] |[[18174,D3E_I1,EndUser,false,Rappel : les instructions transposées doivent être validées par la Conformité IBFS avant d'être soumises à la validation du Management de votre entité.,Flow,IBFS 000449 - IBFS Compliance Manual - published on 01/29/2015,IBFS 000449 - Manuel de conformité IBFS - publié le 29/01/2015,1,Global,Monthly,Mensuel,1,440,Complex,true,FCC.1.1]]|[[1,1]] |[[1,18,null,Key Points,1,true,18]]|[[1,18.1,1,Entity,Entity,111,Text,,null,false,false,FCC.1.1.1]]|[[1,Discretionary management,Discretionary management,1,1]]|[[1,Compliance Item,Compliance Item]]|[[4,Priv-1,null]]|[[13,4,112323 ]]|
+--------------------------------------+-------------+---------+---------------+----------------------------+-----------------------+--------------------------+--------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------+----------------------------------+---------------------------------------------------------------+-----------------------------------------------------------+-------------------------------------+-----------------+----------------+
Indicators 中的 Definition 字段带有换行符和一些不需要的字符,例如 “ 、 , 和 ; 。
我想从 Definition 子列中删除那些不需要的字符,并保持数据框的结构不变。我已经在扁平结构上完成了此操作,但在嵌套结构上不知道该如何处理。
为简单起见,我删除了大多数字段,仅保留了要对其应用转换的字段
示例输入:
{
"AppUsers": [
{
"Id": 1,
"UserName": "abc.bcd",
}
],
"Indicators": [
{
"Definition": "Rappel ;;;;; , \n",
}
]
}
预期输出:
{
"AppUsers": [
{
"Id": 1,
"UserName": "abc.bcd",
}
],
"Indicators": [
{
"Definition": "Rappel",
}
]
}
不需要的字符必须从 Indicators.Definition 列中删除。请帮助。
答案 0 :(得分:0)
也许您可以尝试访问您的列,并使用regexp_replace删除不需要的字符。以下是一个示例。
# Clean the `Definition` field of every struct inside the `Indicators` array
# while keeping the nested schema (array<struct<...>>) unchanged.
#
# Why not `col('Indicators').getItem(4)`: on an array column getItem(4) returns
# the fifth *element* of the array, not the `Definition` field, and writing the
# result to a new top-level column would flatten the structure.
#
# Requires Spark >= 3.1 (`transform` with a Python lambda and `Column.withField`).
from pyspark.sql.functions import col, regexp_replace, transform, trim

df = df.withColumn(
    "Indicators",
    transform(
        col("Indicators"),
        # Rebuild each struct with only `Definition` replaced; every other
        # field is carried over as-is by withField.
        lambda indicator: indicator.withField(
            "Definition",
            trim(
                regexp_replace(
                    # Spark uses Java regex: no /.../g delimiters or flags.
                    # Adjust the character class to the set of characters
                    # that should be dropped.
                    regexp_replace(
                        indicator["Definition"], r"""[~%&\\;:"',<>?#]""", ""
                    ),
                    # Collapse newlines/tabs/runs of spaces into one space
                    # instead of deleting them, so words are not glued together.
                    r"\s+",
                    " ",
                )
            ),
        ),
    ),
)