熊猫根据列表中的值过滤行

时间:2019-06-06 19:30:01

标签: python pandas dataframe

给出df

DECLARE @StartDate AS DateTime = '1/1/2017'

IF OBJECT_ID('tempdb..##ProducerCommBridge') IS NOT NULL
  DROP TABLE ##ProducerCommBridge;

SELECT DISTINCT [ProducerCommBridgeKey]
      ,[PolicyAgreementNumber]
      ,[CommissionStartDate]
      ,[CommissionEndDate]
  INTO ##ProducerCommBridge
FROM [dbo].[DimProducerCommBridge]
;

IF OBJECT_ID('tempdb..##FctLossData') IS NOT NULL
  DROP TABLE ##FctLossData;

CREATE TABLE ##FctLossData (
    [QIPS_LPRG_SID] numeric(10,0) null,
    [PACClaimID] nvarchar(50) NULL,
    [EquipmentID] int null,
    [ClaimID] [int] NOT NULL,
    [ClaimNumber] [varchar](34) NOT NULL,
    [CreateDate] [datetime] NOT NULL,
    [LossDate] [datetime] NOT NULL,
    [ProCloseDate] [datetime] NULL,
    [ProducerCommBridgeKey] [int] NOT NULL,
    [ClaimStatus] [varchar](12) NULL,
    [ContractNumber] [varchar](30) NULL,
    [PolicyID] [varchar](20) NULL,
    [CustomerID] [nvarchar](15) NULL,
    [ProductID] [nvarchar](5) NULL,
    [CatastropheID] [int] NULL,
    [InsuranceClassCode] [varchar](1000) NULL,
    [EquipmentName] [varchar](1000) NULL,
    [EquipmentClassName] [varchar](1000) NULL,
    [EquipmentDescription] [nvarchar](1000) NULL,
    [RegionCode] [nvarchar](2) NULL,
    [CountryCode] [nvarchar](3) NULL,
    [ClaimLossType] [nvarchar](50) NULL,
    [CauseOfLoss] [nvarchar](255) NULL,
    [EnrollmentID] [nvarchar](50) NULL,
    [CoverageID] [int] NULL,
    [CoverageEffectiveDate] [datetime] NULL,
    [CustomerAccountNumber] [nvarchar](50) NULL,
    [PaidLossAmount] [money] NULL,
    [CaseReserveAmount] [money] NULL,
    [RecoveryReserveAmount] [money] NULL,
    [SalvageRecoveryAmount] [money] NULL,
    [SubroRecoveryAmount] [money] NULL,
    [TotalRecoveryAmount] [money] NULL,
    [AllocatedLAEAmount] [money] NULL
)
;

INSERT INTO ##FctLossData

--DECLARE @StartDate AS DateTime = '1/1/2017'

SELECT
  qle.LPRG_SID
  ,lc.PACClaimID
  ,ems.EquipmentID
  ,c.ClaimID
  ,(c.ClaimPrefix + '-' + CONVERT(varchar, c.ClaimNumber)) AS ClaimNumber
  ,CONVERT(date, CreateDate) AS CreateDate
  ,CONVERT(date, c.LossDate) AS LossDate
  ,CASE WHEN DATEPART(hh, EffectiveDate) < 17 then CONVERT(date, EffectiveDate) ELSE CONVERT(date, dateadd(dd, 1, EffectiveDate)) END as ProCloseDate
  ,-1 AS ProducerCommBridgeKey
  ,cs.ClaimStatus
  ,UPPER(LEFT(c.ContractNumber, 10)) AS ContractNumber
  ,CASE WHEN c.ContractNumber IS NOT NULL THEN LEFT(c.PolicyNumber, 7) ELSE c.PolicyNumber END AS PolicyID
  ,sCustomerID AS CustomerID
  ,CASE WHEN pro.sProductID = '9095' THEN '1095' ELSE pro.sProductID END AS ProductID
  ,c.CatastropheID
  ,CASE WHEN LTRIM(RTRIM(dbo.fn_clean_string(InsuranceClassCode))) = '' THEN 'UNKNOWN' ELSE COALESCE(UPPER(LTRIM(RTRIM(dbo.fn_clean_string(InsuranceClassCode)))), 'UNKNOWN')  END AS InsuranceClassCode
  ,CASE WHEN LTRIM(RTRIM(dbo.fn_clean_string(EquipmentName))) = '' THEN 'UNKNOWN' ELSE COALESCE(UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentName)))), 'UNKNOWN')  END AS EquipmentName
  ,CASE WHEN EquipmentClassName IS NULL THEN 'UNKNOWN' ELSE UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentClassName)))) END AS EquipmentClassName
  ,CASE WHEN EquipmentDescription IS NULL THEN 'UNKNOWN' ELSE UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentDescription)))) END AS EquipmentDescription
  ,RegionCode
  ,CountryCode
  ,CASE WHEN ClaimLossType IS NULL THEN 'Unknown' ELSE ClaimLossType END AS ClaimLossType
  ,CASE WHEN CauseOfLoss IS NULL THEN 'Unknown' ELSE CauseOfLoss END AS CauseOfLoss
  ,CASE WHEN qc.CLAM_LPRG_SID IS NOT NULL THEN CONVERT(nvarchar(50), qle.LEAS_SID) WHEN lc.EnrollmentID IS NOT NULL THEN CONVERT(nvarchar(50), lc.EnrollmentID) ELSE NULL END AS EnrollmentID
  ,NULL AS CoverageID
  ,NULL AS CoverageEffectiveDate
  ,CASE WHEN qc.CLAM_LPRG_SID IS NOT NULL THEN CONVERT(nvarchar(50), qle.LEAS_LEASE_CODE) WHEN lc.EnrollmentID IS NOT NULL THEN CONVERT(nvarchar(50), lefs.ContractNumber) ELSE NULL END AS CustomerAccountNumber
  ,SUM(CASE WHEN Reserve = 0 AND Recovery = 0 AND TranTypeID = 1 THEN Amount END) AS PaidLossAmount
  ,SUM(CASE WHEN Reserve = 1 AND Recovery = 0 AND TranTypeID = 1 THEN Amount END) AS CaseReserveAmount
  ,SUM(CASE WHEN Reserve = 1 AND Recovery = 1 AND TranTypeID IN (1, 7, 8) THEN Amount END) AS RecoveryReserveAmount
  ,SUM(CASE WHEN Reserve = 0 AND Recovery = 1 AND TranTypeID = 8 THEN Amount END) AS SalvageRecoveryAmount
  ,SUM(CASE WHEN Reserve = 0 AND Recovery = 1 AND TranTypeID = 7 THEN Amount END) AS SubroRecoveryAmount
  ,SUM(CASE WHEN Reserve = 0 AND Recovery = 1 AND TranTypeID IN (1, 7, 8) THEN Amount END) AS TotalRecoveryAmount
  ,SUM(CASE WHEN Reserve = 0 AND Recovery = 0 AND TranTypeID = 6 THEN Amount END) AS AllocatedLAEAmount     
  FROM dbo.ExtPACTransaction t
  INNER JOIN dbo.ExtPACClaim c ON t.ClaimID = c.ClaimID
                              --AND c.ClaimNumber NOT LIKE '836%'
  INNER JOIN dbo.ExtPRSDEPolicy pol ON (CASE WHEN [ContractNumber] IS NOT NULL THEN LEFT(c.PolicyNumber, 7) ELSE c.PolicyNumber END) = pol.sPolicyID
  INNER JOIN dbo.ExtPRSDECustomer cus ON pol.iCustomerKey = cus.iCustomerKey
  INNER JOIN dbo.ExtPRSDEProduct pro ON pol.iProductKey = pro.iProductKey
  LEFT OUTER JOIN dbo.ExtPACClaimStatus cs ON c.ClaimID = cs.ClaimID
  LEFT OUTER JOIN dbo.ExtJWProductSegment jw ON pro.sProductID = jw.sProductID
  LEFT OUTER JOIN dbo.ExtEMSEquipment ems ON c.EMIEquipmentID = CONVERT(varchar, ems.EquipmentNumber)
  LEFT OUTER JOIN dbo.ExtQIPSClaims qc ON qc.CLAM_PAC_CLAIMID = c.ClaimID
  LEFT OUTER JOIN dbo.ExtQIPSLeaseEnrollment qle ON qle.LPRG_SID = qc.CLAM_LPRG_SID
  LEFT OUTER JOIN dbo.ExtLP2Claim lc ON lc.PACClaimID = c.ClaimID
  LEFT OUTER JOIN dbo.ExtLP2EnrollmentForSearch lefs ON lefs.EnrollmentID = lc.EnrollmentID
  WHERE c.LossDate >= @StartDate
  --AND c.ClaimNumber NOT LIKE '836%' -- To remove FIS claims
  --AND c.ClaimNumber NOT IN (SELECT c2.ClaimNumber FROM ExtPACClaim c2 WHERE ClaimNumber LIKE '836%')
  AND c.isVoided = 0
  AND (pro.sProductID = '9095' OR jw.sProductID IS NOT NULL)
  GROUP BY qle.LPRG_SID
    ,lc.PACClaimID
    ,ems.EquipmentID
    ,c.ClaimID
    ,(c.ClaimPrefix + '-' + CONVERT(varchar, c.ClaimNumber)) 
    ,CONVERT(date, CreateDate) 
    ,CONVERT(date, c.LossDate) 
    ,CASE WHEN DATEPART(hh, t.EffectiveDate) < 17 then CONVERT(date, t.EffectiveDate) ELSE CONVERT(date, dateadd(dd, 1, t.EffectiveDate)) END
    ,cs.ClaimStatus
    , UPPER(LEFT(c.ContractNumber, 10))
    ,CASE WHEN c.ContractNumber IS NOT NULL THEN LEFT(c.PolicyNumber, 7) ELSE c.PolicyNumber END 
    ,sCustomerID 
    ,CASE WHEN pro.sProductID = '9095' THEN '1095' ELSE pro.sProductID END 
    ,c.CatastropheID
    ,CASE WHEN LTRIM(RTRIM(dbo.fn_clean_string(InsuranceClassCode))) = '' THEN 'UNKNOWN' ELSE COALESCE(UPPER(LTRIM(RTRIM(dbo.fn_clean_string(InsuranceClassCode)))), 'UNKNOWN')  END
    ,CASE WHEN LTRIM(RTRIM(dbo.fn_clean_string(EquipmentName))) = '' THEN 'UNKNOWN' ELSE COALESCE(UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentName)))), 'UNKNOWN')  END
    ,CASE WHEN EquipmentClassName IS NULL THEN 'UNKNOWN' ELSE UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentClassName)))) END
    ,CASE WHEN EquipmentDescription IS NULL THEN 'UNKNOWN' ELSE UPPER(LTRIM(RTRIM(dbo.fn_clean_string(EquipmentDescription)))) END
    ,RegionCode
    ,CountryCode
    ,CASE WHEN ClaimLossType IS NULL THEN 'Unknown' ELSE ClaimLossType END
    ,CASE WHEN CauseOfLoss IS NULL THEN 'Unknown' ELSE CauseOfLoss END 
    ,CASE WHEN qc.CLAM_LPRG_SID IS NOT NULL THEN CONVERT(nvarchar(50), qle.LEAS_SID) WHEN lc.EnrollmentID IS NOT NULL THEN CONVERT(nvarchar(50), lc.EnrollmentID) ELSE NULL END
    ,CASE WHEN qc.CLAM_LPRG_SID IS NOT NULL THEN CONVERT(nvarchar(50), qle.LEAS_LEASE_CODE) WHEN lc.EnrollmentID IS NOT NULL THEN CONVERT(nvarchar(50), lefs.ContractNumber) ELSE NULL END

  --ORDER BY ClaimNumber DESC
;

--To view records
SELECT *
FROM ##FctLossData
WHERE ClaimNumber LIKE '836%'

以及以下列表

df=pd.DataFrame([[0.23,"A1",10],[5.36,"A2",55],[8.8,"A3",4]],
            index=list('pqr'),
            columns=list('abc'))

我想从DF打印值与列表中任何值匹配的行,对于df和上述列表,我希望输出类似于

lst = ['A30','A50','A2','A0']

但是我最接近的是这个

Out[169]: 
    a    b   c
q  5.36  A2  55

“ NaN”是执行“ df [mask]”的结果,但我不明白为什么如果我正确地获得了想要的输出,为什么也没有。

有没有办法实现我上面提到的/下一个输出样本?

mask=df.isin(lst)
df[mask].dropna(axis=0,how="all")
Out[170]: 
    a   b   c
q NaN  A2 NaN

2 个答案:

答案 0 :(得分:4)

只需对any的结果应用isin

df[df.isin(lst).any(axis=1)]

      a   b   c
q  5.36  A2  55

我在How to implement 'in' and 'not in' for Pandas dataframe下的“许多列过滤器”部分下对此进行了详细介绍。

答案 1 :(得分:3)

使用列表理解:

[print(x) for x in [df[df[col].isin(lst)] for col in list(df)] if len(x)>0]

输出:

      a   b   c
q  5.36  A2  55