Question

我有一个看起来像

的数据集datA

ID     LINE_ID     Fromdate     Todate     UniqueID
1      1           1/1/2015     2/3/2015   11
1      2           3/7/2015     3/9/2015   12
2      1           2/2/2015     2/8/2015   21
3      1           1/3/2013     1/8/2013   31

datB看起来像

ID    LINE_ID     Date     UniqueID
1     1           1/1/2015 11
1     2           1/3/2015 12
1     3           2/2/2015 13
1     4           2/8/2015 14
1     5           3/8/2015 15
2     1           2/2/2015 21
2     2           2/3/2015 22
2     3           2/7/2015 23
2     4           2/8/2015 24
3     1           1/3/2013 31
3     2           1/7/2013 32
3     3           1/8/2013 33
3     4           1/9/2013 34

我想要做的是找到一种方法来合并两个数据集或找到一些方法来标记UniqueID中datA属于datB中的哪一行。例如，使用上面的两个数据集，我希望datC看起来像datB，其中添加了一列，UniqueID中的datA对应。

ID    LINE_ID     Date     UniqueID     UniqueID.A
1     1           1/1/2015 11           11
1     2           1/3/2015 12           11
1     3           2/2/2015 13           11
1     4           2/8/2015 14           NA
1     5           3/8/2015 15           12
2     1           2/2/2015 21           21
2     2           2/3/2015 22           21
2     3           2/7/2015 23           21
2     4           2/8/2015 24           21
3     1           1/3/2013 31           31
3     2           1/7/2013 32           31
3     3           1/8/2013 33           31
3     4           1/9/2013 34           NA

有人可以看到，添加到datB的新列是UniqueID datA datB中func didBeginContact(contact: SKPhysicsContact) { let firstBody: SKPhysicsBody let secondBody: SKPhysicsBody if contact.bodyA.categoryBitMask < contact.bodyB.categoryBitMask { firstBody = contact.bodyA secondBody = contact.bodyB } else { firstBody = contact.bodyB secondBody = contact.bodyA } if (firstBody.categoryBitMask == PhysicsCategory.Goblin) && (secondBody.categoryBitMask == PhysicsCategory.Bullet) { collisionWithBullet(firstBody.node as? SKSpriteNode, bullet: secondBody.node as? SKSpriteNode) } if (firstBody.categoryBitMask == PhysicsCategory.Goblin) && (secondBody.categoryBitMask == PhysicsCategory.player) { collisionWithPlayer(firstBody.node as? SKSpriteNode, player: secondBody.node as? SKSpriteNode) } } func collisionWithBullet(goblin: SKSpriteNode?, bullet:SKSpriteNode?){ guard let goblin = goblin, bullet = bullet else { return } goblin.removeFromParent() bullet.removeFromParent() score += 1 scoreLbl.text = "\(score)" if let explosion = SKEmitterNode(fileNamed: "Goblin Death Animation.sks") { explosion.particlePosition = goblin.position self.addChild(explosion) } if let fire = SKEmitterNode(fileNamed: "Goblin Death Animation 2.sks") { fire.particlePosition = goblin.position self.addChild(fire) } } func collisionWithPlayer(goblin: SKSpriteNode?, player: SKSpriteNode?){ guard let goblin = goblin, player = player else { return } let scoreDefault = NSUserDefaults.standardUserDefaults() scoreDefault.setValue(score, forKey: "Score") // synchronised not needed anymore if (score > highscore){ let highscoreDefault = NSUserDefaults.standardUserDefaults() highscoreDefault.setValue(score, forKey: "Highscore") } goblin.removeFromParent() player.removeFromParent() self.view?.presentScene(EndScene()) scoreLbl.removeFromSuperview() }的日期范围。{/ p>

有人对如何在R中执行此操作有任何指导吗？

Answer 1

帮助实施。确保列是正确的类型并设置了键：

#Create a start and end columns formatted to dates
setDT(datB)[, ':='(c("start", "end"), as.IDate(Date, format="%m/%d/%Y"))]

#Format columns to dates
setDT(datA)[, ':='(c("Fromdate", "Todate"), 
                   lapply(.SD, as.IDate, format="%m/%d/%Y")),
            .SDcols=c("Fromdate", "Todate")]

#Set keys for matching intervals on
setkey(datA, Fromdate, Todate)
setkey(datB, start, end)

#Match on intervals
foverlaps(datB, datA, type="within")

Answer 2

我编写了一个函数，可用于基于范围合并熊猫数据框： https://github.com/Chrisebell24/util/blob/master/custom_merge

import numpy as np
import pandas as pd

def merge_range(left, right, left_val, right_low, right_high):
    '''
    Parameters
    ----------
    left : pandas dataframe
        Left DataFrame that you are joining onto
    right : pandas dataframe
        Right DataFrame that has the ranged values
    left_val : list
        list of columns from left that contains numeric values
    right_low : list
        list of columns that you are matching to left_val
    right_high : list
        list of columns that you are matching to left_val

    Returns
    -------
    Pandas dataframe

    Example
    -------
    import numpy as np

    np.random.seed(69)
    df1, df2 = pd.DataFrame(), pd.DataFrame()
    df1['LETTERS'] = ['a', 'b','c','d','e','f', 'g']
    df1['RAND'] = np.random.randint(1,len(df1)*10, len(df1))
    df2['COLOR'] = ['red', 'orange','yellow','green','blue','purple','brown']
    df2['LOW'] = [i*10 for i in range(0, len(df2))]
    df2['HIGH'] = [i*10 for i in range(1, 1+len(df2))]

    merge_range(df1, df2, left_val=['RAND'], right_low=['LOW'], right_high=    ['HIGH'])

        LETTERS RAND    COLOR   LOW HIGH
    0   a       55      purple  50  60
    1   b       56      purple  50  60
    2   c       21      yellow  20  30
    3   d       50      purple  50  60
    4   e       23      yellow  20  30
    5   f       10      orange  10  20
    6   g       57      purple  50  60
    '''
    true_args = True

    for lv, rl, rh in zip(left_val, right_low, right_high):
        a = left[lv].values
        bl = right[rl].values
        bh = right[rh].values

        true_args *= (a[:, None] >= bl) & (a[:, None] < bh)

    i, j = np.where(true_args)

    df = pd.DataFrame(
        np.column_stack([left.values[i], right.values[j]]),
        columns=left.columns.append(right.columns)
    )

    if len(df.columns) - len(set(df.columns)) > 0:
        print('Duplicate columns')

    return df

范围和单个日期之间的日期合并

2 个答案: