我有一个panda var app = angular.module('MyApp', ['ngRoute']);
// configure our routes
app.config(function ($routeProvider) {
$routeProvider
// route for the home page
.when('/', {
templateUrl: 'AddHeader.html',
controller: 'headerCtrl'
})
.when('/AddHeader', {
templateUrl: 'AddHeader.html',
controller: 'headerCtrl'
})
// route for the about page
.when('/ProjectIDCreation', {
templateUrl: 'ProjectIDCreation.html',
controller: 'headerCtrl'
})
});
var baseAddress = 'http://localhost:49754/api/TimeSheet/';
var url = "";
app.factory('userFactory', function ($http) {
return {
getHeadersList: function () {
url = baseAddress + "FetchHeaderDetails";
return $http.get(url);
},
addHeader: function (user) {
url = baseAddress + "InsertHeaderDetails";
return $http.post(url, user);
},
updateHeader: function (user) {
url = baseAddress + "UpdateHeaderDetails";
return $http.put(url, user);
}
};
});
//var app = angular.module('MyApp');
app.controller('headerCtrl', function PostController($scope, userFactory) {
$scope.users = [];
$scope.user = null;
$scope.editMode = false;
//Fetch all Headers
$scope.getAll = function () {
userFactory.getHeadersList().success(function (data) {
$scope.users = data;
}).error(function (data) {
$scope.error = "An Error has occured while Loading users! " + data.ExceptionMessage;
});
};
//Add Header
$scope.add = function () {
var currentUser = this.user;
userFactory.addHeader(currentUser).success(function (data) {
$scope.addMode = false;
currentUser.HeaderID = data;
$scope.users.push(currentUser);
$scope.user = null;
$('#userModel').modal('hide');
}).error(function (data) {
$scope.error = "An Error has occured while Adding user! " + data.ExceptionMessage;
});
};
//Edit Header
$scope.edit = function () {
$scope.user = this.user;
$scope.editMode = true;
$('#userModel').modal('show');
};
//Update Header
$scope.update = function () {
var currentUser = this.user;
userFactory.updateHeader(currentUser).success(function (data) {
currentUser.editMode = false;
$('#userModel').modal('hide');
}).error(function (data) {
$scope.error = "An Error has occured while Updating user! " + data.ExceptionMessage;
});
};
//Model popup events
$scope.showadd = function () {
$scope.user = null;
$scope.editMode = false;
$('#userModel').modal('show');
};
$scope.showedit = function () {
$('#userModel').modal('show');
};
$scope.cancel = function () {
$scope.user = null;
$('#userModel').modal('hide');
}
// initialize your users data
$scope.getAll();
});
,时间序列在df
,而column1
中有一个布尔条件。这描述了满足特定条件的连续时间间隔。请注意,时间间隔长度不等。
column2
如何计算满足此条件的整个系列中的时间间隔总数?
所需的输出应如下所示:
Timestamp Boolean_condition
1 1
2 1
3 0
4 1
5 1
6 1
7 0
8 0
9 1
10 0
答案 0 :(得分:3)
您可以尝试以下方法:
1)获取True
实例(此处为1)的所有值,其中包含isone
2)获取相应的索引集并将其转换为系列表示,以便新系列将其索引和值都作为先前计算的索引。执行连续行之间的差异并检查它们是否等于1.这将成为我们的布尔掩码。
3)将isone
与获得的布尔掩码进行比较,每当它们不相等时,我们采用它们的累积和(也称为元素之间的邻接检查)。这些帮助我们进行分组。
4)使用loc
作为isone
的索引,我们将将grp
数组更改为分类格式后计算的代码分配给创建的新列 Event_number
isone = df.Bolean_condition[df.Bolean_condition.eq(1)]
idx = isone.index
grp = (isone != idx.to_series().diff().eq(1)).cumsum()
df.loc[idx, 'Event_number'] = pd.Categorical(grp).codes + 1
更快的方法:
仅使用numpy
:
1)获取它的数组表示。
2)计算非零,此处(1's
)索引。
3)在此数组的开头插入NaN
,这将作为我们在考虑连续行时执行差异的起点。
4)初始化一个填充了与原始数组形状相同的Nan's
的新数组。
5)每当连续行之间的差异不等于1时,我们取其累积总和,否则它们属于同一组。这些值在之前有1's
的指数处被估算。
6)将这些分配回新列。
def nick(df):
b = df.Bolean_condition.values
slc = np.flatnonzero(b)
slc_pl_1 = np.append(np.nan, slc)
nan_arr = np.full(b.size, fill_value=np.nan)
nan_arr[slc] = np.cumsum(slc_pl_1[1:] - slc_pl_1[:-1] != 1)
df['Event_number'] = nan_arr
return df
<强> 时序: 强>
对于10,000行的DF
:
np.random.seed(42)
df1 = pd.DataFrame(dict(
Timestamp=np.arange(10000),
Bolean_condition=np.random.choice(np.array([0,1]), 10000, p=[0.4, 0.6]))
)
df1.shape
# (10000, 2)
def jez(df):
mask0 = df.Bolean_condition.eq(0)
mask2 = df.Bolean_condition.ne(df.Bolean_condition.shift(1))
df['Event_number'] = (mask2 & mask0).cumsum().mask(mask0)
return (df)
nick(df1).equals(jez(df1))
# True
%%timeit
nick(df1)
1000 loops, best of 3: 362 µs per loop
%%timeit
jez(df1)
100 loops, best of 3: 1.56 ms per loop
对于包含100万行的DF
:
np.random.seed(42)
df1 = pd.DataFrame(dict(
Timestamp=np.arange(1000000),
Bolean_condition=np.random.choice(np.array([0,1]), 1000000, p=[0.4, 0.6]))
)
df1.shape
# (1000000, 2)
nick(df1).equals(jez(df1))
# True
%%timeit
nick(df1)
10 loops, best of 3: 34.9 ms per loop
%%timeit
jez(df1)
10 loops, best of 3: 50.1 ms per loop
答案 1 :(得分:3)
您可以使用cumsum
两个Series
创建masks
,然后按功能Series.mask
创建NaN
:
mask0 = df.Boolean_condition.eq(0)
mask2 = df.Boolean_condition.ne(df.Boolean_condition.shift(1))
print ((mask2 & mask0).cumsum().add(1))
0 1
1 1
2 2
3 2
4 2
5 2
6 3
7 3
8 3
9 4
Name: Boolean_condition, dtype: int32
df['Event_number'] = (mask2 & mask0).cumsum().add(1).mask(mask0)
print (df)
Timestamp Boolean_condition Event_number
0 1 1 1.0
1 2 1 1.0
2 3 0 NaN
3 4 1 2.0
4 5 1 2.0
5 6 1 2.0
6 7 0 NaN
7 8 0 NaN
8 9 1 3.0
9 10 0 NaN
<强>计时强>:
#[100000 rows x 2 columns
df = pd.concat([df]*10000).reset_index(drop=True)
df1 = df.copy()
df2 = df.copy()
def nick(df):
isone = df.Boolean_condition[df.Boolean_condition.eq(1)]
idx = isone.index
grp = (isone != idx.to_series().diff().eq(1)).cumsum()
df.loc[idx, 'Event_number'] = pd.Categorical(grp).codes + 1
return df
def jez(df):
mask0 = df.Boolean_condition.eq(0)
mask2 = df.Boolean_condition.ne(df.Boolean_condition.shift(1))
df['Event_number'] = (mask2 & mask0).cumsum().add(1).mask(mask0)
return (df)
def jez1(df):
mask0 = ~df.Boolean_condition
mask2 = df.Boolean_condition.ne(df.Boolean_condition.shift(1))
df['Event_number'] = (mask2 & mask0).cumsum().add(1).mask(mask0)
return (df)
In [68]: %timeit (jez1(df))
100 loops, best of 3: 6.45 ms per loop
In [69]: %timeit (nick(df1))
100 loops, best of 3: 12 ms per loop
In [70]: %timeit (jez(df2))
100 loops, best of 3: 5.34 ms per loop
答案 2 :(得分:1)
自定义功能可以解决问题。这是Matlab代码中的解决方案:
Boolean_condition = [1 1 0 1 1 1 0 0 1 0];
Event_number = [NA NA NA NA NA NA NA NA NA NA];
loop_event_number = 1;
for timestamp=1:10
if Boolean_condition(timestamp)==1
Event_number(timestamp) = loop_event_number;
last_event_number = loop_event_number;
else
loop_event_number = last_event_number +1;
end
end
% Event_number = 1 1 NA 2 2 2 NA NA 3 NA
答案 3 :(得分:1)
这应该可以工作,但对于很长的df可能有点慢。
df = pd.concat([df,pd.Series([0]*len(df), name = '2')], axis = 1)
if df.iloc[0,1] == 1:
counter = 1
df.iloc[0, 2] = counter
else:
counter = 0
df.iloc[0,2] = 0
previous = df.iloc[0,1]
for y,x in df.iloc[1:,].iterrows():
print(y)
if x[1] == 1 and previous == 1:
previous = x[1]
df.iloc[y, 2] = counter
if x[1] == 0:
previous = x[1]
df.iloc[y,2] = 0
if x[1] == 1 and previous == 0:
counter += 1
previous = x[1]
df.iloc[y,2] = counter