我有一个数据框,其中(在随机位置)包含某个字符值(比如"foo"),我想把它替换为NA。
在整个数据框中这样做的最佳方式是什么?
答案 0 :(得分:77)
此:
df[ df == "foo" ] <- NA
答案 1 :(得分:65)
将这一点扼杀在萌芽状态的一种方法是在首先读取数据时将该字符转换为NA。
df <- read.csv("file.csv", na.strings = c("foo", "bar"))
答案 2 :(得分:4)
另一个选项是 is.na<- :
is.na(df) <- df == "foo"
请注意,这种用法看起来可能有点违反直觉,但它实际上是把 NA 赋给 df 中由右侧表达式所选中的位置。
答案 3 :(得分:2)
使用 dplyr::na_if 可以将特定值替换为 NA。在这种情况下,该特定值就是 "foo"。
library(dplyr)
set.seed(1234)
df <- data.frame(
id = 1:6,
x = sample(c("a", "b", "foo"), 6, replace = T),
y = sample(c("c", "d", "foo"), 6, replace = T),
z = sample(c("e", "f", "foo"), 6, replace = T),
stringsAsFactors = F
)
df
#> id x y z
#> 1 1 a c e
#> 2 2 b c foo
#> 3 3 b d e
#> 4 4 b d foo
#> 5 5 foo foo e
#> 6 6 b d e
na_if(df$x, "foo")
#> [1] "a" "b" "b" "b" NA "b"
如果您需要对多列进行此操作,则可以通过 mutate_at 把 "foo" 作为参数传给 na_if。
df %>%
mutate_at(vars(x, y, z), na_if, "foo")
#> id x y z
#> 1 1 a c e
#> 2 2 b c <NA>
#> 3 3 b d e
#> 4 4 b d <NA>
#> 5 5 <NA> <NA> e
#> 6 6 b d e
答案 4 :(得分:1)
可以使用 dplyr::mutate_all() 和 replace:
library(dplyr)
df <- data_frame(a = c('foo', 2, 3), b = c(1, 'foo', 3), c = c(1,2,'foobar'), d = c(1, 2, 3))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 foo 1 1 1
2 2 foo 2 2
3 3 3 foobar 3
df <- mutate_all(df, funs(replace(., .=='foo', NA)))
> df
# A tibble: 3 x 4
a b c d
<chr> <chr> <chr> <dbl>
1 <NA> 1 1 1
2 2 <NA> 2 2
3 3 3 foobar 3
另一个 dplyr 选项是:
df <- na_if(df, 'foo')
答案 5 :(得分:0)
另一种解决方法如下:
# Loop over every column index of DF.
# NOTE(review): the body indexes with `columnIndex` and also modifies
# `FinalData`, neither of which is defined in this snippet; presumably the
# loop index `i` and `DF` were intended - confirm before using this code.
for (i in 1:ncol(DF)){
# Assign "ALL" in column `columnIndex` for rows where column i of DF is "".
DF[which(DF[,i]==""),columnIndex]<-"ALL"
# Assign "ALL" in column `columnIndex` of FinalData wherever that column is NA.
FinalData[which(is.na(FinalData[,columnIndex])),columnIndex]<-"ALL"
}
答案 6 :(得分:0)
假设您不知道列名或要选择的列数很多,
import matplotlib.pyplot as plt
import numpy as np
import scipy.special
show_animation = True
def calc_4points_bezier_path(sx, sy, syaw, ex, ey, eyaw, offset, n_points=100):
    """
    Compute control points and path given start and end position.

    The two inner control points are placed along the start heading and
    against the end heading, at a distance equal to the straight-line
    start-to-end distance divided by ``offset``.

    :param sx: (float) x-coordinate of the starting point
    :param sy: (float) y-coordinate of the starting point
    :param syaw: (float) yaw angle at start
    :param ex: (float) x-coordinate of the ending point
    :param ey: (float) y-coordinate of the ending point
    :param eyaw: (float) yaw angle at the end
    :param offset: (float) divisor applied to the start-to-end distance to
        get the control-point distance; it is used as a denominator, so it
        must be non-zero
    :param n_points: (int) number of points sampled along the path
        (defaults to 100, the value that used to be hard-coded)
    :return: (numpy array, numpy array) the sampled path as returned by
        ``calc_bezier_path`` and the 4 control points
    """
    dist = np.sqrt((sx - ex) ** 2 + (sy - ey) ** 2) / offset
    control_points = np.array(
        [[sx, sy],
         [sx + dist * np.cos(syaw), sy + dist * np.sin(syaw)],
         [ex - dist * np.cos(eyaw), ey - dist * np.sin(eyaw)],
         [ex, ey]])

    path = calc_bezier_path(control_points, n_points=n_points)

    return path, control_points
def calc_bezier_path(control_points, n_points=100):
    """
    Sample a bezier curve defined by its control points.

    The curve parameter is swept over ``n_points`` evenly spaced values
    from 0 to 1 (both included) and the curve is evaluated at each one.

    :param control_points: (numpy array) control points of the curve
    :param n_points: (int) number of points in the trajectory
    :return: (numpy array) one evaluated curve point per parameter value
    """
    parameters = np.linspace(0, 1, n_points)
    return np.array([bezier(t, control_points) for t in parameters])
def bernstein_poly(n, i, t):
    """
    Evaluate a Bernstein basis polynomial.

    Computes ``comb(n, i) * t**i * (1 - t)**(n - i)``.

    :param n: (int) polynomial degree
    :param i: (int) index of the basis polynomial
    :param t: (float) evaluation parameter
    :return: (float) value of the basis polynomial at t
    """
    binomial = scipy.special.comb(n, i)
    rising = t ** i
    falling = (1 - t) ** (n - i)
    # Keep the left-to-right product order so results match bit for bit.
    return binomial * rising * falling
def bezier(t, control_points):
    """
    Evaluate the bezier curve at a single parameter value.

    Each control point is weighted by the Bernstein polynomial of matching
    index (degree = number of control points - 1) and the weighted points
    are summed.

    :param t: (float) number in [0, 1]
    :param control_points: (numpy array) control points of the curve
    :return: (numpy array) Coordinates of the point
    """
    degree = len(control_points) - 1
    weighted_points = []
    for index in range(degree + 1):
        weight = bernstein_poly(degree, index, t)
        weighted_points.append(weight * control_points[index])
    return np.sum(weighted_points, axis=0)
def bezier_derivatives_control_points(control_points, n_derivatives):
    """
    Compute control points of the successive derivatives of a bezier curve.

    A derivative of a bezier curve is itself a bezier curve whose control
    points are the differences of consecutive control points scaled by the
    curve degree.
    See https://pomax.github.io/bezierinfo/#derivatives
    for detailed explanations

    :param control_points: (numpy array) control points of the curve
    :param n_derivatives: (int) highest derivative order to compute
    e.g., n_derivatives=2 -> compute control points for first and second derivatives
    :return: (dict) maps derivative order to its control points; key 0 holds
        the ``control_points`` argument itself, keys 1..n_derivatives hold
        numpy arrays
    """
    by_order = {0: control_points}
    for order in range(1, n_derivatives + 1):
        previous = by_order[order - 1]
        degree = len(previous) - 1
        by_order[order] = np.array(
            [degree * (nxt - cur)
             for cur, nxt in zip(previous[:-1], previous[1:])])
    return by_order
def curvature(dx, dy, ddx, ddy):
    """
    Compute the signed curvature at one point from its derivatives.

    Evaluates ``(dx * ddy - dy * ddx) / (dx**2 + dy**2) ** 1.5``.

    :param dx: (float) First derivative along x axis
    :param dy: (float) First derivative along y axis
    :param ddx: (float) Second derivative along x axis
    :param ddy: (float) Second derivative along y axis
    :return: (float) curvature; its sign follows the sign of the numerator
    """
    cross = dx * ddy - dy * ddx
    speed_squared = dx ** 2 + dy ** 2
    return cross / speed_squared ** (3 / 2)
def plot_arrow(x, y, yaw, length=1.0, width=0.5, fc="r", ec="k"):  # pragma: no cover
    """
    Plot one arrow, or one arrow per element when given sequences.

    :param x: (float or sequence) x-coordinate(s) of the arrow base
    :param y: (float or sequence) y-coordinate(s) of the arrow base
    :param yaw: (float or sequence) heading angle(s); passed to cos/sin
    :param length: (float) arrow length
    :param width: (float) used for both head width and head length
    :param fc: face color passed to ``plt.arrow``
    :param ec: edge color passed to ``plt.arrow``
    """
    # np.isscalar accepts Python ints as well as floats and numpy scalars;
    # the previous isinstance(x, float) check sent ints into zip() and failed.
    if not np.isscalar(x):
        for ix, iy, iyaw in zip(x, y, yaw):
            # Forward the styling arguments so every arrow in the sequence
            # uses the caller's settings instead of the defaults.
            plot_arrow(ix, iy, iyaw, length=length, width=width, fc=fc, ec=ec)
        return

    plt.arrow(x, y, length * np.cos(yaw), length * np.sin(yaw),
              fc=fc, ec=ec, head_width=width, head_length=width)
    plt.plot(x, y)
def main():
    """
    Plot an example bezier curve.

    Builds a 4-control-point path between a fixed start and end pose,
    computes the tangent, normal and circle of curvature at one parameter
    value, sanity-checks the path endpoints, and (when the module-level
    ``show_animation`` flag is true) draws everything with matplotlib.
    """
    start_x = 10.0  # [m]
    start_y = 1.0  # [m]
    start_yaw = np.radians(180.0)  # [rad]

    end_x = -0.0  # [m]
    end_y = -3.0  # [m]
    end_yaw = np.radians(-45.0)  # [rad]
    offset = 3.0

    path, control_points = calc_4points_bezier_path(
        start_x, start_y, start_yaw, end_x, end_y, end_yaw, offset)

    # Note: alternatively, instead of specifying start and end position
    # you can directly define n control points and compute the path:
    # control_points = np.array([[5., 1.], [-2.78, 1.], [-11.5, -4.5], [-6., -8.]])
    # path = calc_bezier_path(control_points, n_points=100)

    # Display the tangent, normal and radius of curvature at a given point
    t = 0.86  # Number in [0, 1]
    # Evaluate the curve once and reuse the result for the target marker.
    point = bezier(t, control_points)
    x_target, y_target = point
    derivatives_cp = bezier_derivatives_control_points(control_points, 2)
    dt = bezier(t, derivatives_cp[1])
    ddt = bezier(t, derivatives_cp[2])
    # Radius of curvature (signed, because the curvature is signed)
    radius = 1 / curvature(dt[0], dt[1], ddt[0], ddt[1])
    # Normalize derivative
    dt /= np.linalg.norm(dt, 2)
    unit_normal = np.array([-dt[1], dt[0]])
    tangent = np.array([point, point + dt])
    normal = np.array([point, point + unit_normal])
    # The signed radius puts the center on the correct side of the curve;
    # the drawn circle only needs the magnitude.
    curvature_center = point + unit_normal * radius
    circle = plt.Circle(tuple(curvature_center), abs(radius),
                        color=(0, 0.8, 0.8), fill=False, linewidth=1)

    # Sanity checks: the path must start and end on the requested points.
    # Compared with a tolerance rather than exact float equality.
    assert np.allclose(path[0], [start_x, start_y]), "path is invalid"
    assert np.allclose(path[-1], [end_x, end_y]), "path is invalid"

    if show_animation:  # pragma: no cover
        fig, ax = plt.subplots()
        ax.plot(path.T[0], path.T[1], label="Cubic Bezier Path")
        ax.plot(control_points.T[0], control_points.T[1],
                '--o', label="Control Points")
        # A single point needs a marker; the default line style draws nothing.
        ax.plot(x_target, y_target, 'o')
        ax.plot(tangent[:, 0], tangent[:, 1], label="Tangent")
        ax.plot(normal[:, 0], normal[:, 1], label="Normal")
        ax.add_artist(circle)
        plot_arrow(start_x, start_y, start_yaw)
        plot_arrow(end_x, end_y, end_yaw)
        plt.xlabel('X')
        plt.ylabel('Y')
        ax.legend()
        ax.axis("equal")
        ax.grid(True)
        for i, p in enumerate(control_points):
            ax.annotate(f'P{i:d}', xy=p, xytext=(0, 5),
                        textcoords='offset points', ha='center')
        plt.show()


if __name__ == '__main__':
    main()
is.character() 可能会有用。