删除重复行后拆分数据帧时出错

时间:2018-06-20 02:28:28

标签: python pandas dataframe

我有一个将数据集拆分为训练集和测试集的函数:

def train_test_split(df, train_percent=.7, seed=None):
    """Randomly split ``df`` into a train part and a test part.

    Args:
        df: object exposing ``.index`` and ``.iloc`` (a pandas DataFrame in
            the surrounding discussion).
        train_percent: fraction of the rows that goes into the train part;
            the resulting row count is truncated by ``int()``.
        seed: value handed to ``np.random.seed`` before shuffling.

    Returns:
        A ``(train, test)`` tuple built from the first ``train_end`` shuffled
        entries and the remaining ones, respectively.
    """
    np.random.seed(seed)
    # perm is a shuffled copy of the index *labels*, not of row positions.
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    # NOTE(review): .iloc selects by position while perm holds labels. The two
    # only coincide when the index is exactly 0..m-1; once rows have been
    # removed (e.g. by drop_duplicates) a label can exceed m-1 and this raises
    # "IndexError: positional indexers are out-of-bounds".
    train = df.iloc[perm[:train_end]]
    test = df.iloc[perm[train_end:]]
    return train, test

它在249681 rows × 9 columns的数据帧上正常工作

其中,由于缺少太多值,我决定删除4列:

df_subset_dup = df_encode.iloc[:,:5]
df_subset = df_subset_dup.drop_duplicates()

此后,当我执行df_trainRaw4, df_testRaw4 = train_test_split(df_subset)时,我得到IndexError: positional indexers are out-of-bounds。但是,执行df_trainRaw4, df_testRaw4 = train_test_split(df_subset_dup)不会返回任何错误。

为什么使用drop_duplicates会导致该错误,应该如何纠正?

1 个答案:

答案 0 :(得分:1)

df.index是数据框的实际索引(标签),但是您正在使用基于位置的iloc来用perm对数据框取子集;问题可能在于删除重复项之后部分索引被移除,现在最大的索引值大于数据帧的行数。将iloc更改为loc应该可以解决此问题:

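def train_test_split(df, train_percent=.7, seed=None):
    np.random.seed(seed)
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    train = df.loc[perm[:train_end]]
    test = df.loc[perm[train_end:]]
    return train, test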