在R中读取二进制文件

时间:2012-10-17 10:10:52

标签: r

在以下代码中,可以使用这些属性导入部分LAS文件版本1.1:

列表。需要项目格式大小

  1. X long 4 bytes *
  2. Y long 4 bytes *
  3. Z长4字节*
  4. 强度无符号短2字节
  5. 返回3位(位0,1,2)3位*
  6. 返回数(给定脉冲)3位(位3,4,5)3位*
  7. 扫描方向标志1位(位6)1位*
  8. 飞行边缘线1位(位7)1位*
  9. 分类unsigned char 1 byte *
  10. 扫描角度等级(-90到+90) - 左侧无符号字符1个字节*
  11. 用户数据unsigned char 1 byte
  12. 点源ID无符号短2字节*
  13. GPS时间双8字节*
  14. 并返回x,y,z和强度(上面列表的第1到4行):

    allbytes <- matrix(readBin(con, "raw", n = pointDataRecordLength * numberPointRecords, size = 1, endian = "little"),
                       ncol= pointDataRecordLength, nrow = numberPointRecords, byrow = TRUE)    
    close(con)
    mm <- matrix(readBin(t(allbytes[,1:(3*4)]), "integer", size = 4, n = 3 * numberPointRecords, endian = "little"), ncol = 3, byrow = TRUE)
    
    mm[,1] <- mm[ ,1] * xyzScaleOffset[1,1] + xyzScaleOffset[1, 2]
    mm[,2] <- mm[ ,2] * xyzScaleOffset[2,1] + xyzScaleOffset[2, 2]
    mm[,3] <- mm[ ,3] * xyzScaleOffset[3,1] + xyzScaleOffset[3, 2]
    colnames(mm) <- c("x", "y", "z")
    
    intensity <- readBin(t(allbytes[, 13:14]), "double", size = 2, n = numberPointRecords, signed = FALSE, endian = "little")
    

    我想扩展代码以读取“返回号码”和“返回号码”(表格的第5行和第6行)。我做错了什么?

    returnNumber <- readBin(t(allbytes[, 15]), "integer", size = 1, n = numberPointRecords, signed = FALSE, endian = "little")
    

    预期的回报应该是1到5之间的整数。提前感谢!

    示例文件:

    link

    完整代码:

        publicHeaderDescription <- function() {
      hd <- structure(list(Item = c("File Signature (\"LASF\")",
                                    "(1.1) File Source ID", "(1.1) Global Encoding",
                                    "(1.1) Project ID - GUID data 1", "(1.1) Project ID - GUID data 2",
                                    "(1.1) Project ID - GUID data 3", "(1.1) Project ID - GUID data 4",
                                    "Version Major", "Version Minor", "(1.1) System Identifier",
                                    "Generating Software", "(1.1) File Creation Day of Year",
                                    "(1.1) File Creation Year", "Header Size", "Offset to point data",
                                    "Number of variable length records",
                                    "Point Data Format ID (0-99 for spec)", "Point Data Record Length",
                                    "Number of point records", "Number of points by return",
                                    "X scale factor", "Y scale factor", "Z scale factor", "X offset",
                                    "Y offset", "Z offset", "Max X", "Min X", "Max Y", "Min Y", "Max Z",
                                    "Min Z"), Format = c("char[4]", "unsigned short", "unsigned short",
                                                         "unsigned long", "unsigned short", "unsigned short",
                                                         "unsigned char[8]", "unsigned char", "unsigned char", "char[32]",
                                                         "char[32]", "unsigned short", "unsigned short", "unsigned short",
                                                         "unsigned long", "unsigned long", "unsigned char", "unsigned short",
                                                         "unsigned long", "unsigned long[5]", "double", "double", "double",
                                                         "double", "double", "double", "double", "double", "double", "double",
                                                         "double", "double"), Size = c("4 bytes", "2 bytes", "2 bytes",
                                                                                       "4 bytes", "2 byte", "2 byte", "8 bytes", "1 byte", "1 byte",
                                                                                       "32 bytes", "32 bytes", "2 bytes", "2 bytes", "2 bytes", "4 bytes",
                                                                                       "4 bytes", "1 byte", "2 bytes", "4 bytes", "20 bytes", "8 bytes",
                                                                                       "8 bytes", "8 bytes", "8 bytes", "8 bytes", "8 bytes", "8 bytes",
                                                                                       "8 bytes", "8 bytes", "8 bytes", "8 bytes", "8 bytes"), Required =
                                                                                         c("*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*",
                                                                                           "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*", "*",
                                                                                           "*", "*", "*", "*", "*")), .Names = c("Item", "Format", "Size",
                                                                                                                                 "Required"), row.names = 2:33, class = "data.frame")
      hd$what <- ""
      hd$what[grep("unsigned", hd$Format)] <- "integer"
      hd$what[grep("char", hd$Format)] <- "raw"
      hd$what[grep("short", hd$Format)] <- "integer"
      hd$what[grep("long", hd$Format)] <- "integer"
      hd$what[grep("double", hd$Format)] <- "numeric"
      hd$signed <- TRUE
      hd$signed[grep("unsigned", hd$Format)] <- FALSE
      ## number of values in record
      hd$n <- as.numeric(gsub("[[:alpha:][:punct:]]", "", hd$Format))
      hd$n[hd$what == "character"] <- 1
      hd$n[is.na(hd$n)] <- 1
      ## size of record
      hd$Hsize <- as.numeric(gsub("[[:alpha:]]", "", hd$Size))
      ## size of each value in record
      hd$Rsize <- hd$Hsize / hd$n
      hd$Rsize[hd$what == "raw"] <- 1
      hd$n[hd$what == "raw"] <- hd$Hsize[hd$what == "raw"]
      hd
    }
    
    readLAS <-
      function(lasfile, skip = 0, nrows = NULL, returnSP = FALSE, returnHeaderOnly = FALSE) {
    
        hd <- publicHeaderDescription()
        pheader <- vector("list", nrow(hd))
        names(pheader) <- hd$Item
        con <- file(lasfile, open = "rb")
        isLASFbytes <- readBin(con, "raw", size = 1, n = 4, endian = "little")
        pheader[[hd$Item[1]]] <- readBin(isLASFbytes, "character", size = 4, endian = "little")
        if (! pheader[[hd$Item[1]]] == "LASF") {
          stop("Not a valid LAS file")
        }
        for (i in 2:nrow(hd)) {
          pheader[[hd$Item[i]]] <- readBin(con, what = hd$what[i], signed = hd$signed[i], size = hd$Rsize[i], endian = "little", n = hd$n[i])
          #print(names(pheader)[i])
          #print(pheader[[hd$Item[i]]])
        }
        close(con)
        ## read the data
        numberPointRecords <- pheader[["Number of point records"]]
        offsetToPointData <- pheader[["Offset to point data"]]
        pointDataRecordLength <-pheader[["Point Data Record Length"]]
        xyzScaleOffset <- cbind(unlist(pheader[c("X scale factor", "Y scale factor", "Z scale factor")]),
                                unlist(pheader[c("X offset", "Y offset", "Z offset")]))
    
    
        if (returnHeaderOnly) return(pheader)
    
        con <- file(lasfile, open = "rb")
        junk <- readBin(con, "raw", size = 1, n = offsetToPointData)
    
        ## deal with rows to skip and max rows to be read
        if (skip > 0) {
          ## seek is unreliable on windows, or I'm using it incorrectly
          ## so we junk the bytes to skip
          junk <- readBin(con, "raw", size = 1, n = pointDataRecordLength * skip)
          numberPointRecords <- numberPointRecords - skip
          #pos <- seek(con, where = pointDataRecordLength * skip)
          # print(c(pos = seek(con), skip = skip, where = pointDataRecordLength * skip))
        }
        if (!is.null(nrows)) {
          if (numberPointRecords > nrows) numberPointRecords <- nrows
        }
    
        if (numberPointRecords < 1) stop("no records left to read")
    
    
        # include a loop to read just points inside the x and y coordinates
    
        allbytes <- matrix(readBin(con, "raw", n = pointDataRecordLength * numberPointRecords, size = 1, endian = "little"),
                           ncol= pointDataRecordLength, nrow = numberPointRecords, byrow = TRUE)
    
    
        close(con)
        mm <- matrix(readBin(t(allbytes[,1:(3*4)]), "integer", size = 4, n = 3 * numberPointRecords, endian = "little"), ncol = 3, byrow = TRUE)
        gpstime <- NULL
        if (ncol(allbytes) == 28) gpstime <- readBin(t(allbytes[ , 21:28]), "numeric", size = 8, n = numberPointRecords, endian = "little")
    
        intensity <- readBin(t(allbytes[, 13:14]), "double", size = 2, n = numberPointRecords, signed = FALSE, endian = "little")
        mm[,1] <- mm[ ,1] * xyzScaleOffset[1,1] + xyzScaleOffset[1, 2]
        mm[,2] <- mm[ ,2] * xyzScaleOffset[2,1] + xyzScaleOffset[2, 2]
        mm[,3] <- mm[ ,3] * xyzScaleOffset[3,1] + xyzScaleOffset[3, 2]
        colnames(mm) <- c("x", "y", "z")
    
        returnNumber <- readBin(t(allbytes[,15]), "integer", size = 1, n = numberPointRecords, signed = FALSE, endian = "little")
        require(bitops)
    
    
        if (returnSP) {
          require(sp)
          SpatialPoints(cbind(mm, gpstime, intensity))
        } else {
          cbind(mm, gpstime, intensity)
        }
      }
    

1 个答案:

答案 0 :(得分:5)

从您的问题中不清楚您是否理解字段5-8都是以单个字节编码的,因此您需要提取这些位。假设其余代码有效,

require(bitops)

fields.5.to.8 <- readBin(t(allbytes[, 15]), "integer", size = 1, n = numberPointRecords, signed = FALSE, endian = "little")

# bits 0..2: byte & 00000111
field.5 <- bitAnd(7, fields.5.to.8)
# bits 3..5: byte & 00111000 >> 3
field.6 <- bitShiftR(bitAnd(56, fields.5.to.8), 3)
# bit 6: & 0100000 >> 6
field.7 <- bitShiftR(bitAnd(fields.5.to.8, 64), 6)
# bit 7: & 1000000 >> 7 
field.8 <- bitShiftR(bitAnd(fields.5.to.8, 128), 7)