为什么scipy的genfromtxt将NaN值读取为-1?

时间:2018-10-24 15:57:37

标签: python scipy

Option Explicit

' Looks up each fund symbol listed in Sheet1!A2:A3 on MarketWatch and writes
' the "Open", "Shares Outstanding", "Total Net Assets" and "NAV" figures
' starting at column B.
'
' Header captions go to row 1. Each symbol's figures are written on the same
' row as the symbol itself, so blank cells in the symbol range leave a blank
' output row instead of shifting later results upwards.
'
' Nothing is written to the sheet until every page has been fetched and parsed.
' Any runtime error is re-raised to the caller after screen updating has been
' switched back on.
Public Sub GetFundInfo()
    Dim html As HTMLDocument, i As Long
    Dim headers(), funds(), url As String, ws As Worksheet
    Dim results As Collection, targetRows As Collection
    Dim src As Range
    Dim errNumber As Long, errSource As String, errDescription As String
    Const BASE_URL As String = "https://www.marketwatch.com/investing/fund/"
    Const FIRST_OUT_COL As Long = 2

    Application.ScreenUpdating = False
    On Error GoTo CleanFail

    headers = Array("Open", "Shares Outstanding", "Total Net Assets", "NAV")
    Set results = New Collection
    Set targetRows = New Collection
    Set ws = ThisWorkbook.Worksheets("Sheet1")
    Set html = New HTMLDocument

    Set src = ws.Range("A2:A3") '<== Change the range here to the single column range containing your fund symbols.
    funds = Application.Transpose(src.Value)

    For i = LBound(funds) To UBound(funds)
        If Not IsEmpty(funds(i)) Then
            url = BASE_URL & funds(i)
            html.body.innerHTML = GetString(url)
            results.Add GetInfo(html).Items
            ' Remember which worksheet row this symbol came from so the
            ' figures end up beside it even when earlier cells were blank.
            targetRows.Add src.Row + i - LBound(funds)
        End If
    Next

    If results.Count > 0 Then
        Dim item As Variant, k As Long
        With ws
            .Cells(1, FIRST_OUT_COL).Resize(1, UBound(headers) + 1) = headers
            For k = 1 To results.Count
                item = results(k)
                .Cells(targetRows(k), FIRST_OUT_COL).Resize(1, UBound(item) + 1) = item
            Next
        End With
    End If

    Application.ScreenUpdating = True
    Exit Sub

CleanFail:
    ' Capture the error details first, restore the UI state, then hand the
    ' original error back to the caller.
    errNumber = Err.Number
    errSource = Err.Source
    errDescription = Err.Description
    Application.ScreenUpdating = True
    Err.Raise errNumber, errSource, errDescription
End Sub


' Issues a synchronous HTTP GET for url and returns the response body
' converted to a VBA string.
Public Function GetString(ByVal url As String) As String
    Dim request As Object
    Set request = CreateObject("MSXML2.XMLHTTP")

    ' Third argument False = synchronous: send does not return until the
    ' response has arrived.
    request.Open "GET", url, False
    ' A long-past If-Modified-Since date is sent with every request
    ' (presumably to avoid being served a cached copy - confirm).
    request.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
    request.send

    ' responseBody is the raw byte array; StrConv turns it into a string.
    GetString = StrConv(request.responseBody, vbUnicode)
End Function

' Extracts the "Open", "Shares Outstanding", "Total Net Assets" and "NAV"
' figures from a parsed page.
'
' html    - document whose ".kv__label" elements carry the captions and whose
'           ".kv__value.kv__primary" elements carry the values.
' Returns - a Scripting.Dictionary with exactly those four keys, in that
'           insertion order; a key whose label is not found keeps vbNullString.
'
' Labels and values are paired by position. The loop stops at the shorter of
' the two node lists so a page with fewer values than labels cannot trigger an
' object-required error.
Public Function GetInfo(ByVal html As HTMLDocument) As Object
    Dim dict As Object, i As Long
    Dim values As Object, labels As Object
    Dim labelText As String, pairCount As Long

    Set dict = CreateObject("Scripting.Dictionary")
    dict.Add "Open", vbNullString
    dict.Add "Shares Outstanding", vbNullString
    dict.Add "Total Net Assets", vbNullString
    dict.Add "NAV", vbNullString

    Set values = html.querySelectorAll(".kv__value.kv__primary")
    Set labels = html.querySelectorAll(".kv__label")

    ' Only indices present in both lists can be paired safely.
    pairCount = labels.Length
    If values.Length < pairCount Then pairCount = values.Length

    For i = 0 To pairCount - 1
        labelText = labels.item(i).innerText
        If dict.Exists(labelText) Then dict(labelText) = values.item(i).innerText
    Next

    Set GetInfo = dict
End Function

我本应得到NaN而不是-1,因为原始数据集中对应的位置就是NaN。

1 个答案:

答案 0 :(得分:1)

您把"\t"作为数据类型(dtype)参数传入了,而不是作为分隔符(delimiter)参数。

请尝试:

# scipy.genfromtxt was only a deprecated alias of numpy.genfromtxt, so call NumPy directly.
import numpy as np

# Pass the tab via the `delimiter` keyword; the second positional argument of genfromtxt is `dtype`.
data = np.genfromtxt(r"C:\Users\DELL INSPIRON N3542\Downloads/1400OS_Code/1400OS_01_Codes/data/web_traffic.tsv", delimiter="\t")

我认为您得到的是-1而不是nan,因为按设计,整数不支持nan,只有浮点数支持。

我不确定为什么把"\t"作为数据类型传入时会被解释为int64,而且没有引发错误。