是否可以通过Scrapy Selector修改响应内容?

时间:2016-03-02 15:49:27

标签: python scrapy scrapy-spider

我正在使用Scrapy在一个页面上深层复制某些内容,抓取内容并下载该内容中的图像并相应地更新图像原始值。

例如我有:

<div class="A">
    <img original="example1.com/1/1.png"></img>
</div>

我需要下载图像并更新新图像原始值(例如更新到mysite.com/1/1.png),然后保存内容。

我最终会得到的是:

<div class="A">
    <img original="mysite.com/1/1.png"></img>
</div>

和我的磁盘上的图像。

是否可以通过Selector修改值?

或者我必须先下载图像并单独更新“原始”值吗?任何更好的解决方案?

1 个答案:

答案 0 :(得分:1)

我收到了scrapy dev的回复。

Is it possible to modify the response content through Scrapy Selector?

// Auxilliary method for stream-reading of values
number ReadValueOfType(object fStream, string type, number byteOrder)
{
    number val = 0
    TagGroup tg = NewTagGroup()
    if ( type == "bool" )
    {
        tg.TagGroupSetTagAsBoolean( type, 0 )
        tg.TagGroupReadTagDataFromStream( type, fstream, byteOrder ) 
        tg.TagGroupGetTagAsBoolean( type, val )
    }
    else if ( type == "uint16" )
    {
        tg.TagGroupSetTagAsUInt16( type, 0 )
        tg.TagGroupReadTagDataFromStream( type, fstream, byteOrder ) 
        tg.TagGroupGetTagAsUInt16( type, val )
    }
    else if ( type == "uint32" )
    {
        tg.TagGroupSetTagAsUInt32( type, 0 )
        tg.TagGroupReadTagDataFromStream( type, fstream, byteOrder ) 
        tg.TagGroupGetTagAsUInt32( type, val )
    }
    else Throw("Invalid read-type:"+type)
    return val
}

string ExtractTextFromTiff( string path )
{
    string txt
    if ( !DoesFileExist(path) ) Throw("File not found.\n"+path)

    // Open Stream 
    number fileID = OpenFileForReading( path )
    object fStream = NewStreamFromFileReference(fileID,1)

    // Read data byte order. (1 = big Endian, 2= little Endian for Gatan)
    number val
    number byteOrder = 0
    val = fStream.ReadValueOfType( "uint16", byteOrder )
    byteOrder = ( 0x4949 == val  ) ? 2 : ( 0x4D4D == val ? 1 : 0 )
    //Result("\n TIFF endian:"+byteOrder)

    // Verify TIFF image
    val = fStream.ReadValueOfType( "uint16", byteOrder )
    if ( val != 42 ) Throw( "Not a valid TIFF image" )

    // Browse all directories
    number offset = fStream.ReadValueOfType( "uint32", byteOrder )

    while( 0 != offset )
    {
        fStream.StreamSetPos( 0, offset ) // Start of IFD
        number nEntries = fStream.ReadValueOfType( "uint16", byteOrder )
        for ( number e=0;e<nEntries;e++)
        {
            number tag        = fStream.ReadValueOfType( "uint16", byteOrder )
            number typ        = fStream.ReadValueOfType( "uint16", byteOrder )
            number count      = fStream.ReadValueOfType( "uint32", byteOrder )
            number dataOffset = fStream.ReadValueOfType( "uint32", byteOrder )
            //Result("\n entry # "+e+": ID["+tag+"]\ttyp="+typ+"\tcount="+count+"\t offset @ "+dataOffset)
            if ( 2 == typ ) // ASCII
            {
                number currentPos = fStream.StreamGetPos()
                fStream.StreamSetPos( 0, dataOffset )
                string textField = fStream.StreamReadAsText( 0, count )
                txt+=textField
                fStream.StreamSetPos( 0, currentPos )
            }   
        }
        offset = fStream.ReadValueOfType( "uint32", byteOrder ) // this is 0000 for the last directory according to spec
    }

    return txt
}

String TruncWhiteSpaceBeforeAndAfter( string input )
{
    string work = input
    if ( len(work) == 0 ) return ""
    while ( " " == left(work,1) )
    {
        work = right( work, len(work) - 1 )
        if ( len(work) == 0 ) return "" 
    }
    while ( " " == right(work,1) )
    {
        work = left( work, len(work) - 1 )
        if ( len(work) == 0 ) return "" 
    }
    return work
}


// INPUT:  String with line-wise information
// OUTPUT: TagGroup
// Assumptions:  
//  - Groups are specified in a line in the format:             [GroupName]
//  - The string contains information line-wise in the format:  KeyName=Vale
TagGroup CreateTagsFromString( string input )
{
    TagGroup tg = NewTagGroup()
    string work = input

    string eoL          = "\n"
    string GroupLeadIn  = "["
    string GroupLeadOut = "]"
    string keyToValueSep= "="
    string groupName = ""

    number pos = find(work,eoL )
    while( -1 != pos )
    {
        string line = left(work,pos)
        work = right(work,len(work)-pos-len(eoL))
        number leadIn  = find(line,GroupLeadIn)
        number leadOut = find(line,GroupLeadOut)
        number sep = find(line,keyToValueSep)
        if ( ( -1 < leadIn ) && ( -1 < leadOut ) && ( leadIn < leadOut ) ) // Is it a new group? "[GROUPNAME]"
        {
            groupName = mid(line,leadIn+len(GroupLeadIn),leadOut-leadIn-len(GroupLeadOut))
            groupName = TruncWhiteSpaceBeforeAndAfter(groupName)
        }
        else if( -1 < sep )                                                 // Is it a value? "KEY=VALUE" ?
        {
            string key  = left(line,sep)
            string value= right(line,len(line)-sep-len(keyToValueSep))
            key   = TruncWhiteSpaceBeforeAndAfter(key)
            value = TruncWhiteSpaceBeforeAndAfter(value)
            string tagPath = groupName + ( "" == groupName ? "" : ":" ) + key
            tg.TagGroupSetTagAsString( tagPath, value )
        }
        pos = find(work,eoL)        
    }
    return tg
}

void ImportTIFFWithTags()
{
    string path = GetApplicationDirectory("open_save",0)
    if (!OpenDialog(NULL,"Select TIFF file",path, path)) exit(0)

    string extractedText = ExtractTextFromTiff(path)
    /*
    if ( TwoButtonDialog("Show extracted text?","Yes","No") )
        result(extractedtext)
    */

    tagGroup infoAsTags = CreateTagsFromString(extractedText )
    /*
    if ( TwoButtonDialog("Output tagstructure?","Yes","No") )
        infoAsTags.TagGroupOpenBrowserWindow(path,0)
    */

    // Import data and add info-tags
    image imported := OpenImage(path)
    imported.ImageGetTagGroup().TagGroupSetTagAsTagGroup("TIFF Tags",infoAsTags)
    imported.ShowImage()

    // Calibrate image, if info is found
    // It seems FEI stores this value as [m] in the tags PixelHeight and PixelWidth
    // while ZEISS images contain the size of the FOV in the tags "Height" and "Width" as string including unit
    number scaleX = 0
    number scaleY = 0
    string unitX 
    string UnitY
    string hStr
    string wStr
    if ( imported.GetNumberNote( "TIFF Tags:Scan:PixelWidth", scaleX ) )
    {
        unitX = "nm"
        scaleX *= 1e9
    }
    if ( imported.GetNumberNote( "TIFF Tags:Scan:PixelHeight", scaleY ) )
    {
        unitY = "nm"
        scaleY *= 1e9
    }
    if ( imported.GetStringNote( "TIFF Tags:Width", wStr ) )
    {
        number pos = find( wStr, " " )
        if ( -1 < pos )
        {
            scaleX = val( left(wStr,pos) )
            scaleX /= imported.ImageGetDimensionSize(0)
            unitX  = right( wStr, len(wStr)-pos-1 )
        }
    }
    if ( imported.GetStringNote( "TIFF Tags:Height", hStr ) )
    {
        number pos = find( hStr, " " )
        if ( -1 < pos )
        {
            scaleY = val( left(hStr,pos) )
            scaleY /= imported.ImageGetDimensionSize(1)
            unitY  = right( hStr, len(hStr)-pos-1 )
        }
    }
    if ( 0 < scaleX )
    {
        imported.ImageSetDimensionScale(0,scaleX)
        imported.ImageSetDimensionUnitString(0,unitX)
    }
    if ( 0 < scaleY )
    {
        imported.ImageSetDimensionScale(1,scaleY)
        imported.ImageSetDimensionUnitString(1,unitY)
    }
}
ImportTIFFWithTags()