我正在使用 Scrapy 对某个页面中的部分内容进行深层复制:抓取该内容,下载其中的图像,并相应地更新图像的 original 属性值。
例如我有:
<div class="A">
<img original="example1.com/1/1.png"></img>
</div>
我需要下载图像,并将图像的 original 属性值更新为新地址(例如更新为 mysite.com/1/1.png),然后保存内容。
最终我希望得到的是:
<div class="A">
<img original="mysite.com/1/1.png"></img>
</div>
以及保存在我磁盘上的图像。
是否可以通过 Selector 修改该值?
还是必须先下载图像,再单独更新 original 属性值?有没有更好的解决方案?
答案 0 :(得分:1)
我收到了 Scrapy 开发者的回复。
Is it possible to modify the response content through Scrapy Selector?
// Auxiliary method for stream-reading of single values.
// A scratch tag (labelled with the type name) is created in the requested
// format, filled from the stream, and read back as a number.
//   fStream   : stream object to read from
//   type      : "bool", "uint16" or "uint32"
//   byteOrder : passed through unchanged to TagGroupReadTagDataFromStream
//   returns   : the value read from the stream
//   throws    : "Invalid read-type:<type>" for any other type string
number ReadValueOfType(object fStream, string type, number byteOrder)
{
    number result = 0
    TagGroup scratch = NewTagGroup()

    // 1) Create a placeholder tag so the tag group knows which data format to read.
    if ( type == "bool" )
        scratch.TagGroupSetTagAsBoolean( type, 0 )
    else if ( type == "uint16" )
        scratch.TagGroupSetTagAsUInt16( type, 0 )
    else if ( type == "uint32" )
        scratch.TagGroupSetTagAsUInt32( type, 0 )
    else
        Throw("Invalid read-type:"+type)

    // 2) Overwrite the placeholder with the data found at the current stream position.
    scratch.TagGroupReadTagDataFromStream( type, fStream, byteOrder )

    // 3) Fetch the value with the getter matching the format.
    //    Only the three supported types can reach this point.
    if ( type == "bool" )
        scratch.TagGroupGetTagAsBoolean( type, result )
    else if ( type == "uint16" )
        scratch.TagGroupGetTagAsUInt16( type, result )
    else
        scratch.TagGroupGetTagAsUInt32( type, result )

    return result
}
// Collects the contents of all ASCII-typed (type 2) entries found in every
// image file directory (IFD) of a TIFF file and returns them concatenated.
//   path    : TIFF file to scan
//   returns : text of all ASCII fields, appended in the order they are found
//   throws  : "File not found." if the path does not exist,
//             "Not a valid TIFF image" if the byte-order mark or the
//             magic number 42 is missing
string ExtractTextFromTiff( string path )
{
    string txt
    if ( !DoesFileExist(path) ) Throw("File not found.\n"+path)

    // Open Stream
    // NOTE(review): the second argument (1) presumably makes the stream close
    // the file when it is released - confirm against the DM documentation.
    number fileID = OpenFileForReading( path )
    object fStream = NewStreamFromFileReference(fileID,1)

    // Read data byte order. (1 = big Endian, 2= little Endian for Gatan)
    // "II" (0x4949) and "MM" (0x4D4D) consist of two identical bytes, so the
    // mark itself reads the same regardless of the byte order used here.
    number val
    number byteOrder = 0
    val = fStream.ReadValueOfType( "uint16", byteOrder )
    byteOrder = ( 0x4949 == val ) ? 2 : ( 0x4D4D == val ? 1 : 0 )
    // Without a recognised mark the remaining header cannot be interpreted.
    if ( 0 == byteOrder ) Throw( "Not a valid TIFF image" )
    //Result("\n TIFF endian:"+byteOrder)

    // Verify TIFF image
    val = fStream.ReadValueOfType( "uint16", byteOrder )
    if ( val != 42 ) Throw( "Not a valid TIFF image" )

    // Browse all directories
    number offset = fStream.ReadValueOfType( "uint32", byteOrder )
    while( 0 != offset )
    {
        fStream.StreamSetPos( 0, offset ) // Start of IFD
        number nEntries = fStream.ReadValueOfType( "uint16", byteOrder )
        for ( number e=0;e<nEntries;e++)
        {
            // Each entry: tag ID, field type, value count, value-or-offset field
            number tag = fStream.ReadValueOfType( "uint16", byteOrder )
            number typ = fStream.ReadValueOfType( "uint16", byteOrder )
            number count = fStream.ReadValueOfType( "uint32", byteOrder )
            number dataOffset = fStream.ReadValueOfType( "uint32", byteOrder )
            //Result("\n entry # "+e+": ID["+tag+"]\ttyp="+typ+"\tcount="+count+"\t offset @ "+dataOffset)
            if ( 2 == typ ) // ASCII
            {
                // Stream now points just behind the entry; remember it to resume there.
                number currentPos = fStream.StreamGetPos()

                // Per TIFF 6.0, values that fit into 4 bytes are stored directly
                // in the entry's 4-byte value field instead of at a file offset.
                // That field is the last 4 bytes just read.
                number textPos = dataOffset
                if ( count <= 4 ) textPos = currentPos - 4

                fStream.StreamSetPos( 0, textPos )
                string textField = fStream.StreamReadAsText( 0, count )
                txt+=textField
                fStream.StreamSetPos( 0, currentPos )
            }
        }
        offset = fStream.ReadValueOfType( "uint32", byteOrder ) // this is 0000 for the last directory according to spec
    }
    return txt
}
// Returns a copy of 'input' without leading and trailing space characters.
// Only the plain space " " is stripped; any other character is kept.
// An empty or all-space input yields "".
String TruncWhiteSpaceBeforeAndAfter( string input )
{
    number total = len(input)

    // Index of the first character that is not a space
    number first = 0
    while ( first < total )
    {
        if ( " " == mid(input,first,1) )
            first += 1
        else
            break
    }
    if ( first == total ) return ""   // empty, or nothing but spaces

    // Index of the last character that is not a space.
    // The character at 'first' is not a space, so this stops at 'first' at the latest.
    number last = total - 1
    while ( " " == mid(input,last,1) )
        last = last - 1

    return mid( input, first, last - first + 1 )
}
// INPUT:  String with line-wise information
// OUTPUT: TagGroup holding one string tag per "KeyName=Value" line
// Assumptions:
// - Lines are separated by "\n". A final line without trailing "\n" is parsed as well.
// - Groups are specified in a line in the format: [GroupName]
// - The string contains information line-wise in the format: KeyName=Value
// - Keys following a group line are stored at the tag path "GroupName:KeyName";
//   keys before the first group line are stored at "KeyName".
// - A line containing "[" followed later by "]" is always taken as a group
//   line, even if it also contains "=".
// - Lines matching neither format are ignored.
// NOTE(review): only spaces are trimmed from names and values; if the text
// uses "\r\n" line ends the "\r" would stay attached to the value - confirm
// against real files.
TagGroup CreateTagsFromString( string input )
{
    TagGroup tg = NewTagGroup()
    string work = input
    string eoL = "\n"
    string GroupLeadIn = "["
    string GroupLeadOut = "]"
    string keyToValueSep= "="
    string groupName = ""

    // Consume 'work' line by line until nothing is left.
    while( 0 < len(work) )
    {
        string line
        number pos = find(work,eoL )
        if ( -1 == pos )
        {
            // No further line break: the remainder is the last line.
            line = work
            work = ""
        }
        else
        {
            line = left(work,pos)
            work = right(work,len(work)-pos-len(eoL))
        }

        number leadIn = find(line,GroupLeadIn)
        number leadOut = find(line,GroupLeadOut)
        number sep = find(line,keyToValueSep)
        if ( ( -1 < leadIn ) && ( -1 < leadOut ) && ( leadIn < leadOut ) ) // Is it a new group? "[GROUPNAME]"
        {
            // The name spans from behind the lead-in up to (excluding) the lead-out.
            number nameStart = leadIn + len(GroupLeadIn)
            groupName = mid(line,nameStart,leadOut-nameStart)
            groupName = TruncWhiteSpaceBeforeAndAfter(groupName)
        }
        else if( -1 < sep ) // Is it a value? "KEY=VALUE" ?
        {
            // Split at the first separator; the value may itself contain "=".
            string key = left(line,sep)
            string value= right(line,len(line)-sep-len(keyToValueSep))
            key = TruncWhiteSpaceBeforeAndAfter(key)
            value = TruncWhiteSpaceBeforeAndAfter(value)
            string tagPath = groupName + ( "" == groupName ? "" : ":" ) + key
            tg.TagGroupSetTagAsString( tagPath, value )
        }
    }
    return tg
}
// Asks the user for a TIFF file, opens it as an image, attaches the text
// information found in the file as tag group "TIFF Tags", shows the image and
// calibrates both image dimensions if suitable information is found.
// The script exits if the file dialog is cancelled.
void ImportTIFFWithTags()
{
    // File selection, starting from the application's "open_save" directory
    string path = GetApplicationDirectory("open_save",0)
    if ( !OpenDialog(NULL,"Select TIFF file",path, path) ) exit(0)

    // Text fields of the file, converted into a tag structure
    string rawText = ExtractTextFromTiff(path)
    // Debug aid (disabled):
    //   if ( TwoButtonDialog("Show extracted text?","Yes","No") )
    //       result(rawText)
    tagGroup infoAsTags = CreateTagsFromString( rawText )
    // Debug aid (disabled):
    //   if ( TwoButtonDialog("Output tagstructure?","Yes","No") )
    //       infoAsTags.TagGroupOpenBrowserWindow(path,0)

    // Import data and add info-tags
    image img := OpenImage(path)
    TagGroup imgTags = img.ImageGetTagGroup()
    imgTags.TagGroupSetTagAsTagGroup("TIFF Tags",infoAsTags)
    img.ShowImage()

    // Calibrate image, if info is found
    // It seems FEI stores this value as [m] in the tags PixelHeight and PixelWidth
    // while ZEISS images contain the size of the FOV in the tags "Height" and "Width" as string including unit
    // If both kinds of tags exist for an axis, the FOV-based value is used.
    number scaleX = 0
    number scaleY = 0
    string unitX
    string unitY
    string widthText
    string heightText

    // ---- X axis (dimension 0) ----
    if ( img.GetNumberNote( "TIFF Tags:Scan:PixelWidth", scaleX ) )
    {
        scaleX = scaleX * 1e9   // [m] -> [nm]
        unitX = "nm"
    }
    if ( img.GetStringNote( "TIFF Tags:Width", widthText ) )
    {
        // Expected form "<number> <unit>": split at the first space
        number gap = find( widthText, " " )
        if ( gap >= 0 )
        {
            // FOV size divided by the pixel count gives the size per pixel
            scaleX = val( left(widthText,gap) ) / img.ImageGetDimensionSize(0)
            unitX = right( widthText, len(widthText)-gap-1 )
        }
    }

    // ---- Y axis (dimension 1) ----
    if ( img.GetNumberNote( "TIFF Tags:Scan:PixelHeight", scaleY ) )
    {
        scaleY = scaleY * 1e9   // [m] -> [nm]
        unitY = "nm"
    }
    if ( img.GetStringNote( "TIFF Tags:Height", heightText ) )
    {
        number gap = find( heightText, " " )
        if ( gap >= 0 )
        {
            scaleY = val( left(heightText,gap) ) / img.ImageGetDimensionSize(1)
            unitY = right( heightText, len(heightText)-gap-1 )
        }
    }

    // Apply only calibrations that are positive
    if ( scaleX > 0 )
    {
        img.ImageSetDimensionScale(0,scaleX)
        img.ImageSetDimensionUnitString(0,unitX)
    }
    if ( scaleY > 0 )
    {
        img.ImageSetDimensionScale(1,scaleY)
        img.ImageSetDimensionUnitString(1,unitY)
    }
}
// Script entry point: run the import when the script is executed.
ImportTIFFWithTags()