我正在尝试使用Python从一个大约有4000页的网站中抓取数据,每页包含25个链接。
我的问题是,在处理了大约200个页面之后,性能变得非常糟糕,甚至连我计算机上的其他程序也会卡死。
我猜问题出在我没有正确使用内存之类的地方。如果有人可以帮我解决这个问题,让我的脚本运行得更顺畅、对系统的资源占用更低,我将非常感激。
提前感谢您的帮助。 :)
编辑:我找到了解决方案,向下滚动一点,就能在我给出的答案中看到它。感谢所有试图帮助我的人,特别是etna和Walter A,他们给了我很好的建议,让我走上了正轨。 :)
from pprint import pprint
from lxml import etree
import itertools
import requests
def parsePageUrls(page):
    """Return the link targets found on one listing page as plain strings.

    ``page`` is any object exposing an lxml-style ``xpath(query)`` method.
    The query selects the ``href`` attribute of every ``<a>`` directly inside
    a ``<span class="tip">``.
    """
    # xpath() yields lxml string-result objects that keep a reference to their
    # parent element; converting them to plain str drops that reference so the
    # parsed tree can be garbage-collected once the caller is done with it.
    return [str(href) for href in page.xpath('//span[@class="tip"]/a/@href')]
def isLastPage(page):
    """Return True when ``page`` has no ``<a rel="next">`` link, else False.

    ``page`` is any object exposing an lxml-style ``xpath(query)`` method.
    """
    # An empty xpath result means there is no "next" link to follow.
    return not page.xpath('//a[@rel="next"]')
# Walk the paginated index starting at page 1 and collect every link target
# until a page without a "next" link is reached.
urls = []
for i in itertools.count(1):
    content = requests.get('http://www.example.com/index.php?page=' + str(i), allow_redirects=False)
    page = etree.HTML(content.text)
    # Store plain str copies: lxml string results hold a reference to their
    # parent element, which would keep every parsed page alive in memory.
    urls.extend(str(url) for url in parsePageUrls(page))
    if isLastPage(page):
        break
pprint(urls)
答案 0 :(得分:0)
我终于找到了解决方案。问题在于,我以为tree.xpath的返回值是一个字符串列表,但实际上它是一个_ElementUnicodeResult对象的列表;这些对象保存了对其父节点的引用,因此阻止了GC回收内存。
所以解决方案是将这些_ElementUnicodeResult对象转换为普通字符串,以去除这些引用。
以下是帮助我理解问题的来源:http://lxml.de/api/lxml.etree._ElementTree-class.html#xpath
对于提供的代码,以下修复了它:
而不是:
urls.extend(parsePageUrls(page))
必须是:
urls.extend(str(url) for url in parsePageUrls(page))
public class SomethingWatchFaceService extends CanvasWatchFaceService {
private static final String TAG = "SomethingWatchFaceService";
/**
 * Supplies the engine that implements this watch face.
 *
 * @return a newly constructed {@link Engine}
 */
@Override
public Engine onCreateEngine() {
    final Engine engine = new Engine();
    return engine;
}
/* implement service callback methods */
private class Engine extends CanvasWatchFaceService.Engine {
// Background image as loaded in onCreate(), plus a copy scaled to the draw bounds in onDraw().
Bitmap mBackgroundBitmap;
Bitmap mBackgroundScaledBitmap;
// Candidate background colour names; one is picked at random when the engine is constructed.
String[] backgroundColor = {"red", "green", "blue"};
int let= new Random().nextInt(backgroundColor.length);
String randomColor = (backgroundColor[let]);
// Candidate background drawables; index n selects the one loaded in onCreate().
Integer[] listDrawable = {R.drawable.back1, R.drawable.back2};
// Random index into listDrawable, bounded by the array length so that adding or
// removing drawables cannot leave the index out of sync with the array.
Random randNumForDrawable = new Random();
int n = randNumForDrawable.nextInt(listDrawable.length);
// Member variables
private Typeface WATCH_TEXT_TYPEFACE = Typeface.create( Typeface.SERIF, Typeface.NORMAL );
// Message id used by mTimeHandler for the periodic redraw.
private static final int MSG_UPDATE_TIME_ID = 42;
// Redraw period in milliseconds while the face is visible and not in ambient mode.
private long mUpdateRateMs = 1000;
private Time mDisplayTime;
private Paint mBackgroundColorPaint;
private Paint mTextColorPaint;
private boolean mHasTimeZoneReceiverBeenRegistered = false;
// NOTE(review): never assigned in the visible code, so it keeps its default of false.
private boolean mIsInMuteMode;
private boolean mIsLowBitAmbient;
// Text origin passed to drawText(); assigned in onApplyWindowInsets().
private float mXOffset;
private float mYOffset;
private int mBackgroundColor = Color.parseColor(randomColor);
private int mTextColor = Color.parseColor( "white" );
/** Switches the displayed time to the zone named in the intent's "time-zone" extra and refreshes it. */
final BroadcastReceiver mTimeZoneBroadcastReceiver = new BroadcastReceiver() {
    @Override
    public void onReceive(Context context, Intent intent) {
        final String zoneId = intent.getStringExtra( "time-zone" );
        mDisplayTime.clear( zoneId );
        mDisplayTime.setToNow();
    }
};
/**
 * Drives the periodic redraw: each MSG_UPDATE_TIME_ID message invalidates the face and,
 * while the face is visible and not in ambient mode, schedules the next message.
 */
private final Handler mTimeHandler = new Handler() {
    @Override
    public void handleMessage(Message msg) {
        // Only the update-time message is handled; anything else is ignored.
        if( msg.what != MSG_UPDATE_TIME_ID ) {
            return;
        }
        invalidate();
        final boolean keepTicking = isVisible() && !isInAmbientMode();
        if( keepTicking ) {
            // Delay until the next multiple of the update period on the wall clock.
            final long now = System.currentTimeMillis();
            final long untilNextTick = mUpdateRateMs - ( now % mUpdateRateMs );
            mTimeHandler.sendEmptyMessageDelayed( MSG_UPDATE_TIME_ID, untilNextTick );
        }
    }
};
/**
 * Loads the randomly selected background image, configures the watch face style and
 * prepares the paints and the time object used for drawing.
 *
 * @param holder surface holder forwarded to the superclass
 */
@Override
public void onCreate(SurfaceHolder holder) {
    super.onCreate(holder);
    /* load the background image */
    Resources resources = SomethingWatchFaceService.this.getResources();
    Drawable backgroundDrawable = resources.getDrawable(listDrawable[n]);
    mBackgroundBitmap = ((BitmapDrawable) backgroundDrawable).getBitmap();
    // The builder must be given the enclosing service, i.e. SomethingWatchFaceService.
    setWatchFaceStyle( new WatchFaceStyle.Builder( SomethingWatchFaceService.this )
            .setBackgroundVisibility( WatchFaceStyle.BACKGROUND_VISIBILITY_INTERRUPTIVE )
            .setCardPeekMode( WatchFaceStyle.PEEK_MODE_SHORT)
            .setShowSystemUiTime( false )
            .build()
    );
    initBackground();
    initDisplayText();
    mDisplayTime = new Time();
}
/**
 * Records whether the device reports low-bit ambient mode.
 *
 * @param properties device feature bundle supplied by the system
 */
@Override
public void onPropertiesChanged(Bundle properties) {
    super.onPropertiesChanged(properties);
    /* get device features (low-bit ambient) */
    // Low-bit ambient is reported independently of burn-in protection, so read it
    // unconditionally instead of only when the burn-in flag happens to be set.
    mIsLowBitAmbient = properties.getBoolean( PROPERTY_LOW_BIT_AMBIENT, false );
}
/** Requests a redraw when the time changes. */
@Override
public void onTimeTick() {
    super.onTimeTick();
    // The displayed time is refreshed inside onDraw(), so a redraw is all that is needed.
    invalidate();
}
/**
 * Switches the text and background colours for the new ambient state, then redraws
 * and restarts (or stops) the update timer.
 *
 * @param inAmbientMode true when the device has entered ambient mode
 */
@Override
public void onAmbientModeChanged(boolean inAmbientMode) {
    super.onAmbientModeChanged(inAmbientMode);
    // Ambient: grey text on black. Interactive: white text on the randomly chosen colour.
    final String textColorName = inAmbientMode ? "grey" : "white";
    final String backgroundColorName = inAmbientMode ? "black" : randomColor;
    mTextColorPaint.setColor( Color.parseColor( textColorName ) );
    mBackgroundColorPaint.setColor( Color.parseColor( backgroundColorName ) );
    // On low-bit displays anti-aliasing is turned off while in ambient mode.
    if( mIsLowBitAmbient ) {
        mTextColorPaint.setAntiAlias( !inAmbientMode );
    }
    invalidate();
    updateTimer();
}
/**
 * Paints one frame: the solid background, the background image scaled to the bounds,
 * and the current time text.
 *
 * @param canvas canvas to draw on
 * @param bounds area of the watch face
 */
@Override
public void onDraw(Canvas canvas, Rect bounds) {
    drawBackground( canvas, bounds );
    final int targetWidth = bounds.width();
    final int targetHeight = bounds.height();
    // Rebuild the scaled copy only when it is missing or its size no longer matches the bounds.
    final boolean scaledCopyIsStale = mBackgroundScaledBitmap == null
            || mBackgroundScaledBitmap.getWidth() != targetWidth
            || mBackgroundScaledBitmap.getHeight() != targetHeight;
    if( scaledCopyIsStale ) {
        mBackgroundScaledBitmap = Bitmap.createScaledBitmap(
                mBackgroundBitmap, targetWidth, targetHeight, true /* filter */ );
    }
    canvas.drawBitmap( mBackgroundScaledBitmap, 0, 0, null );
    mDisplayTime.setToNow();
    drawTimeText( canvas );
}
/** Creates the background paint and gives it the initial background colour. */
private void initBackground() {
    final Paint backgroundPaint = new Paint();
    mBackgroundColorPaint = backgroundPaint;
    backgroundPaint.setColor( mBackgroundColor );
}
/**
 * Fills a rectangle from the origin to the width and height of the bounds with the background paint.
 *
 * @param canvas canvas to draw on
 * @param bounds area whose width and height size the rectangle
 */
private void drawBackground( Canvas canvas, Rect bounds ) {
    final int right = bounds.width();
    final int bottom = bounds.height();
    canvas.drawRect( 0, 0, right, bottom, mBackgroundColorPaint );
}
/** Creates the text paint and sets its colour, typeface, anti-aliasing and size. */
private void initDisplayText() {
    final Paint textPaint = new Paint();
    mTextColorPaint = textPaint;
    textPaint.setColor( mTextColor );
    textPaint.setTypeface( WATCH_TEXT_TYPEFACE );
    textPaint.setAntiAlias( true );
    final float textSize = getResources().getDimension( R.dimen.text_size );
    textPaint.setTextSize( textSize );
}
/**
 * Cancels any pending update message and, if the face is visible and not in
 * ambient mode, posts a new one immediately.
 */
private void updateTimer() {
    mTimeHandler.removeMessages( MSG_UPDATE_TIME_ID );
    if( !isVisible() || isInAmbientMode() ) {
        return;
    }
    mTimeHandler.sendEmptyMessage( MSG_UPDATE_TIME_ID );
}
/**
 * Draws the time at (mXOffset, mYOffset). The text is "h:mm" followed by AM/PM in
 * ambient or mute mode, or by ":ss" otherwise.
 *
 * @param canvas canvas to draw on
 */
private void drawTimeText( Canvas canvas ) {
    final StringBuilder label = new StringBuilder();
    label.append( getHourString() )
            .append( ":" )
            .append( String.format( "%02d", mDisplayTime.minute ) );
    final boolean showMeridiem = isInAmbientMode() || mIsInMuteMode;
    if( showMeridiem ) {
        label.append( ( mDisplayTime.hour < 12 ) ? "AM" : "PM" );
    } else {
        label.append( String.format( ":%02d", mDisplayTime.second ) );
    }
    canvas.drawText( label.toString(), mXOffset, mYOffset, mTextColorPaint );
}
/**
 * Converts the hour of mDisplayTime to 12-hour form.
 *
 * @return "12" when the hour is a multiple of 12; the hour itself when it is at most 12;
 *         otherwise the hour minus 12
 */
private String getHourString() {
    final int hour = mDisplayTime.hour;
    if( hour % 12 == 0 ) {
        return "12";
    }
    final int displayHour = ( hour <= 12 ) ? hour : hour - 12;
    return String.valueOf( displayHour );
}
/**
 * Registers the time-zone receiver and refreshes the time when the face becomes
 * visible; unregisters the receiver when it is hidden. The update timer is
 * re-evaluated in both cases.
 *
 * @param visible true when the watch face became visible
 */
@Override
public void onVisibilityChanged(boolean visible) {
    super.onVisibilityChanged(visible);
    /* the watch face became visible or invisible */
    if( visible ) {
        if( !mHasTimeZoneReceiverBeenRegistered ) {
            IntentFilter filter = new IntentFilter( Intent.ACTION_TIMEZONE_CHANGED );
            // Register and unregister on the same enclosing service instance.
            SomethingWatchFaceService.this.registerReceiver( mTimeZoneBroadcastReceiver, filter );
            mHasTimeZoneReceiverBeenRegistered = true;
        }
        // The zone may have changed while the receiver was not registered.
        mDisplayTime.clear( TimeZone.getDefault().getID() );
        mDisplayTime.setToNow();
    } else {
        if( mHasTimeZoneReceiverBeenRegistered ) {
            SomethingWatchFaceService.this.unregisterReceiver( mTimeZoneBroadcastReceiver );
            mHasTimeZoneReceiverBeenRegistered = false;
        }
    }
    updateTimer();
}
/**
 * Sets the text offsets: the vertical offset is fixed, while the horizontal offset
 * depends on whether the insets report a round screen.
 *
 * @param insets window insets supplied by the system
 */
@Override
public void onApplyWindowInsets(WindowInsets insets) {
    super.onApplyWindowInsets(insets);
    mYOffset = getResources().getDimension( R.dimen.y_offset );
    final int xOffsetRes = insets.isRound()
            ? R.dimen.x_offset_round
            : R.dimen.x_offset_square;
    mXOffset = getResources().getDimension( xOffsetRes );
}
}