Scrapy:如何限制 following-sibling 返回的兄弟节点数量?

时间:2015-10-06 16:45:03

标签: python xpath web-crawler scrapy

我正在尝试从以下HTML代码中抓取标题:

 using System;
using System.Threading;
using System.Runtime.InteropServices;
using System.Collections;
using System.Linq;

namespace LowLevelAudio
{
    /// <summary>
    /// Helper for checking the integer result codes returned by the <c>WaveNative</c> calls.
    /// </summary>
    internal class WaveOutHelper
    {
        /// <summary>
        /// Throws when <paramref name="err"/> is not <c>WaveNative.MMSYSERR_NOERROR</c>.
        /// </summary>
        /// <param name="err">Result code to check.</param>
        /// <exception cref="Exception">
        /// Thrown with the numeric code (as text) as its message when the code is not the no-error value.
        /// </exception>
        public static void Try(int err)
        {
            bool succeeded = err == WaveNative.MMSYSERR_NOERROR;
            if (succeeded)
            {
                return;
            }

            string message = err.ToString();
            throw new Exception(message);
        }
    }

    /// <summary>
    /// Callback that <see cref="WaveOutPlayer"/> invokes on its playback thread to have a
    /// buffer filled with audio data before that buffer is played.
    /// </summary>
    /// <param name="data">Address of the buffer memory to write into (the buffer's <c>Data</c> pointer).</param>
    /// <param name="size">Length of the buffer in bytes (the buffer's <c>Size</c>).</param>
    public delegate void BufferFillEventHandler(IntPtr data, int size);

    /// <summary>
    /// One playback buffer for a waveOut device. Owns a pinned byte array, the wave header
    /// that describes it, and an event that is signalled when the device reports the buffer
    /// as done through <see cref="WaveOutProc"/>.
    /// </summary>
    public class WaveOutBuffer : IDisposable
    {
        private readonly AutoResetEvent m_PlayEvent = new AutoResetEvent(false);
        // Private lock target; locking on "this" would let outside code contend for the same monitor.
        private readonly object m_PlayLock = new object();
        private IntPtr m_WaveOut;
        private int m_buffersize;
        // The following static/FFT fields are never read inside this class; kept as declared.
        private static byte[] m_samples;
        private static double[] m_fftsamples;
        private WaveNative.WaveHdr m_Header;
        private byte[] m_HeaderData;
        private GCHandle m_HeaderHandle;
        private GCHandle m_HeaderDataHandle;
        // Handle to this instance stored in the header's dwUser; kept so it can be freed on dispose.
        private GCHandle m_SelfHandle;
        private WaveFormat m_waveformat;
        private double[] m_fftOccurances;
        private double[] m_fftHertzlist;
        // Written by the playback thread and by the driver callback, hence volatile.
        private volatile bool m_Playing;
        private bool m_Disposed;

        /// <summary>Caller-assigned identifier; only used in the console trace output.</summary>
        public int ID
        {
            get; set;
        }

        /// <summary>
        /// Device callback. On <c>MM_WOM_DONE</c> it recovers the owning buffer from the
        /// header's <c>dwUser</c> handle and calls <see cref="OnPlayCompleted"/>.
        /// </summary>
        /// <param name="hdrvr">Device handle supplied by the driver (unused).</param>
        /// <param name="uMsg">Message identifier; only <c>MM_WOM_DONE</c> is handled.</param>
        /// <param name="dwUser">Instance data supplied when the device was opened (unused).</param>
        /// <param name="wavhdr">Header of the buffer the message refers to.</param>
        /// <param name="dwParam2">Unused.</param>
        internal static void WaveOutProc(IntPtr hdrvr, int uMsg, int dwUser, ref WaveNative.WaveHdr wavhdr, int dwParam2)
        {
            if (uMsg != WaveNative.MM_WOM_DONE)
            {
                return;
            }

            // Exceptions are reported and swallowed rather than thrown back into the caller of this callback.
            try
            {
                if (wavhdr.dwUser == IntPtr.Zero)
                {
                    // The owning buffer has already released its handle; nothing to notify.
                    return;
                }

                GCHandle h = (GCHandle)wavhdr.dwUser;
                WaveOutBuffer buf = (WaveOutBuffer)h.Target;
                if (buf != null)
                {
                    buf.OnPlayCompleted();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception: " + ex.Message);
            }
        }

        /// <summary>
        /// Allocates a buffer of <paramref name="size"/> bytes, pins it, and prepares its header
        /// on the given device.
        /// </summary>
        /// <param name="waveOutHandle">Open waveOut device handle the header is prepared for.</param>
        /// <param name="size">Buffer length in bytes.</param>
        /// <param name="format">Wave format; stored but not otherwise used by this class.</param>
        /// <exception cref="Exception">Thrown by <c>WaveOutHelper.Try</c> when preparing the header fails.</exception>
        public WaveOutBuffer(IntPtr waveOutHandle, int size, WaveFormat format)
        {
            m_WaveOut = waveOutHandle;
            m_waveformat = format;
            m_buffersize = size;

            // Allocate the managed arrays first so an invalid size throws before any handle exists.
            m_HeaderData = new byte[size];
            m_samples = new byte[m_buffersize];

            try
            {
                // NOTE(review): GCHandle.Alloc takes an object, so if WaveHdr is a struct this pins a
                // boxed copy rather than the m_Header field that is later passed by ref — confirm.
                m_HeaderHandle = GCHandle.Alloc(m_Header, GCHandleType.Pinned);
                m_SelfHandle = GCHandle.Alloc(this);
                m_HeaderDataHandle = GCHandle.Alloc(m_HeaderData, GCHandleType.Pinned);

                m_Header.dwUser = (IntPtr)m_SelfHandle;
                m_Header.lpData = m_HeaderDataHandle.AddrOfPinnedObject();
                m_Header.dwBufferLength = size;

                WaveOutHelper.Try(WaveNative.waveOutPrepareHeader(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header)));
            }
            catch
            {
                // The header was never prepared, so only the handles and the event need releasing.
                ReleaseHandles();
                m_Header.lpData = IntPtr.Zero;
                m_Header.dwUser = IntPtr.Zero;
                m_PlayEvent.Close();
                m_Disposed = true;
                GC.SuppressFinalize(this);
                throw;
            }
        }

        /// <summary>Releases the native header and handles if <see cref="Dispose()"/> was not called.</summary>
        ~WaveOutBuffer()
        {
            Dispose(false);
        }

        /// <summary>Buffer length in bytes, as stored in the header.</summary>
        public int Size
        {
            get { return m_Header.dwBufferLength; }
        }

        /// <summary>Address of the pinned buffer memory; <c>IntPtr.Zero</c> after disposal.</summary>
        public IntPtr Data
        {
            get { return m_Header.lpData; }
        }

        /// <summary>
        /// Unprepares the header, frees every GC handle held by this buffer (including the
        /// self-reference stored in <c>dwUser</c>) and closes the completion event.
        /// Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Shared cleanup for <see cref="Dispose()"/> and the finalizer.
        /// </summary>
        /// <param name="disposing">
        /// True when called from <see cref="Dispose()"/>; only then is the managed event closed.
        /// </param>
        private void Dispose(bool disposing)
        {
            if (m_Disposed)
            {
                return;
            }
            m_Disposed = true;

            if (m_Header.lpData != IntPtr.Zero)
            {
                // Return code deliberately ignored, as before: cleanup is best-effort.
                WaveNative.waveOutUnprepareHeader(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header));
                m_Header.lpData = IntPtr.Zero;
            }

            ReleaseHandles();
            m_Header.dwUser = IntPtr.Zero;

            if (disposing)
            {
                m_PlayEvent.Close();
            }
        }

        /// <summary>Frees whichever of the three GC handles are currently allocated.</summary>
        private void ReleaseHandles()
        {
            if (m_HeaderHandle.IsAllocated)
            {
                m_HeaderHandle.Free();
            }
            if (m_HeaderDataHandle.IsAllocated)
            {
                m_HeaderDataHandle.Free();
            }
            if (m_SelfHandle.IsAllocated)
            {
                m_SelfHandle.Free();
            }
        }

        /// <summary>
        /// Queues this buffer on the device with <c>waveOutWrite</c>.
        /// </summary>
        /// <returns>Always <c>true</c>; failure is reported by exception.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>waveOutWrite</c> does not return <c>MMSYSERR_NOERROR</c>; the message carries the code.
        /// </exception>
        public bool Play()
        {
            lock (m_PlayLock)
            {
                m_PlayEvent.Reset();
                // Mark as playing before the write so a completion callback that fires
                // immediately cannot have its "false" overwritten afterwards.
                m_Playing = true;

                var result = WaveNative.waveOutWrite(m_WaveOut, ref m_Header, Marshal.SizeOf(m_Header));
                if (result != WaveNative.MMSYSERR_NOERROR)
                {
                    m_Playing = false;
                    throw new InvalidOperationException("waveOutWrite failed with error code " + result + ".");
                }

                return true;
            }
        }

        /// <summary>
        /// Blocks until the completion event is signalled if the buffer is currently marked
        /// as playing; returns immediately otherwise. The buffer is marked idle afterwards.
        /// </summary>
        public void WaitForMe()
        {
            Console.WriteLine(this.ID + " WaitFor()");
            if (m_Playing)
            {
                m_PlayEvent.WaitOne();
            }

            // WaitOne() returns true once signalled, so its result must not be stored as the
            // "playing" state: after the wait the buffer is by definition no longer playing.
            m_Playing = false;
        }

        /// <summary>
        /// Marks the buffer idle and signals the completion event. Called from
        /// <see cref="WaveOutProc"/> when the device reports the buffer as done.
        /// </summary>
        public void OnPlayCompleted()
        {
            Console.WriteLine(this.ID + " OnCompleted()");
            // Clear the flag before waking the waiter so it observes a consistent state.
            m_Playing = false;
            m_PlayEvent.Set();
        }
    }

    /// <summary>
    /// Opens a waveOut device and plays audio on a dedicated thread, alternating between two
    /// <see cref="WaveOutBuffer"/> instances: while one is playing, the other is filled through
    /// the supplied <see cref="BufferFillEventHandler"/>.
    /// </summary>
    public class WaveOutPlayer : IDisposable
    {
        private IntPtr m_WaveOut;
        private WaveOutBuffer[] m_bufferlist;
        private Thread m_Thread;
        private BufferFillEventHandler m_FillProc;
        // Set by Dispose on the caller's thread and polled by the playback thread, hence volatile.
        private volatile bool m_Finished;
        private byte m_zero;
        private int m_buffercount = 0;
        private int m_buffersize = 0;
        private WaveFormat m_waveformat;

        // Held in a field so the delegate passed to waveOutOpen stays referenced by this instance.
        private WaveNative.WaveDelegate m_BufferProc = new WaveNative.WaveDelegate(WaveOutBuffer.WaveOutProc);

        /// <summary>Number of waveOut devices reported by <c>waveOutGetNumDevs</c>.</summary>
        public static int DeviceCount
        {
            get { return WaveNative.waveOutGetNumDevs(); }
        }

        /// <summary>
        /// Opens the device, allocates two buffers and starts the playback thread.
        /// </summary>
        /// <param name="device">Device identifier passed to <c>waveOutOpen</c>.</param>
        /// <param name="format">Wave format passed to <c>waveOutOpen</c> and to each buffer.</param>
        /// <param name="bufferSize">Size of each buffer in bytes.</param>
        /// <param name="fillProc">Callback used to fill a buffer before it is played.</param>
        /// <exception cref="ArgumentNullException"><paramref name="fillProc"/> is null.</exception>
        /// <exception cref="Exception">Thrown by <c>WaveOutHelper.Try</c> when a waveOut call fails.</exception>
        public WaveOutPlayer(int device, WaveFormat format, int bufferSize, BufferFillEventHandler fillProc)
        {
            if (fillProc == null)
            {
                // Previously a null callback only surfaced later as a crash on the playback thread.
                throw new ArgumentNullException("fillProc");
            }

            m_zero = format.wBitsPerSample == 8 ? (byte)128 : (byte)0;
            m_FillProc = fillProc;
            m_buffercount = 2;
            m_buffersize = bufferSize;
            m_waveformat = format;
            WaveOutHelper.Try(WaveNative.waveOutOpen(out m_WaveOut, device, format, m_BufferProc, 0, WaveNative.CALLBACK_FUNCTION));

            try
            {
                AllocateBuffers(bufferSize, m_buffercount, format);

                // Publish the thread only after it has started, so Dispose never joins an unstarted thread.
                Thread worker = new Thread(new ThreadStart(ThreadProc));
                worker.Start();
                m_Thread = worker;
            }
            catch
            {
                // The device is already open: close it and free any buffers before propagating.
                Dispose();
                throw;
            }
        }

        /// <summary>Closes the native device handle if <see cref="Dispose()"/> was not called.</summary>
        ~WaveOutPlayer()
        {
            Dispose(false);
        }

        /// <summary>
        /// Stops playback, waits for the playback thread to exit, disposes the buffers and
        /// closes the device. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Shared cleanup for <see cref="Dispose()"/> and the finalizer.
        /// </summary>
        /// <param name="disposing">
        /// True when called from <see cref="Dispose()"/>; only then are the thread joined and
        /// the managed buffers disposed.
        /// </param>
        private void Dispose(bool disposing)
        {
            if (m_Thread == null && m_WaveOut == IntPtr.Zero && m_bufferlist == null)
            {
                return;
            }

            try
            {
                m_Finished = true;
                if (m_WaveOut != IntPtr.Zero)
                {
                    WaveNative.waveOutReset(m_WaveOut);
                }

                if (disposing)
                {
                    if (m_Thread != null)
                    {
                        m_Thread.Join();
                    }
                    m_FillProc = null;
                    FreeBuffers();
                }

                if (m_WaveOut != IntPtr.Zero)
                {
                    WaveNative.waveOutClose(m_WaveOut);
                }
            }
            finally
            {
                m_Thread = null;
                m_WaveOut = IntPtr.Zero;
            }
        }

        /// <summary>
        /// Playback loop: fills the first buffer, then repeatedly plays one buffer while
        /// filling the other, repaints the equalizer for the playing buffer, waits for it to
        /// finish and swaps roles, until <c>m_Finished</c> is set.
        /// </summary>
        private void ThreadProc()
        {
            WaveOutBuffer[] buffers = m_bufferlist;
            BufferFillEventHandler fill = m_FillProc;
            MainForm form = Program.getUI();

            int playing = 0;
            fill(buffers[0].Data, buffers[0].Size);

            while (!m_Finished)
            {
                int filling = 1 - playing;
                WaveOutBuffer current = buffers[playing];
                WaveOutBuffer next = buffers[filling];

                Console.WriteLine("-------------------------");
                Console.WriteLine("Playing b" + playing + ", filling b" + filling);
                current.Play();
                fill(next.Data, next.Size);

                form.paintEqualizer(current);
                Console.WriteLine("callng waitFor on b" + playing);
                current.WaitForMe();

                playing = filling;
            }
        }

        /// <summary>
        /// Disposes any existing buffers and creates <paramref name="bufferCount"/> new ones,
        /// assigning each its index as <c>ID</c>.
        /// </summary>
        /// <param name="bufferSize">Size of each buffer in bytes.</param>
        /// <param name="bufferCount">Number of buffers to create; values below zero are treated as zero.</param>
        /// <param name="format">Wave format handed to each buffer.</param>
        private void AllocateBuffers(int bufferSize, int bufferCount, WaveFormat format)
        {
            FreeBuffers();

            // Use the parameter throughout instead of mixing it with the m_buffercount field.
            int count = Math.Max(bufferCount, 0);
            m_bufferlist = new WaveOutBuffer[count];
            for (int i = 0; i < count; i++)
            {
                m_bufferlist[i] = new WaveOutBuffer(m_WaveOut, bufferSize, format);
                m_bufferlist[i].ID = i;
            }
        }

        /// <summary>Disposes every allocated buffer and drops the buffer list.</summary>
        private void FreeBuffers()
        {
            if (m_bufferlist == null)
            {
                return;
            }

            foreach (WaveOutBuffer currentBuffer in m_bufferlist)
            {
                if (currentBuffer != null)
                {
                    currentBuffer.Dispose();
                }
            }

            // Drop the references so disposed buffers are not reused.
            m_bufferlist = null;
        }
    }
}

我正在使用此代码:

<FONT COLOR=#5FA505><B>Claim:</B></FONT> &nbsp; Coed makes unintentionally risqu&eacute; remark about professor's "little quizzies."
<BR><BR>
<CENTER><IMG SRC="/images/content-divider.gif"></CENTER>

def parse_article(self, response):
    for href in response.xpath('//font[b = "Claim:"]/following-sibling::text()'):
        print href.extract()

我成功地从前面提到的HTML代码中提取了我想要的值,但它也把同一页面中结构类似的其他部分(即下面的HTML)一并提取了出来。我定义的 xpath() 只应提取名为 Claim: 的 font 标记,那么为什么它也会把下面的 Origins 提取进来?我该如何解决?我试过只获取下一个 following-sibling 而不是全部兄弟节点,但没有成功。

Origins

2 个答案:

答案 0 :(得分:0)

我认为您的XPath缺少 text() 限定符(解释见原回答中的链接)。它应该是:

'//font[b/text()="Claim:"]/following-sibling::text()'

答案 1 :(得分:0)

following-sibling轴返回元素后面的所有兄弟节点。如果您只想要第一个兄弟,请尝试XPath表达式:

//font[b = "Claim:"]/following-sibling::text()[1]

或者,根据您的具体用例:

(//font[b = "Claim:"]/following-sibling::text())[1]