通过jsoup从div(html)获取除div标签之外的数据

时间:2016-11-10 11:00:19

标签: java jsoup

我有HTML代码:

java.lang.IllegalStateException: Unable to perform operation: resolve on com.jive.services.jiveid.auth.oauth.models.AuthorizeParams
at org.jvnet.hk2.internal.ClazzCreator.create(ClazzCreator.java:386)
at org.jvnet.hk2.internal.SystemDescriptor.create(SystemDescriptor.java:471)
at org.glassfish.jersey.process.internal.RequestScope.findOrCreate(RequestScope.java:162)
at org.jvnet.hk2.internal.Utilities.createService(Utilities.java:2072)
at org.jvnet.hk2.internal.ServiceHandleImpl.getService(ServiceHandleImpl.java:114)
at org.jvnet.hk2.internal.ServiceHandleImpl.getService(ServiceHandleImpl.java:88)
at org.glassfish.jersey.server.internal.inject.BeanParamValueFactoryProvider$BeanParamValueFactory.provide(BeanParamValueFactoryProvider.java:118)
at org.glassfish.jersey.server.spi.internal.ParamValueFactoryWithSource.provide(ParamValueFactoryWithSource.java:71)
at org.glassfish.jersey.server.spi.internal.ParameterValueHelper.getParameterValues(ParameterValueHelper.java:90)
at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$AbstractMethodParamInvoker.getParamValues(JavaResourceMethodDispatcherProvider.java:127)
at org.glassfish.jersey.server.model.internal.JavaResourceMethodDispatcherProvider$ResponseOutInvoker.doDispatch(JavaResourceMethodDispatcherProvider.java:160)
at org.glassfish.jersey.server.model.internal.AbstractJavaResourceMethodDispatcher.dispatch(AbstractJavaResourceMethodDispatcher.java:99)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.invoke(ResourceMethodInvoker.java:389)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:347)
at org.glassfish.jersey.server.model.ResourceMethodInvoker.apply(ResourceMethodInvoker.java:102)
at org.glassfish.jersey.server.ServerRuntime$2.run(ServerRuntime.java:326)
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:271)
at org.glassfish.jersey.internal.Errors$1.call(Errors.java:267)
at org.glassfish.jersey.internal.Errors.process(Errors.java:315)
at org.glassfish.jersey.internal.Errors.process(Errors.java:297)
at org.glassfish.jersey.internal.Errors.process(Errors.java:267)
at org.glassfish.jersey.process.internal.RequestScope.runInScope(RequestScope.java:317)
at org.glassfish.jersey.server.ServerRuntime.process(ServerRuntime.java:305)
at org.glassfish.jersey.server.ApplicationHandler.handle(ApplicationHandler.java:1154)
at org.glassfish.jersey.servlet.WebComponent.serviceImpl(WebComponent.java:471)
at org.glassfish.jersey.servlet.WebComponent.service(WebComponent.java:425)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:383)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:336)
at org.glassfish.jersey.servlet.ServletContainer.service(ServletContainer.java:223)
at io.dropwizard.jetty.NonblockingServletHolder.handle(NonblockingServletHolder.java:49)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1669)
at io.dropwizard.servlets.ThreadNameFilter.doFilter(ThreadNameFilter.java:29)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652)
at io.dropwizard.jersey.filter.AllowedMethodsFilter.handle(AllowedMethodsFilter.java:43)
at io.dropwizard.jersey.filter.AllowedMethodsFilter.doFilter(AllowedMethodsFilter.java:38)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652)
at org.eclipse.jetty.servlets.CrossOriginFilter.handle(CrossOriginFilter.java:257)
at org.eclipse.jetty.servlets.CrossOriginFilter.doFilter(CrossOriginFilter.java:220)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:316)
at org.springframework.web.filter.CompositeFilter$VirtualFilterChain.doFilter(CompositeFilter.java:107)
at com.jive.services.jiveid.common.extensions.security.LemurCertificateFilter.doFilter(LemurCertificateFilter.java:52)
at org.springframework.web.filter.CompositeFilter$VirtualFilterChain.doFilter(CompositeFilter.java:112)
at com.jive.services.jiveid.common.extensions.CorrelationIdFilter.doFilterInternal(CorrelationIdFilter.java:51)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.web.filter.CompositeFilter$VirtualFilterChain.doFilter(CompositeFilter.java:112)
at com.jive.services.jiveid.common.extensions.security.CorsFilter.doFilterInternal(CorsFilter.java:27)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.web.filter.CompositeFilter$VirtualFilterChain.doFilter(CompositeFilter.java:112)
at org.springframework.web.filter.RequestContextFilter.doFilterInternal(RequestContextFilter.java:99)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.web.filter.CompositeFilter$VirtualFilterChain.doFilter(CompositeFilter.java:112)
at org.springframework.web.filter.CompositeFilter.doFilter(CompositeFilter.java:73)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.invoke(FilterSecurityInterceptor.java:126)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.doFilter(FilterSecurityInterceptor.java:90)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.access.ExceptionTranslationFilter.doFilter(ExceptionTranslationFilter.java:114)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.authentication.AnonymousAuthenticationFilter.doFilter(AnonymousAuthenticationFilter.java:111)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.servletapi.SecurityContextHolderAwareRequestFilter.doFilter(SecurityContextHolderAwareRequestFilter.java:169)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.header.HeaderWriterFilter.doFilterInternal(HeaderWriterFilter.java:64)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.context.request.async.WebAsyncManagerIntegrationFilter.doFilterInternal(WebAsyncManagerIntegrationFilter.java:53)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.context.SecurityContextPersistenceFilter.doFilter(SecurityContextPersistenceFilter.java:91)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.FilterChainProxy.doFilterInternal(FilterChainProxy.java:213)
at org.springframework.security.web.FilterChainProxy.doFilter(FilterChainProxy.java:176)
at org.springframework.web.filter.DelegatingFilterProxy.invokeDelegate(DelegatingFilterProxy.java:346)
at org.springframework.web.filter.DelegatingFilterProxy.doFilter(DelegatingFilterProxy.java:262)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1652)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:585)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1127)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:515)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1061)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)
at com.codahale.metrics.jetty9.InstrumentedHandler.handle(InstrumentedHandler.java:240)
at io.dropwizard.jetty.RoutingHandler.handle(RoutingHandler.java:51)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)
at org.eclipse.jetty.server.handler.RequestLogHandler.handle(RequestLogHandler.java:95)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)
at org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:159)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:97)
at org.eclipse.jetty.server.Server.handle(Server.java:499)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:311)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:257)
at org.eclipse.jetty.io.AbstractConnection$2.run(AbstractConnection.java:544)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:635)
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:555)
at java.lang.Thread.run(Thread.java:745)

我使用Jsoup。如何获得除div标签中的链接之外的所有链接? 我尝试做这样的事情,但它不起作用。元素包含所有链接。

<table width="100%" cellpadding="5" cellspacing="2" class="zebra"> 
  <tr>
   <td colspan="5">
    <div class="paginator">
     <a href="http://some_link">2</a>&nbsp;         
    </div>
    </td>
  </tr>   
  <tr> 
   <td><a href="//i_need_only_this_link">some_value</a></td>       
  </tr> 
  <tr>      
   <td><a href="//i_need_only_this_link1">some_value</a></td>       
  </tr> 
  <tr>
   <td colspan="2">
    <div class="paginator">        
     <a href="http://some_link">2</a>&nbsp;
    </div>
    </td>
  </tr> 
</table>

1 个答案:

答案 0 :(得分:1)

您可以使用以下代码..

Document html = Jsoup.parse(htmlStr);

    for (Element e : html.getElementsByTag("a")) {

        if (!"div".equalsIgnoreCase(e.parentNode().nodeName())) {
            System.out.println(e.attr("href"));
        }

    }

这里我检查锚元素的父节点不是div。如果它不是div我打印网址。