如何以多线程方式从rpt生成PDF文档?

时间:2015-12-23 15:35:32

标签: java multithreading pdf crystal-reports export

我有一个rpt文件,我要用它以PDF格式生成多份报告,使用的是 i-net Clear Reports 中的 Engine 类。这个过程需要很长时间,因为我要生成近10000份报告。我可以使用多线程(multi-threading)或其他方法来加速这个过程吗?

如能提供任何关于如何实现这一点的帮助,我将不胜感激。

我的部分代码。

 //Loops
 // Body of the per-report loop: configure an engine for data[i], render the
 // report, then hand the engine to PDFExportThread to write the PDF file.
 Engine eng = new Engine(Engine.EXPORT_PDF);
 eng.setReportFile(rpt); //rpt is the report name
 // The null test must come first: calling isClosed() on a null reference
 // throws a NullPointerException before the null check is ever reached.
 if (cn == null || cn.isClosed()) {
    cn = ds.getConnection();
 }
 eng.setConnection(cn);
 System.out.println(" After set connection");
 eng.setPrompt(data[i], 0);
 ReportProperties repprop = eng.getReportProperties();
 repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION, ReportProperties.PAPER_FANFOLD_US);
 eng.execute();
 System.out.println(" After excecute");
 try {
      PDFExportThread pdfExporter = new PDFExportThread(eng, sFileName, sFilePath);
      pdfExporter.execute();
 } catch (Exception e) {
      // Best effort: report the failure and carry on with the next report.
      e.printStackTrace();
 }

PDFExportThread执行

 /**
  * Writes every page of the already-executed report held in {@code eng} to
  * {@code sFilePath + sFileName + "_" + (pageCount - 1) + ".pdf"}, creating
  * the parent directory if it does not exist yet.
  *
  * <p>Failures are printed to standard error rather than propagated, so one
  * failed export does not abort the caller's loop.
  *
  * @throws IOException kept in the signature for compatibility with existing
  *         callers; I/O failures are currently caught and printed inside the
  *         method
  */
 public void execute() throws IOException {
      try {
           // NOTE(review): assumes getPageCount() returns the same value on
           // every call once the report has executed, so it is read once.
           int pageCount = eng.getPageCount();
           String fileName = sFileName + "_" + (pageCount - 1);
           File file = new File(sFilePath + fileName + ".pdf");

           // getParentFile() is null when the path has no directory part.
           File parent = file.getParentFile();
           if (parent != null && !parent.exists()) {
                parent.mkdirs();
           }

           // FileOutputStream creates the file itself, and try-with-resources
           // closes it exactly once on both the success and the failure path.
           try (FileOutputStream fos = new FileOutputStream(file)) {
                for (int k = 1; k <= pageCount; k++) {
                     fos.write(eng.getPageData(k));
                }
           }
      } catch (Exception e) {
           e.printStackTrace();
      }
 }

2 个答案:

答案 0 :(得分:2)

这是一个非常基本的代码。在池中具有固定大小线程的ThreadPoolExecutor是主干。

一些注意事项:

  1. 线程池大小应等于或小于数据库连接池大小。并且,它应该是并行引擎合理的最佳数字。
  2. 主线程应该等待足够长的时间,之后再终止所有线程。我这里把等待时间设为1小时,但这只是一个例子。
  3. 您需要进行适当的异常处理。
  4. 从API文档中,我看到了Engine类中的stopAll和shutdown方法。所以,一旦我们的工作完成,我就会调用它。这又是一个例子。
  5. 希望这有帮助。

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.sql.Connection;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.ThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;
    
    /**
     * Example driver that renders one PDF per prompt value on a fixed-size
     * thread pool.
     *
     * <p>{@code Engine} and {@code ReportProperties} at the bottom of this class
     * are empty stand-ins so the example compiles without the reporting library;
     * replace them with the real classes before use. With the stand-ins in place,
     * {@code getReportProperties()} returns {@code null}, so every task fails
     * before any file is written.
     */
    public class RunEngine {
        /** Worker thread count; keep it at or below the database connection pool size. */
        private static final int THREAD_POOL_SIZE = 10;

        /** Upper bound, in hours, on how long main waits for all exports to finish. */
        private static final long MAX_WAIT_HOURS = 1L;

        /**
         * Submits one export task per element of the prompt array, waits for
         * the pool to drain, then stops the engine.
         *
         * @param args unused
         * @throws Exception if the wait for the pool is interrupted
         */
        public static void main(String[] args) throws Exception {
            final String rpt = "/tmp/rpt/input/rpt-1.rpt";
            final String sFilePath = "/tmp/rpt/output/";
            final String sFileName = "pdfreport";
            final Object[] data = new Object[10];

            // Program to the ExecutorService interface; the concrete type returned
            // by newFixedThreadPool is not part of its documented contract.
            ExecutorService executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
            try {
                for (int i = 0; i < data.length; i++) {
                    executor.execute(new PDFExporterRunnable(rpt, data[i], sFilePath, sFileName, i));
                }
                executor.shutdown();
                if (!executor.awaitTermination(MAX_WAIT_HOURS, TimeUnit.HOURS)) {
                    System.err.println("Timed out waiting for report tasks; cancelling the remainder.");
                }
            } finally {
                // Also reached when submission or the wait throws: never leave
                // pool threads or the engine running behind a failed main.
                if (!executor.isTerminated()) {
                    executor.shutdownNow();
                }
                Engine.stopAll();
                Engine.shutdown();
            }
        }

        /** Renders a single report for one prompt value and writes it to its own PDF file. */
        private static class PDFExporterRunnable implements Runnable {
            private final String rpt;
            private final Object data;
            private final String sFilePath;
            private final String sFileName;
            private final int runIndex;

            /**
             * @param rpt       path of the report template
             * @param data      prompt value passed to the engine for this run
             * @param sFilePath output directory prefix (concatenated as-is with the file name)
             * @param sFileName base name of the output file
             * @param runIndex  index appended to the file name so each task writes its own file
             */
            public PDFExporterRunnable(String rpt, Object data, String sFilePath,
                    String sFileName, int runIndex) {
                this.rpt = rpt;
                this.data = data;
                this.sFilePath = sFilePath;
                this.sFileName = sFileName;
                this.runIndex = runIndex;
            }

            @Override
            public void run() {
                Engine eng = new Engine(Engine.EXPORT_PDF);
                eng.setReportFile(rpt); // rpt is the report name

                // Placeholder: obtain a connection for this task from your pool
                // here, e.g. cn = ds.getConnection(). When re-using a connection,
                // test "cn == null" BEFORE calling cn.isClosed().
                Connection cn = null;
                eng.setConnection(cn);
                System.out.println(" After set connection");

                eng.setPrompt(data, 0);
                ReportProperties repprop = eng.getReportProperties();
                repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION,
                        ReportProperties.PAPER_FANFOLD_US);
                eng.execute();
                System.out.println(" After excecute");

                try {
                    File file = new File(sFilePath + sFileName + "_" + runIndex + ".pdf");

                    // getParentFile() is null when the path has no directory part.
                    File parent = file.getParentFile();
                    if (parent != null && !parent.exists()) {
                        parent.mkdirs();
                    }

                    // NOTE(review): assumes the page count is stable after
                    // execute(), so it is read once instead of per iteration.
                    int pageCount = eng.getPageCount();

                    // FileOutputStream creates the file; try-with-resources closes
                    // it exactly once, whether or not writing succeeds.
                    try (FileOutputStream fos = new FileOutputStream(file)) {
                        for (int k = 1; k <= pageCount; k++) {
                            fos.write(eng.getPageData(k));
                        }
                    }
                } catch (Exception e) {
                    // Broad on purpose: the engine calls of the real library may
                    // throw types that the stand-ins below do not declare.
                    e.printStackTrace();
                }
            }
        }

        /*
         * Dummy classes to avoid compilation errors.
         */
        private static class ReportProperties {
            public static final String PAPER_FANFOLD_US = null;
            public static final String DEFAULT_PAPER_ORIENTATION = null;

            public void setPaperOrient(String defaultPaperOrientation, String paperFanfoldUs) {
            }
        }

        private static class Engine {
            public static final int EXPORT_PDF = 1;

            public Engine(int exportType) {
            }

            public static void shutdown() {
            }

            public static void stopAll() {
            }

            public void setPrompt(Object singleData, int i) {
            }

            public byte[] getPageData(int k) {
                return null;
            }

            public int getPageCount() {
                return 0;
            }

            public void execute() {
            }

            public ReportProperties getReportProperties() {
                return null;
            }

            public void setConnection(Connection cn) {
            }

            public void setReportFile(String reportFile) {
            }
        }
    }
    

答案 1 :(得分:1)

我提供这个“答案”作为一个可能的“快速而粗糙”(quick & dirty)的解决方案,帮助您开始着手并行化工作。

您将构建一个渲染农场。 我不认为在java中有一个简单的方法可以做到这一点;我希望有人发布一个答案,展示如何在几行代码中并行化您的示例。但在此之前,这有望帮助您取得一些进展。

您将在同一个JVM实例中进行有限的扩展。 但是......让我们看看你能走多远,看看它是否足够。

设计挑战#1:重新启动。

您可能想要一个地方来保存每个报告的状态,例如“工作单位”。

如果您需要重新启动所有内容(可能是您的服务器崩溃)并且您不想重新运行到目前为止的所有报告,您需要这样做。

实现方式有很多:可以用数据库;也可以检查报告文件夹中是否存在“已完成”标记文件(仅凭 *.pdf 文件存在并不够,因为它可能尚未写完……例如对于 xyz_200.pdf,您可以另外生成一个空的 xyz_200.done 或 xyz_200.err 文件,以便重新运行出问题的那些报告)。不过,等您把这些文件的操作/检查/初始化逻辑都写完,就会发现,在数据库中保存待完成工作列表的那张表上加一列可能反而更简单。

设计考虑因素#2:最大化吞吐量(避免过载)。

您不希望使系统饱和并且并行运行一千个报告。 也许10。
也许100。
可能不是5,000。
您需要进行一些规模研究,看看是什么让您接近80%到90%的系统利用率。

设计考虑因素#3:跨多个服务器进行扩展

过于复杂,超出了Stack Exchange答案的范围。 您必须在运行类似下面的工作程序的多个系统上启动JVM,并且可以从共享的“队列”结构中提取工作项的报表管理器,再次数据库表可能比执行某些文件更容易基于(或网络馈送)。

示例代码

警告:这些代码都没有经过充分测试,它几乎肯定会有大量的拼写错误,逻辑错误和糟糕的设计。使用风险自负。

所以无论如何……我确实想给您一个基本任务运行器(task runner)的大致思路。用以下代码替换问题中的“//Loops”示例:

主循环(原始代码示例)

这或多或少做了你的示例代码所做的事情,修改后将大部分工作推送到ReportWorker(新类,见下文)。很多东西似乎都包含在原始问题的“// Loop”示例中,所以我不打算对其进行逆向工程。

fwiw,我不清楚“rpt”和“data [i]”来自哪里,所以我修改了一些测试数据。

/**
 * Test driver: hands every not-yet-complete work item to a ReportManager,
 * waits for all in-flight work to finish and prints the elapsed time.
 */
public class Main {

   /**
    * Reports whether the unit of work identified by {@code data} has already
    * finished successfully. This test version treats nothing as complete.
    *
    * @param data identifier of the unit of work
    * @return always {@code false}
    */
   public static boolean complete( String data ) {
      return false;
   }

   /**
    * Runs the sample workload over five hard-coded items.
    *
    * @param args unused
    */
   public static void main( String[] args ) {
      final String[] data = { "A", "B", "C", "D", "E" };
      final String rpt = "xyz";

      // The manager assigns work to its workers and monitors them.
      final ReportManager reportMgr = new ReportManager();
      final long startTime = System.currentTimeMillis();

      for( String item : data ) {
         if( complete( item ) ) {
            continue; // already done in an earlier run, nothing to assign
         }
         reportMgr.assignWork( rpt, item );
      }

      // No new work left to assign; block until everything in flight is done.
      reportMgr.waitForWorkToFinish();

      final long elapsedSeconds = (System.currentTimeMillis() - startTime) / 1000;
      System.out.println("Done.  Elapsed time = " + elapsedSeconds +" seconds.");
   }

}

ReportManager

这个类不是线程安全的,只是让你的原始循环继续调用assignWork()直到你没有报告来分配然后继续调用它直到完成所有工作,例如waitForWorkToFinish(),如上所示。 (fwiw,我认为你不能说这里的任何类都特别是线程安全的。)

/**
 * Hands units of work to a fixed array of ReportWorker slots and polls those
 * slots until they finish. Each assignment runs on its own new thread.
 *
 * <p>Intended to be driven from a single thread: call {@link #assignWork}
 * repeatedly, then {@link #waitForWorkToFinish()}.
 */
public class ReportManager {

   /** Milliseconds to sleep between polls (0.5 seconds for testing). */
   public int polling_delay = 500;
   //public int polling_delay = 60 * 1000; // wait 1 minute.
   // not high throughput millions of reports / second, we'll run at a slower tempo.

   /** Number of worker slots (just 3 for testing). */
   public int nWorkers = 3;

   /** Running count of assignments handed out so far. */
   public int assignedCnt = 0;

   /** The worker slots; a slot's worker is replaced once it completes or fails. */
   public ReportWorker workers[];

   /** Creates the manager and fills every slot with a fresh worker. */
   public ReportManager() {
      workers = new ReportWorker[ nWorkers ];
      int slot = 0;
      while( slot < nWorkers ) {
         workers[slot] = new ReportWorker( slot );
         System.out.println("Created worker #"+slot);
         ++slot;
      }
   }

   /** Logs the outcome, tears down the worker in {@code slot} and installs a new one. */
   private ReportWorker replaceWorker( int slot, String logPrefix ) {
      System.out.println(logPrefix+workers[slot]+", resetting worker.");
      workers[slot].teardown();
      ReportWorker fresh = new ReportWorker( slot );
      workers[slot] = fresh;
      return fresh; // the replacement starts out available
   }

   /** A report failed: recycle the slot. */
   private ReportWorker handleWorkerError( int i ) {
      return replaceWorker( i, "handlerWokerError(): failure in " );
   }

   /** A report completed successfully: recycle the slot. */
   private ReportWorker handleWorkerComplete( int i ) {
      return replaceWorker( i, "handleWorkerComplete(): success in " );
   }

   /** Replaces {@code current} if it has failed or completed; returns the slot's worker. */
   private ReportWorker recycleIfFinished( int slot, ReportWorker current ) {
      if( current.hasError() ) {
         current = handleWorkerError( slot );
      }
      if( current.isComplete() ) {
         current = handleWorkerComplete( slot );
      }
      return current;
   }

   /** Sleeps for one polling interval; an interruption is logged and ignored. */
   private void pause() {
      try {
         Thread.sleep( polling_delay );
      } catch( InterruptedException e ) {
         System.out.println("assignWork: sleep interrupted, ignoring exception "+e);
      }
   }

   /** Recycles finished slots and counts the workers that are initialized or running. */
   private int activeWorkerCount() {
      int busy = 0;
      for( int slot = 0; slot < nWorkers; ++slot ) {
         ReportWorker current = workers[slot];
         System.out.println("activeWorkerCount() i="+slot+", checking worker="+current);
         current = recycleIfFinished( slot, current );
         if( current.isInitialized() || current.isRunning() ) {
            ++busy;
         }
      }
      System.out.println("activeWorkerCount() activeCnt="+busy);
      return busy;
   }

   /**
    * Recycles finished slots and returns the first available worker, or
    * {@code null} when every slot is busy. Every slot is visited even after
    * a candidate has been found.
    */
   private ReportWorker getAvailableWorker() {
      // Replacement workers are created from scratch; pooling and
      // re-initializing them instead might be worth measuring.
      System.out.println("\n-----");
      ReportWorker chosen = null;
      for( int slot = 0; slot < nWorkers; ++slot ) {
         ReportWorker current = workers[slot];
         System.out.println("getAvailableWorker(): i="+slot+" worker="+current);
         current = recycleIfFinished( slot, current );
         if( !current.isAvailable() || chosen != null ) {
            continue;
         }
         System.out.println("Apparently worker "+current+" is 'available'");
         chosen = current;
         System.out.println("getAvailableWorker(): i="+slot+" now firstAvailable = "+chosen);
      }
      return chosen;
   }

   /**
    * Blocks until a worker is available, initializes it with the given work
    * item and starts it on a new thread.
    *
    * @param rpt  report name handed to the worker
    * @param data work item handed to the worker
    */
   public void assignWork(  String rpt, String data ) {
      ReportWorker worker;
      while( (worker = getAvailableWorker()) == null ) {
         System.out.println("assignWork: No workers available, sleeping for "+polling_delay);
         pause();
      }
      ++assignedCnt;
      worker.initialize( rpt, data ); // or whatever else you need.
      System.out.println("assignment #"+assignedCnt+" given to "+worker);
      new Thread( worker ).start(); // that is pretty much it, let it go.
   }

   /** Polls until no worker is initialized or running any more. */
   public void waitForWorkToFinish() {
      for( int active = activeWorkerCount(); active >= 1; active = activeWorkerCount() ) {
         System.out.println("waitForWorkToFinish(): #active workers="+active+", waiting...");
         pause();
      }
   }
}

ReportWorker

/**
 * One unit of report work. A worker is initialized on the coordinating
 * thread, executed via {@link #run()} on a thread of its own, and polled by
 * the coordinating thread through the status accessors.
 *
 * <p>{@code status} and {@code initialized} are {@code volatile} because they
 * are written by the thread executing {@link #run()} and read by the polling
 * thread; without that the poller has no guarantee of ever seeing the change.
 */
public class ReportWorker implements Runnable {
      int test_delay = 10*1000; //sleep for 10 seconds.
      // (actual code would be generating PDF output)

      /** Lifecycle states of a worker. */
      public enum StatusCodes { UNINITIALIZED,
          INITIALIZED,
          RUNNING,
          COMPLETE,
          ERROR }


      int id = -1;
      // Written in run() on the worker thread, read by the polling thread.
      volatile StatusCodes status = StatusCodes.UNINITIALIZED;
      volatile boolean initialized = false;
      public String rpt = "";
      public String data = "";
      //Engine eng;
      //PDFExportThread pdfExporter;
      //DataSource_type cn;

      /** @return true once initialize() has succeeded; stays true while running and afterwards */
      public boolean isInitialized() { return initialized; }
      /** @return true while no work has been assigned to this worker */
      public boolean isAvailable()   { return status == StatusCodes.UNINITIALIZED; }
      /** @return true while run() is executing */
      public boolean isRunning()     { return status == StatusCodes.RUNNING; }
      /** @return true after run() has finished successfully */
      public boolean isComplete()    { return status == StatusCodes.COMPLETE; }
      /** @return true after initialize() or run() has failed */
      public boolean hasError()      { return status == StatusCodes.ERROR; }


      /**
       * @param id identifier of this worker, used in {@link #toString()}
       */
      public ReportWorker( int id ) {
          this.id = id;
      }

      /** @return a description of the form {@code ReportWorker.<id>(<status>)/<rpt>/<data>} */
      @Override
      public String toString( ) {
         return "ReportWorker."+id+"("+status+")/"+rpt+"/"+data;
      }

      /**
       * Stores the work item and marks this worker INITIALIZED.
       *
       * @param rpt  report name
       * @param data work item (data[i] in the original code)
       * @throws RuntimeException wrapping any failure; the status is set to ERROR first
       */
      // the example code doesn't make clear if there is a relationship between rpt & data[i].
      public void initialize( String rpt, String data /* data[i] in original code */  ) {
         try {
            this.rpt = rpt;
            this.data = data;
            /* uncomment this part where you have the various classes availble.
             * I have it commented out for testing.
            cn = ds.getConnection();   
            Engine eng = new Engine(Engine.EXPORT_PDF);
            eng.setReportFile(rpt); //rpt is the report name
            eng.setConnection(cn);
            eng.setPrompt(data, 0);
            ReportProperties repprop = eng.getReportProperties();
            repprop.setPaperOrient(ReportProperties.DEFAULT_PAPER_ORIENTATION, ReportProperties.PAPER_FANFOLD_US);
            */
            status = StatusCodes.INITIALIZED;
            initialized = true; // want this true even if we're running.
         } catch( Exception e ) {
            status = StatusCodes.ERROR;
            throw new RuntimeException("initialze(rpt="+rpt+", data="+data+")", e);
         }
      }

      /**
       * Performs the work: marks the worker RUNNING, sleeps for
       * {@code test_delay} milliseconds in place of the real export, then
       * marks it COMPLETE. On failure the status becomes ERROR and the cause
       * is rethrown wrapped in a RuntimeException.
       */
      @Override
      public void run() {
         status = StatusCodes.RUNNING;
         System.out.println("run().BEGIN: "+this);
         try {
            // delay for testing.
            try { Thread.sleep( test_delay ); }
            catch( InterruptedException e ) { System.out.println(this+".run(): test interrupted, ignoring "+e); }
            /* uncomment this part where you have the various classes availble.
             * I have it commented out for testing.
            eng.execute();
            PDFExportThread pdfExporter = new PDFExportThread(eng, sFileName, sFilePath);
            pdfExporter.execute();
            */
            status = StatusCodes.COMPLETE;
            System.out.println("run().END: "+this);
         } catch( Exception e ) {
            System.out.println("run().ERROR: "+this);
            status = StatusCodes.ERROR;
            throw new RuntimeException("run(rpt="+rpt+", data="+data+")", e);
         }
      }

      /**
       * Releases whatever this worker holds. Only prints a warning when
       * called on a worker that is uninitialized or still running; the real
       * cleanup is commented out for testing.
       */
      public void teardown() {
         if( ! isInitialized() || isRunning() ) {
            System.out.println("Warning: ReportWorker.teardown() called but I am uninitailzied or running.");
            // should never happen, fatal enough to throw an exception?
         }

         /* commented out for testing.
           try { cn.close(); } 
           catch( Exception e ) { System.out.println("Warning: ReportWorker.teardown() ignoring error on connection close: "+e); }
           cn = null;
         */
         // any need to close things on eng?
         // any need to close things on pdfExporter?
      }
}