Question

我是 Apache Camel 的新手，正在使用 Red Hat JBoss Developer Studio 11.0.0.GA。我想从一张表中读取大量记录，并把它们插入到另一张表中。由于无法一次性插入所有记录（我认为 Camel 在一次插入 7000 条记录时存在限制），我使用了 Camel 的 loop（循环）。首先，我读取所有记录，对它们做一些处理，并把结果列表保存到 Exchange 的属性（property）中。另外，我还设置了一个 Integer 类型的属性，用来表示当前迭代需要跳过多少条记录。现在的问题是：在第三轮迭代中，Exchange 对象发生了变化。我调试后发现 exchangeId 不一样了，因此我之前设置的属性全部丢失了。

    <!-- Camel Blueprint context containing a single route that copies rows in batches using a loop. -->
    <camelContext id="_context1" xmlns="http://camel.apache.org/schema/blueprint">
    <route id="_route1">
        <!-- The route starts from the file endpoint on work/input. -->
        <from id="_from1" uri="file:work/input"/>
        <!-- Read the id and CRATERSLIST columns of ABNIYEH_BRIDGE through the sqlAbniyeh endpoint. -->
        <to id="_to2" uri="sqlAbniyeh: select id , CRATERSLIST from ABNIYEH_BRIDGE "/>
        <!-- Loop flag: starts as Boolean true. -->
        <setProperty propertyName="isThereAnyRecord">
            <simple resultType="java.lang.Boolean">true</simple>
        </setProperty>
        <!-- doWhile loop: repeats while the isThereAnyRecord exchange property is not false. -->
        <loop doWhile="true" id="_loop1" >
            <simple>${exchangeProperty.isThereAnyRecord} != false</simple>                
            <!-- Processor bean "craters"; presumably the process(Exchange) method shown in this question (confirm against the bean definition). -->
            <process id="_process1" ref="craters"/>
            <!-- Insert the current message body into Z_ABNIYEH_BRIDGE_CRATERS; the endpoint is configured with batch=true. -->
            <to id="_to1" uri="sqlAbniyeh:  insert into Z_ABNIYEH_BRIDGE_CRATERS( ID , NUMBER1 , LENGTH , BRIDGE_ID) values( HIBERNATE_SEQUENCE.nextval , :#value1 , :#value2 , :#id)?batch=true"/>
        </loop>
    </route>
</camelContext>

这是我的 process 方法：

    /**
     * Prepares the next batch of records and places it in the message body.
     *
     * <p>State is carried between invocations through exchange properties:
     * <ul>
     *   <li>{@code listOfRecords} - the full, organised list of records;</li>
     *   <li>{@code numberOfRecordsToSkip} - index of the first record of the next batch;</li>
     *   <li>{@code isThereAnyRecord} - set to {@code false} once the final batch has been prepared.</li>
     * </ul>
     * On the first invocation (no {@code listOfRecords} property yet) the list is built from
     * the incoming message body via {@code OrganizeListForInsert}.
     *
     * @param exchange the current exchange; its properties and IN body are read and updated
     * @throws Exception declared by the signature; failures inside the method are caught and
     *                   printed, not rethrown
     */
    @SuppressWarnings("unchecked")
    public void process(Exchange exchange) throws Exception {
        try
        {
            System.out.println("Entered Process method!");

            // Start index of this batch; 0 when the property has not been set yet.
            Integer storedSkip = (Integer) exchange.getProperty("numberOfRecordsToSkip");
            int numberOfRecordsToSkip = storedSkip != null ? storedSkip : 0;

            List<Map<String, Object>> newList =
                    (List<Map<String, Object>>) exchange.getProperty("listOfRecords");
            if (newList == null)
            {
                // It occurs just the first time: build the full list from the message body.
                List<Map<String, Object>> currentList =
                        (List<Map<String, Object>>) exchange.getIn().getBody();
                newList = OrganizeListForInsert(currentList);
            }

            int numberOfRecordsToSkipForTheNextTime;
            int temp = numberOfRecordsToSkip + NUMBER_OF_RECORDS_FOR_EACH_ROUND;
            if (temp < newList.size())
            {
                numberOfRecordsToSkipForTheNextTime = temp;
            }
            else
            {
                // Final batch: it ends at the end of the organised list. The size must come
                // from newList (the list being sliced below), not from the raw body list,
                // which is only available on the first invocation.
                numberOfRecordsToSkipForTheNextTime = newList.size();
                exchange.setProperty("isThereAnyRecord", false);
            }

            exchange.setProperty("numberOfRecordsToSkip", Integer.valueOf(numberOfRecordsToSkipForTheNextTime));
            exchange.setProperty("listOfRecords", newList);

            // Copy the slice so the body does not share a view with the stored list.
            List<Map<String, Object>> sublistOfNewList =
                    new ArrayList<Map<String, Object>>(
                            newList.subList(numberOfRecordsToSkip, numberOfRecordsToSkipForTheNextTime));
            exchange.getIn().setBody(sublistOfNewList);
        }
        catch (Exception e)
        {
            // Failures are only printed, as before; the exchange keeps whatever state it had
            // when the failure occurred.
            e.printStackTrace();
        }
        System.out.println("End of everything!!");
    }

Answer 1

正如Bedla所说，我应该使用EIP而不是用Java编写所有内容。我使用了Splitter EIP，它适用于我的情况。这是新代码：

# Random-search hyper-parameter tuning for an xgboost regression model.
#
# Expects X_train / Y_train / X_test / Y_test to exist already (defined elsewhere).
# Runs 100 random parameter draws, scores each with 5-fold cross-validation, keeps the
# draw with the lowest mean test RMSE, refits on the training data and reports the MSE
# on the held-out test data.
library(xgboost)

# Matrix for xgb: dtrain and dtest, "label" is the dependent variable
dtrain <- xgb.DMatrix(X_train, label = Y_train)
dtest <- xgb.DMatrix(X_test, label = Y_test)

# Best configuration found so far.
best_param <- list()
best_seednumber <- 1234
best_rmse <- Inf
best_rmse_index <- 0

# Cross-validation settings (identical for every draw).
cv.nround <- 1000
cv.nfold <- 5 # 5-fold cross-validation

set.seed(123)
for (iter in 1:100) {
  # Draw one random parameter set.
  param <- list(objective = "reg:linear",
                eval_metric = "rmse",
                max_depth = sample(6:10, 1),
                eta = runif(1, .01, .3), # Learning rate, default: 0.3
                subsample = runif(1, .6, .9),
                colsample_bytree = runif(1, .5, .8),
                min_child_weight = sample(1:40, 1),
                max_delta_step = sample(1:10, 1)
  )
  seed.number <- sample.int(10000, 1) # set seed for the cv
  set.seed(seed.number)
  mdcv <- xgb.cv(data = dtrain, params = param,
                 nfold = cv.nfold, nrounds = cv.nround,
                 verbose = FALSE, early_stopping_rounds = 8, maximize = FALSE)

  # CV score of this draw at its best (early-stopped) iteration.
  min_rmse_index <- mdcv$best_iteration
  min_rmse <- mdcv$evaluation_log[min_rmse_index]$test_rmse_mean

  if (min_rmse < best_rmse) {
    best_rmse <- min_rmse
    best_rmse_index <- min_rmse_index
    best_seednumber <- seed.number
    best_param <- param
  }
}

# The best index (min_rmse_index) is the best "nround" in the model
nround <- best_rmse_index
set.seed(best_seednumber)
# Fit the final model on the TRAINING data; fitting on dtest would make the error
# computed below a training error rather than a test error.
xg_mod <- xgboost(data = dtrain, params = best_param, nrounds = nround, verbose = FALSE)

# Check error in testing data
yhat_xg <- predict(xg_mod, dtest)
(MSE_xgb <- mean((yhat_xg - Y_test)^2))

以下是使用 Splitter 的路由配置：

    <!-- Camel Blueprint context containing a single route that inserts rows one message at a time using a splitter. -->
    <camelContext id="_context1" xmlns="http://camel.apache.org/schema/blueprint">
    <route id="_route1">
        <!-- The route starts from the file endpoint on work/input. -->
        <from id="_from1" uri="file:work/input"/>
        <!-- Read the id and CRATERSLIST columns of ABNIYEH_BRIDGE through the sqlAbniyeh endpoint. -->
        <to id="_to2" uri="sqlAbniyeh: select id , CRATERSLIST from ABNIYEH_BRIDGE "/>
        <!-- Processor bean "craters" runs once, before the split. -->
        <process id="_process1" ref="craters"/>
        <!-- Split on the message body; the insert below runs for each split part. -->
        <split>
            <simple>${body}</simple>
            <to id="_to1" uri="sqlAbniyeh:  insert into ABNIYEH_BRIDGE_CRATERS( ID , NUMBER1 , LENGTH , BRIDGE_ID) values( HIBERNATE_SEQUENCE.nextval , :#value1 , :#value2 , :#id)"/>
        </split>
    </route>
</camelContext>

Camel loop 在第三次迭代中使用了不同的 Exchange

1 个答案: