我正在使用带有Centos的Cloudera VM设置一个Hadoop集群。它使用Eclipse Luna。
我有一个与Pig一起使用的UDF。这是我第一次为Pig编写UDF。以前的Pig脚本在没有UDF的情况下运行良好。当我运行这个猪脚本时,我收到以下错误:
无法生成逻辑计划。嵌套异常: org.apache.pig.backend.executionengine.ExecException: 错误1070:无法使用导入解析EasyDates.EasyDateMethods.exec: [,java.lang。,org.apache.pig.builtin。,org.apache.pig.impl.builtin。]
在以“CALC_UR_DAYS_BETWEEN”开头的pig脚本中发生此错误。见下文。
我花了3-4个小时在互联网上搜索(和测试),他们都参考了 - 正确设置Classpath, - 确保注册UDF, - 确保jar文件名与包名相同, - 确保包名称是工作路径中的目录,并且与包名称相同。
我已经完成了所有这些但我仍然得到了错误。
据我所知,一切都被恰当地命名,应该在哪里:
几个小时后我已经筋疲力尽了。我找不到任何其他的尝试。非常感谢任何其他见解!
Java来源:
package EasyDates;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.WrappedIOException;
public class EasyDateMethods extends EvalFunc <String> {
public String exec(Tuple input) throws IOException {
if (input == null || input.size() == 0)
return "0";
try{
Date date1;
Date date2;
String strDiff="0";
int intDiff = 0;
//Get the two string dates from the tuple:
String strDate1 = (String)input.get(0);
String strDate2 = (String)input.get(1);
//Convert them to Dates
date1 = stringToDate(strDate1);
date2 = stringToDate(strDate2);
//The the date difference:
intDiff = getDaysBetween(date1, date2);
//Since I must return the same data type as I call for this Pig method, this converts the
//difference in days to a string.
return Integer.toString(intDiff);
}catch(Exception e){
throw WrappedIOException.wrap("Caught exception processing input row ", e);
}
}
private Date stringToDate(String theDateString) {
//Make sure the Pig script formats the date format this way or whatever format you choose.
//Just make sure they agree.
SimpleDateFormat dateFormatter = new SimpleDateFormat ( "dd-MMM-yyyy" );
String dateInString = "12-May-2014";
Date theDate;
java.util.Date dateObject = null;
try {
dateObject = dateFormatter.parse ( theDateString );
System.out.println( dateObject );
System.out.println( dateFormatter.format ( dateObject ) );
//theDate = dateFormatter.format ( dateObject );
} catch ( Exception e) {
System.out.println( e.getMessage() + " " + e.getStackTrace() );
};
return dateObject ;
}
static int getDaysBetween(Date curDate, Date prevDate) {
//Precondition: the difference in days between the current meter read date and the last one is not known
//Postcondition: the difference in days between the current meter read date and the last one is known
Calendar currentDate = Calendar.getInstance();
Calendar previousDate = Calendar.getInstance();
currentDate.setTime(curDate);
previousDate.setTime(prevDate);
int theDiffinDays = 0;
int theDiffinYears = 0;
int currentDay;
int previousDay;
int currentYear;
int previousYear;
try {
currentDay = currentDate.get(Calendar.DAY_OF_YEAR);
System.out.println("currentDay is " + currentDay);
previousDay = previousDate.get(Calendar.DAY_OF_YEAR);
System.out.println("previousDay is " + previousDay);
currentYear = currentDate.get(Calendar.YEAR);
System.out.println("currentYear is " + currentYear);
previousYear = previousDate.get(Calendar.YEAR);
System.out.println("previousYear is " + previousYear);
if (currentYear == previousYear) {
theDiffinDays = currentDay - previousDay;
}
else
{
theDiffinYears = currentYear - previousYear;
//This assumes 2 contiguous years, eg 2016 and 2017; so this wouldn't work if the diff in years is greater than 1
if (isLeapYear(previousYear)) {
//The following has not been corrected for leap year:
//If the previous year is a leap year
theDiffinDays = 366 - previousDay + currentDay;
}
else {
//If the current year is a leap year or neither year is a leap year: (because the day of year should be inherent whether leap or not)
theDiffinDays = 365 - previousDay + currentDay;
}
}
//return theDiffinDays;
}
catch (Exception ex){
System.out.println(ex.getMessage() + " " + ex.getStackTrace());
}
return theDiffinDays;
}
private static boolean isLeapYear(int theYear){
//Precondition: the year is not designated as a leap year or not
boolean ans = false;
try {
switch (theYear){
case 2004: ans = true;
break;
case 2008: ans = true;
break;
case 2012: ans = true;
break;
case 2016: ans = true;
break;
case 2020: ans = true;
break;
case 2024: ans = true;
break;
case 2028: ans = true;
break;
case 2032: ans = true;
break;
case 2036: ans = true;
break;
case 2040: ans = true;
break;
case 2044: ans = true;
break;
case 2048: ans = true;
break;
default: ans = false;
}
}
catch (Exception ex){
System.out.println(ex.getMessage() + " " + ex.getStackTrace());
}
return ans;
}
}
猪脚本:
--Simple Pig script to read in a file with dates, and pass the dates to the EasyDate class
REGISTER /home/cloudera/data/EasyDates/EasyDates.jar;
DEFINE DaysBetween EasyDates.EasyDateMethods;
----------------------------------------------------Load the file--------------------------------------------
--The file needs two different dates in one row for this test
devicePageCountAll = LOAD 'Data_For_Test_Jar.txt' USING PigStorage('\t')
AS (
account_code:chararray,
serial_number:chararray,
reported_date:chararray,
reported_date2:chararray);
--dump devicePageCountAll;
--------------------------------------------------Get the date difference in days and store the result-----------------
devicePageCountAll2 = foreach devicePageCountAll {
CALC_UR_DAYS_BETWEEN = DaysBetween((ToString(REPLACE(reported_date, '\\"', ''), 'yyyy-MM-dd')), (ToString(REPLACE(reported_date2, '\\"', ''), 'yyyy-MM-dd')));
generate
account_code,
serial_number,
reported_date,
reported_date2,
(CALC_UR_DAYS_BETWEEN > 15000 ? 0 : CALC_UR_DAYS_BETWEEN) AS days_since_last_reported;
}
dump devicePageCountAll2;
谢谢!
答案 0 :(得分:1)
而不是这个
DEFINE DaysBetween EasyDates.EasyDateMethods;
尝试
DEFINE DaysBetween EasyDates.EasyDateMethods();