學習UDF編寫流程見:http://www.reibang.com/p/ff0913045610
1.截取請求地址
代碼:
package hiveUDF;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that pulls the request path out of an HTTP request line.
 *
 * <p>Example: {@code "GET /course/view.php?id=27 HTTP/1.1"} yields
 * {@code "/course/view.php?id=27"}.
 */
public class TruncationRequestAdd extends UDF {
    /**
     * Splits the request line on single spaces and returns its second field.
     *
     * @param add the raw request line; may be {@code null}
     * @return the second space-separated field, or {@code null} when the input is
     *         {@code null} or splits into fewer than three fields
     */
    public Text evaluate(Text add) {
        if (add != null) {
            // Expected layout: <method> <path> <protocol>
            String[] fields = add.toString().split(" ");
            if (fields.length >= 3) {
                return new Text(fields[1]);
            }
        }
        return null;
    }
}
- 截取主地址
代碼:
package hiveUDF;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that extracts the main address (scheme plus host) from an HTTP URL.
 *
 * <p>Example: {@code "http://www.ibeifeng.com/user.php?act=mycourse"} yields
 * {@code "http://www.ibeifeng.com"}.
 */
public class TruncationMainAdd extends UDF {
    private static final String SCHEME = "http://";

    /** Host followed by a path; group 1 is the path, e.g. {@code /user.php?act=mycourse}. */
    private static final Pattern HOST_THEN_PATH = Pattern.compile("http://[^/]+(/\\S*)");

    /**
     * Returns the part of the URL that precedes its path.
     *
     * @param add the URL; may be {@code null}
     * @return the scheme and host; the input unchanged when it has a host but no
     *         path; or {@code null} when the input is {@code null}, does not start
     *         with {@code http://}, or has an empty host
     */
    public Text evaluate(Text add) {
        if (add == null) {
            return null;
        }
        String address = add.toString();
        if (!address.startsWith(SCHEME)) {
            return null;
        }

        // Anchor the match at the start so the result is always a prefix of the input.
        Matcher m = HOST_THEN_PATH.matcher(address);
        if (m.lookingAt()) {
            // Cut at the position where the path group actually starts, rather than
            // searching the string again for the path text.
            return new Text(address.substring(0, m.start(1)));
        }

        // No path matched: a host-only URL is already the main address. Anything else
        // (nothing after the scheme, or an empty host) has no usable result.
        boolean hostOnly = address.length() > SCHEME.length()
                && address.indexOf('/', SCHEME.length()) < 0;
        return hostOnly ? new Text(address) : null;
    }
}
- 轉換日期格式
把日期格式為: "31/Aug/2015:00:04:37 +0800"
轉換為: "2015-08-31 00:04:37"
代碼:
package hiveUDF;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that reformats a timestamp such as {@code "31/Aug/2015:00:04:37 +0800"}
 * into {@code "2015-08-31 00:04:37"}.
 *
 * <p>The input pattern has no zone field, so only the leading date-time portion is
 * parsed; no time-zone conversion is applied.
 */
public class DateTransform extends UDF {
    // Kept per instance (not static): SimpleDateFormat is not synchronized.
    private final SimpleDateFormat inputFormat =
            new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
    // Pinned to the same locale as the input so the output does not depend on the
    // JVM's default locale (which can select a different calendar system).
    private final SimpleDateFormat outputFormat =
            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);

    /**
     * Converts one timestamp.
     *
     * @param input the timestamp text; may be {@code null}
     * @return the reformatted timestamp, or {@code null} when the input is
     *         {@code null} or cannot be parsed
     */
    public Text evaluate(Text input) {
        if (input == null) {
            return null;
        }
        try {
            Date parsed = inputFormat.parse(input.toString());
            return new Text(outputFormat.format(parsed));
        } catch (Exception e) {
            // Best effort: report the bad value and yield NULL for this row.
            e.printStackTrace();
            return null;
        }
    }
}