- 主页 > 生活百科 > Hadoop数据去重
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input value is emitted unchanged as the output key, paired with a
 * {@link NullWritable} placeholder value. The incoming key is ignored.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    /**
     * Reusable output key. Kept per mapper instance (not static) so that
     * separate mapper instances never share mutable state.
     */
    private final Text field = new Text();

    // Example input:  <0,2018-3-3 c><11,2018-3-4 d>
    // Example output: <2018-3-3 c,null> <2018-3-4 d,null>

    /**
     * Writes the input value as the output key with a null value.
     *
     * @param key     input key; unused (presumably the line's byte offset — confirm against the input format)
     * @param value   input record, forwarded as the output key
     * @param context job context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Copy the bytes into our own buffer instead of re-pointing the field at
        // the caller's object, so the pre-allocated Text is actually reused.
        field.set(value);
        context.write(field, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
2022我的新年愿望创意句子文案 2022我的新年愿望创意句子文案怎么写
-
亚太日报■自称不能给伴侣过生日难过,杜特尔特怒批隔离令下有人喝酒斗鸡
-
|进球网: 梅西中止续约谈判是为表达不满 巴托梅乌面临提前卸任
-
主动|果然“炒股不如买基金”!指数跌超40%,股基竟然翻倍赚!前3季度、近3年和5年最赚钱基金50强来了!
-
消化疾病|肝脏是否健康?看手脚4处变化,若你全占,或肝脏已经“垮”了
-
-
-
那些双胞胎甚至三胞胎、多胞胎一样的茶,让我眼花缭乱
-
加速器|多部门密集部署政策 消费复苏“加速器”启动
-
-
诈骗|假称出租房屋代为配置家电,骗走业主数十万用于还债,1人已落网
-
快了棒棒糖|展现疫情笼罩下的美国,美国时代周刊又一封面
-
-
-
#科技数码迷#11夺冠荣耀V30Pro垫底,本周最畅销单品分析,iPhone
-
-
陨石|陨石坠落现场!一颗流星划破长空,北欧三国夜空亮如白昼
-
#cnBeta#微软取消原定于3月在墨尔本举行的IoT in Action大会,受疫情影响
-
【车家号】本田又一中型SUV亮相,搭3.5升V6发动机+四驱系统!,对标汉兰达
-