- 主页 > 生活百科
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input line is emitted unchanged as the output key, paired with the
 * {@link NullWritable} singleton as its value, so identical lines end up
 * carrying identical keys.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input records: <0,2018-3-3 c><11,2018-3-4 d>

    /**
     * Emits the input line itself as the key with an empty value.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   the content of the current input record
     * @param context sink for the emitted key/value pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the incoming record directly; no shared static buffer is
        // needed, which also avoids mutable state shared between instances.
        context.write(value, NullWritable.get());
    }

    // Example output records: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
A00级纯电汽车重新占领市场,五菱宏光MINI销量连增
-
有人就有江湖,有江湖必有这道菜,大厨教你地道做法,美味下饭
-
新民晚报|品老字号美食 读百年人文故事 《上海老味道续集》新书发布
-
公安部通报坠湖事件|公安部通报坠湖事件 通报内容是?
-
大学|大学里“最烧脑”的3个专业,期末很容易挂科,但毕业不怕没工作
-
「裸体上街」伦敦男子裸体上街 仅用口罩遮敏感部位(图)
-
数码王者|为什么更多人选择了荣耀30?而不是红米K30Pro?
-
-
央视新闻|印尼松巴岛地区发生5.4级地震 震源深度10千米
-
【cnBetaTB】Teams认证的会议硬件,微软宣布一批新获得Microsoft
-
-
小鸽子开家老是丢怎么办-怎么防止刚开家的幼鸽乱落-
-
-
-
-
airpods怎么改名,airpods 3怎么改名
-
-
-
「二十国集团领导人年度峰会」沙特宣布二十国集团领导人年度峰会将以视频方式举行
-
家有汽车▲搭爱信8AT,颜值远超红旗H5,传祺再亮“王炸”!车长超5米