- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Every input value is re-emitted unchanged as the output key, paired with the
 * {@link NullWritable} singleton as the output value. No state is kept between calls.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input pairs:  <0,2018-3-3 c> <11,2018-3-4 d>

    /**
     * Emits the input value as the output key with an empty value.
     *
     * @param key     input key; not used by this mapper (the example above suggests a
     *                position/offset within the input — confirm against the input format)
     * @param value   input record, forwarded as the output key
     * @param context task context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Forward the record directly: the previous shared static Text field was only
        // ever rebound to this same object, so it added mutable global state and nothing else.
        context.write(value, NullWritable.get());
    }

    // Example output pairs: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
11种比大熊猫还珍贵的动物 大熊猫是我国特有的珍稀动物之一
-
互联网商业洞察|确认过眼神,你是我要找的“威廉梨人”
-
李登辉|李登辉妻子7月30日再赴医院探视,院方高层闭口不谈
-
「吉祥说历史」可惜两个儿子都没听,大祸临头被杀,刘伯温临终前留下遗言
-
-
陈乔恩|陈乔恩与小9岁老公甜蜜出游,夫妻穿情侣装秀恩爱,十指紧扣超甜
-
-
日子不断前进|这妹子模仿的还挺像,搞笑GIF:还别说
-
-
影子少年|20万能开出60万的面子,全新“皇冠”TRD真香,国产后或成爆款
-
-
浦东新区区委组织部|浦东唐镇:“四史”赋能联勤联动站建设,践行“人民城市”理念
-
梦幻西游兽哥|【我说梦幻】难道梦幻西游玩家都有受虐倾向?新区很难为啥还要去玩呢?
-
阿珂小星座|这几大星座对待爱情,翻脸比翻书还快,变化无常
-
魔兽世界怀旧服安其拉奥罗技能效果职业站位一览 奥罗如何打|魔兽世界怀旧服安其拉奥罗技能效果职业站位及注意事项,奥罗如何打
-
靠边站|冬瓜南瓜靠边站,这是秋天最该吃的瓜!2块炒一盘,很多人经常吃
-
-
-
-
吧吧嘛嘛|德系神车真的从未改变?,捷达VS5碰撞得分垫底