- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Every input record's text is emitted unchanged as the output key, paired with the
 * {@link NullWritable} singleton as a placeholder value.
 *
 * <p>Example input pairs: {@code <0, "2018-3-3 c"> <11, "2018-3-4 d">}<br>
 * Example output pairs: {@code <"2018-3-3 c", null> <"2018-3-4 d", null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the record text as the output key with an empty value.
     *
     * @param key     input key of the record; not used by this method
     * @param value   the record text, forwarded as the output key
     * @param context job context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Forward the incoming Text directly. No intermediate holder is needed, and
        // avoiding a mutable static field keeps mapper instances independent of each other.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
英特尔发布用轻薄型笔记本:搭载第11代Core处理器
-
-
ZAKER|美媒:10个迹象显示特朗普将会“惊喜胜利”
-
-
#武汉地铁#中国最“壕”城市! 花100亿举办世界军人运动会
-
用智能引领快乐走进新的领域|明朝猪叫啥?明朝皇帝颁发1圣旨,百姓恨骂不绝,朱元璋姓朱
-
赛事圈南京同曦连下赛季都直接放弃了吗?,同时封杀前助攻王和三分王
-
孩子|山东一女子怀孕25周产下龙凤胎,3个月后怀抱孩子竟猛扇自己耳光
-
奇趣堂下巴很窄,突然?荣耀X10真机曝光?升降镜头全面屏实锤
-
中风的“祸根”找到了,不是烟酒,医生提醒:多与这3个习惯有关
-
-
搞机人▲看完华为P40Pro落后了,三星Note20概念图:首次采用双曲真全屏
-
曝张柏芝秘密领证,与三胎生父出游照流出,4岁Marcus正脸像妈
-
-
教育坤坤说|属马人“最命苦”的出生月份,你家有吗?,十马九苦
-
子皓新说|但是这些历史事件却处处充满温情,都说历史是无情的
-
长不大的小朋友|这神同步,模仿的太神似了!简直是出神入化!,GIF爆笑图
-
咋搭配穿衣。女,典型A字身材,身高158,体重110,平时只穿裙子。表打击我!!!
-
-
逸才侃球|我依然认为登哥会赢,红队情怀!王猛:虽然天下已经被鹅城占领