- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job: emits each input line as the output key,
 * paired with a {@link NullWritable} placeholder value.
 *
 * <p>Example input pairs (key, line): {@code <0,2018-3-3 c> <11,2018-3-4 d>}.
 * Resulting output pairs: {@code <2018-3-3 c,null> <2018-3-4 d,null>}.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Writes the incoming line unchanged as the map output key.
     *
     * @param key     the input key supplied with the line; not used
     * @param value   the text of the current input line
     * @param context the task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // The line itself is the key; no shared static holder is needed, so the
        // mapper keeps no mutable state between calls.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
无话不谈|微信的颠覆者跨越时代而来,腾讯能逃脱被颠覆的命运吗?
-
「广播机构」香港电台顾问团拿港警说事要求增拨资源,立法会议
-
痛心!浙江1岁男娃特重度烫伤,仍在ICU抢救!奶奶哭成泪人:都怪我
-
飞鱼财经评论|如何孵化国产珠宝品牌?,IPO观察丨周六福珠宝:“周氏”珠宝起名惰性
-
半月谈微信公号|群众对扶贫工作“有获得却无感”?基层治理要跟上
-
阿木木侃侃看|认出了毛晓彤,却没发现关晓彤,《家有儿女》到底藏了多少大咖
-
「星晓媛」太过理智的星座女,即使爱上一个人,也不会盲目深陷,太过成熟
-
明星八卦|黑芝麻糊广告舔碗小男孩,长大模样惊人,网友遗憾:岁月是把杀猪刀
-
-
-
极客科技控骨传导是什么梗?比普通运动耳机有何不同?南卡Runner cc测评
-
-
-
-
乔丹|第四冠到手,詹姆斯历史第几了?离乔丹还有多远?
-
詹姆斯|詹姆斯轻松撞开约基奇,力量联盟最强?听听姚明当年怎么说
-
-
还珠格格|《还珠格格》中的6位病美人,生了病的小燕子简直是全剧最美的了
-
我是一片孤独的云|徐璐为证拥有“超模腰”,就把20cm短衫穿身上,也真是厉害了
-
三昧|《西游记》中红孩儿的厉害另有所在,非三昧真火,却让孙悟空丧命