- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job: forwards every input record's text as
 * the output key, paired with a {@link NullWritable} placeholder value.
 *
 * <p>Sample input (from the original notes): {@code <0, "2018-3-3 c"> <11, "2018-3-4 d">}
 * <br>Sample output: {@code <"2018-3-3 c", null> <"2018-3-4 d", null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the record text unchanged as the key; the incoming key is ignored.
     *
     * @param key     input key of the record (unused)
     * @param value   text of the record, written out as the output key
     * @param context task context that receives the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the record directly: no shared static holder is needed, and
        // avoiding one keeps mapper instances free of cross-instance state.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
周到|东航加纽约达美增底特律西雅图,美联航“一门心思”想让旧金山早点每日班 | 中美航班
-
司马懿|可以秒射手的法师排名,不知火舞上榜!第一射手不敢出泉水
-
『科客网』拍视频乐趣翻倍!华为 nova7 Pro 前后双景录像详解:成年人的福音
-
-
-
Intel|13代酷睿冲上5.5GHz 微星600系主板加入支持:Zen4来战
-
-
求职|90后大量出现“啃老族”,因公考3年不找工作,一问就是还在备考
-
电脑不工作怎么重装系统(电脑不工作怎么重装系统?)
-
-
「日本」坏了!截至5月26日12时,惊天噩耗席卷日本 ,美国:离我远点!
-
经济日报|奔跑在开放前沿——新疆全面落实“一带一路”倡议和对外开放纪实
-
郑恺|苗苗被曝畸形婚姻观!娘家5口住婚房,妹妹没分寸,郑恺表现差劲
-
『央视』欧盟已动员近3.4万亿欧元用于欧盟经济振兴
-
-
德业弥珍|“蜜汁腿”美成宅男梦想,凭腿撑起全台收视率,日本气象台最美主持
-
[辽宁男篮]大反转!CBA豪强官宣重大决定,彻底弃用全华班,惨遭球迷嘲讽
-
-
|田家五一要回九江?云昊直播中透露安排,许妈姚爸心愿很简单
-
戴维斯|G5要凉?湖人公布伤病报告,詹姆斯等5人有伤,浓眉出战成疑