- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input value (one line of text) is re-emitted unchanged as the output
 * key, paired with a {@link NullWritable} placeholder value. The incoming key is
 * ignored.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as the output key with an empty value.
     *
     * <p>Example input records: {@code <0, 2018-3-3 c>} {@code <11, 2018-3-4 d>}<br>
     * Example output records: {@code <2018-3-3 c, null>} {@code <2018-3-4 d, null>}
     *
     * @param key     input key; unused (presumably the line's byte offset, as the
     *                example values 0 and 11 suggest - confirm against the input format)
     * @param value   the line of text to emit as the output key
     * @param context job context used to write the output record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the incoming value directly; no intermediate (static) holder is needed.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
-
董事长|东华能源:2020年前三季度净利润约10.29亿元,同比增加1.88%
-
-
拜登正式被提名为美国总统候选人-民主党正式提名拜登为总统候选人
-
梦幻西游■梦幻西游:“160级灵饰”正式公布,大量服战老板退游或是先知!
-
-
-
任正非|光刻机,快了?中科院刚表态攻关卡脖子,任正非就来中科院了
-
-
王者解说|辅助成了重头戏,充斥着满满的“阴谋”,IG对战DMO
-
-
隆隆谈侃体育|球迷:太重情义,老而弥坚!成功留队张庆鹏挑战极限
-
-
《水晶之恋》播出20年,女主刘颖消失得无影无踪,男主于波却沦为热播剧的配角
-
-
官宣,真的是噩耗啊,年仅41岁,雷霆怎么会放过这个悍将,肯定是要笑了啊
-
参加旅行团,旅客最想享受到啥样的创新服务,譬如美女陪游
-
-
-
卡洛斯|此人被称为“球王”,实际上不会踢球,却与20支球队签约混了26年