- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job: emits every input line as an output
 * key paired with a {@link NullWritable} placeholder value.
 *
 * <p>The mapper holds no state. Each record is forwarded directly, so no
 * field is shared between calls or between mapper instances.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input:  <0,2018-3-3 c> <11,2018-3-4 d>
    // Example output: <2018-3-3 c,null> <2018-3-4 d,null>

    /**
     * Writes the line content as the output key; the incoming key is not used.
     *
     * @param key     input key supplied by the framework (unused here)
     * @param value   the content of the current input record
     * @param context job context used to emit the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Emit the record itself as the key; the value carries no information.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
[元气满满]《元气满满》路透照曝光,杨洋泳池衣服湿透,身材壮硕胸肌让人眼馋
-
李亚鹏|被强制执行一个月后,再看李亚鹏的处境,李连杰全说对了
-
「轰炸机」印度准备扩建空军,重金采购12架轰炸机,要求2年内必须交付
-
阴性|疑似携带新冠病毒冷冻鸡翅流入?广西东兴、北流市回应:检测为阴性
-
电视剧|电视剧《仙剑奇侠传六》官宣:9.4分《琅琊榜》出品公司打造
-
智通财经 亚盛医药-B(06855)将在2020 AACR传递“中国声音”,携六项研究进展再登世界舞台
-
直面truth|给大脑创造一个能够协助你成长的学习环境吧
-
四川在线|数据寻迹“第四极”丨成渝万万没想到之哪个城市科技服务企业多?
-
央视新闻客户端|美国大选日:白宫外抗议者发生肢体冲突
-
-
-
发财树动不动就黄叶,冬天养护绕开这3点,整个冬天绿油油
-
LESSERAFIM|“HYBE的公主”LESSERAFIM错过发展黄金时机?出道争议仍未散?
-
知顿 二维码要被淘汰了?微信支付宝宣布,网友:这么快就来了?,原创
-
公司股本|天华超净:2020年限制性股票激励计划,每股16.69元
-
-
-
-
-