- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value (one line of text) is emitted unchanged as the
 * output key, paired with the {@link NullWritable} singleton as the value.
 * Using the whole line as the key is what lets the downstream reducer see
 * each distinct line exactly once.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as the output key with a null value.
     *
     * <p>Example input pairs: {@code <0,2018-3-3 c>} {@code <11,2018-3-4 d>}<br>
     * Example output pairs: {@code <2018-3-3 c,null>} {@code <2018-3-4 d,null>}
     *
     * @param key     the input key; not used by this mapper
     * @param value   the input line, written out as the output key
     * @param context the task context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the incoming value directly: no shared static holder is needed,
        // and the record emitted is the same one the original code produced.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
问董秘|那么贵公司置...,投资者提问:贵公司最新的公告有提到交易标的资产范围已基本确定
-
军事观察员@64架战机44艘战舰警告俄军,俄:军事打击,九国联军重返波罗的海
-
-
新作|《历史的天空》后,徐贵祥新作《英雄山》再谱抗战英雄传奇
-
对劳动人民的同情的诗句?古诗词中同情百姓疾苦的名句
-
知名演员何家劲深夜发文:“人神共愤!自有天收!”评论区沦陷
-
丁俊晖台球俱乐部有限公司.北京哪有环境比较好的台球厅?
-
-
-
-
讲武堂|进攻监狱失败,美国“起义军”再下一城:成功拿下亚特兰大停车场
-
青年|狐妖小红娘:月红缘分起始大揭秘!不是不爱,只是难以去爱
-
圈衡水|2020年7月16日衡水的天气情况(附京津冀地区限号提示)
-
-
-
-
|未来10天,深不见底,嘴上逞强,心在投降,再次追回旧爱的3星座
-
-
-
Adobe的人工智能原型可将物体粘贴到照片上 同时添加逼真的照明和阴影