- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input value is re-emitted as an output key paired with the
 * {@link NullWritable} singleton, so that whole records become the keys
 * handed to the reduce stage.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    // Example input pairs: <0,2018-3-3 c> <11,2018-3-4 d>

    /**
     * Emits {@code value} as the output key with a {@link NullWritable} value.
     *
     * @param key     input key; not used by this mapper (presumably the line's
     *                offset in the input, judging by the example above — confirm
     *                against the configured input format)
     * @param value   the input record, written out unchanged as the output key
     * @param context job context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the incoming record directly. No intermediate (and in particular
        // no static) holder is kept, so mapper instances share no mutable state.
        context.write(value, NullWritable.get());
    }
    // Example output pairs: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
-
-
-
大学|9岁高考,10岁上大学的神童女孩,撕开了神童最后一块遮羞布
-
-
-
-
学生|美国西雅图一大学宿舍区超100名学生感染新冠肺炎
-
事项|交易异动!航发控制:不存在应披露而未披露的重大事项 近3个交易日上涨21.89%
-
闻谈历史|那秃顶和光头的人该怎么办?看完你就知道了,清朝一律要留辫子
-
【PlanetHolidayTB】美食是旅途中的亮点
-
「天都」28岁小伙腹痛入院,查出病情大哭不止!父亲:一年有200天都吃它
-
-
每天小情书我去劝架!,十条段子:你去把剩下的衣服晾好
-
网通社汽车频道 Q7 霸气归来 全面升级诠释王者无畏,全新奥迪
-
新民网|上海降温啦!雨水这波“返场”满分 明最高温将降回“2”字头
-
杭州女子家中离奇失联十多天!电梯井、地下室、楼顶……更多细节揭露
-
羊肉放冷冻要洗吗长期保存方法 羊肉放冰箱冷冻前洗吗
-
-
中国新闻网|7月多地收紧楼市 中国百城房价环比增速放缓