- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record is emitted unchanged as the output key, paired with a
 * {@link NullWritable} value. The input key is ignored.
 *
 * <p>Example input pairs: {@code <0, "2018-3-3 c">}, {@code <11, "2018-3-4 d">}.<br>
 * Example output pairs: {@code <"2018-3-3 c", null>}, {@code <"2018-3-4 d", null>}.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the incoming record as the key with an empty value.
     *
     * @param key     input key supplied by the input format; not used
     * @param value   the record to emit as the output key
     * @param context job context used to emit the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Emit the record directly: no shared static holder is needed, and
        // avoiding one keeps the mapper free of cross-instance mutable state.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
「局评」美军去意大利抢了50万个棉签?这场抢劫打了精美精日者的脸
-
廉颇秒杀皮肤|王者荣耀:传说皮肤一天烂大街,廉颇喜提秒杀皮肤,白蛇首次加入夺宝
-
「宋品牌教你操盘运营」微商运营策略具体是什么?宋品牌亲自揭秘微商运营具体策略
-
杂文体育-横竖二|CBA新赛季大黑马出现!休赛季大力引援,两名外援等待和球队合练
-
17173You料app宣告失败,专区裁员,你咋看
-
-
Java 编程语言中很少被人了解的特性-statement label
-
新民晚报|诺兰“时空三部曲”最新力作《信条》今日发布中国版独家预告
-
-
借呗怎么没了变成网商贷了,支付宝借呗变成网商贷如何恢复
-
-
6.0级!3.6级!…深夜一小时内,四川宜宾连发5次地震
-
比特币|11年前用披萨换得10000比特币的小伙:早套现了、错过暴富机会
-
-
蓝鲸财经■微信更新IOS版本:支持深色模式 可跟随苹果系统设置
-
中国象棋|象棋棋盘各条线路的名称术语,适合学习象棋的新手们看
-
肺癌,妻子|晚期肺癌患者,花掉120万,不想再治!只想保住偷存的82万私房钱
-
-
美人归|风尘女的上联:野花不种年年有,秀才对出下联,赢得美人归
-
明星婚姻|拒当赌王儿媳,看不上长孙之父却追星王一博,姚安娜有多清醒?