- 主页 > 生活百科 > Hadoop数据去重
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input line is emitted unchanged as the output key, paired with a
 * {@link NullWritable} value, so that identical lines become identical keys.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as the key of the output pair.
     *
     * <p>Example input pairs: {@code <0,2018-3-3 c>} {@code <11,2018-3-4 d>}<br>
     * Example output pairs: {@code <2018-3-3 c,null>} {@code <2018-3-4 d,null>}
     *
     * @param key     key of the input record; not used
     * @param value   the input line, written out as the output key
     * @param context job context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the line directly: no intermediate (and previously static, shared)
        // Text field is needed just to forward the value.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
-
只缘身在此山中的前一句诗是什么,只缘身在此山中说的是哪一座山-
-
-
洪金宝|高丽虹罕见出镜,身材苗条难掩气质,与老公洪金宝同框像两代人
-
宫闱逸事|乾隆十二后妃真实容貌:一妃子像极了富察皇后,顺嫔并非绝色美人
-
美团|苹果新发布的Apple Watch居然不敌小天才儿童手表?
-
-
枕边文化|手动的还是自动的?老司机给你答案,哪个是安全的
-
电竞幽默|V5被SN全面碾压,想赢的Sofm强得离谱,就问TES现在慌不慌?
-
python:教你用爬虫通过代理自动刷网页的浏览量
-
-
华为手机华为P40 Pro+带来前无古人的10倍光学变焦
-
健康科普范医生|常吃3物,杀菌消炎,促进毒素排出,提高抗病能力,秋季不想生病
-
手机大魔王12四千档起步,苹果手机越做越小,价格也越来越低,iPhone
-
【产建设兵团】首次双双降至个位!新增确诊病例:除湖北外省份9例,湖北除武汉外其他地市5例
-
子弹财经|在线教育暑期狂撒45亿背后:9元课每卖一单亏百元
-
-
荷叶妈咪|多半因为小时候的3种“懂事”行为,家长要注意,孩子长大情商低
-
中国地震台网速报|青海海西州格尔木市发生3.2级地震,震源深度7千米
-