- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value (one line of text) is emitted unchanged as the
 * output key, paired with a {@link NullWritable} placeholder value. Because
 * the shuffle groups records by key, identical lines end up in the same
 * reduce call.
 *
 * <p>Example (the input key is presumably the byte offset of the line, as
 * produced by the default text input format):
 * <pre>
 *   input:  &lt;0, "2018-3-3 c"&gt;  &lt;11, "2018-3-4 d"&gt;
 *   output: &lt;"2018-3-3 c", null&gt;  &lt;"2018-3-4 d", null&gt;
 * </pre>
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the incoming line as the output key.
     *
     * @param key     input key of the record; not used
     * @param value   the line of text to de-duplicate
     * @param context sink for the emitted (line, null) pair
     * @throws IOException          if the framework fails to write the pair
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the value directly: no shared static holder is needed, which
        // avoids mutable state being visible across mapper instances.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
-
果果妈妈育儿经|宝妈的处理方法,告诉你什么叫熊家长,18包方便面被熊孩子捏碎
-
外交部发言人|美国防部长称希望年底前访华,外交部回应
-
-
-
一个孩子聪明与否是先天父母的基因问题还是后天的教育呢
-
主营业务|国联证券回A:提升资本实力打造优质中大型券商
-
DeepTech深科技|马斯克改写人类航天史!SpaceX实现全球首次商业载人发射,刚刚
-
-
-
如何应对老婆的冷漠,聪明女人怎么对待老公的冷暴力-
-
天使左翼溢@撞色运动上衣加短裤,时尚感提升不止一点点,运动风吴昕来袭
-
反派低智剧情离谱能忍,唯独妆容半永久、紧身衣的“女主”忍不了
-
-
慈禧太后垂帘听政,是为谁?慈禧太后重新垂帘听政的理由_5
-
瑞丽网|杨超越、赵露思……这些人间在逃公主,都在穿淘宝货???
-
-
晨娱秀场|《陈情令》永远都难以复制,“双男主”就会火吗?不满足这5点
-
-