- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value is re-emitted unchanged as the output key, paired with a
 * {@link NullWritable} placeholder value. Because the framework groups map output by key,
 * identical records end up in the same reduce call.
 *
 * <p>Example (taken from the original sample data; keys look like byte offsets of each
 * line -- presumably the default text input format, verify against the job setup):
 * <pre>
 *   input : &lt;0,2018-3-3 c&gt; &lt;11,2018-3-4 d&gt;
 *   output: &lt;2018-3-3 c,null&gt; &lt;2018-3-4 d,null&gt;
 * </pre>
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the record value as the output key with a null placeholder value.
     *
     * @param key     input key supplied by the input format; not used
     * @param value   the input record, forwarded as the output key
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the incoming value directly; no shared static holder is needed, which
        // also avoids mutable state shared between mapper instances in the same JVM.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
OMG|PCL小组赛结束,四大流量战队各有悲喜:4AM晋级OMG被淘汰
-
环球网|故技重施!黄之锋又“碰瓷”,这次把目标对准这两国
-
-
-
管理|安徽淮南市凤台县一建筑工地塔吊倒塌 造成5人死亡
-
初木影视|最具悬念赛季重启了,大热门广东、外援齐整的北京与难捉摸的辽宁
-
游戏|《LOL》MSI2022淘汰赛:RNG 2:0 拿到赛点!
-
封面新闻|30秒|英国伯明翰发生多起持刀伤人事件,嫌犯监控曝光
-
鞠婧祎|鞠婧祎为了方便拎起裙摆,无意间看到鞋跟高度,真怀疑她咋走路的
-
张柏芝|张柏芝晒儿子夫妻庆生,看到第二天谢霆锋发的朋友圈,网友:心疼
-
-
-
-
火箭炮|美售台远程火箭炮,严重威胁登陆部队?一张照片让台军绝望了
-
-
-
『消费测评世界』一个月销量230万部,5G国产机不香吗?3月份国内iPhone销量激增
-
-
-