- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record is emitted unchanged as the output key, paired with a
 * {@link NullWritable} placeholder value. The incoming key is not used.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input pairs:  <0,2018-3-3 c> <11,2018-3-4 d>
    // (the key is presumably the line's byte offset from the default text input format)

    /**
     * Writes the record text itself as the output key.
     *
     * @param key     input key; ignored
     * @param value   the record text, forwarded as the output key
     * @param context Hadoop task context used to emit the output pair
     * @throws IOException          if the framework fails to write the pair
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit the incoming Text directly: no shared static holder is needed,
        // and avoiding one keeps mapper instances in the same JVM independent.
        context.write(value, NullWritable.get());
    }

    // Example output pairs: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
培训机构|北京各区已有200余校外培训机构申请线下复课
-
TVB新剧《新闻女王》:张家妍又漂亮又能干,为何在星网传媒无党无派?
-
问玉娱乐|因为走错路判17年,离世享受最高待遇,一生战功累累的将军
-
-
-
-
真心喜欢一个女孩子却不能接受她的一些性格,应该熬着生活下去还是各自分开
-
-
-
小明时尚说@条纹短裙穿出1米短腿,网友:还是穿短裤吧,鞠婧祎顶着素颜现身
-
-
『国服』蒙恬上线仅8个小时,已有人打上国服,一看胜率惹不起
-
-
沸腾人生|《沸腾人生》中三个“活得通透”的人,他们值得拥有幸福
-
儿童饮食|勤快宝妈做的晚餐,有荤有素,营养丰富,花钱不多,天天味道香
-
波妞时尚笔记|今年毛衣流行“披着穿”,保暖又时髦,难怪女星们总爱用它凹造型
-
电竞之城|RNG换上Betty有多离谱?LGD赛后采访:gala给的压力特别大
-
-
为啥|压缩饼干为啥这么抗饿放水中泡24小时后捞出,网友怕了怕了
-
|邻居61㎡新家看上去像200㎡,原来是用了客厅一体化,后悔晚知道