- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value is emitted unchanged as the output key, paired with a
 * {@link NullWritable} value. The input key is not used.
 *
 * <p>Example input:  {@code <0,2018-3-3 c> <11,2018-3-4 d>}
 * <br>Example output: {@code <2018-3-3 c,null> <2018-3-4 d,null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Output key buffer, reused across {@code map} calls. It is an instance field (not
     * static) so no mutable state is shared between mapper instances.
     */
    private final Text field = new Text();

    /**
     * Emits the record's value as the output key with a null value.
     *
     * @param key     input key of the record (unused)
     * @param value   input value of the record; becomes the output key
     * @param context job context the output pair is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Copy the record into the reusable buffer instead of rebinding the reference.
        field.set(value);
        context.write(field, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
[中国交通报]加强国际协作,畅通国际防疫物资“绿色通道”
-
-
凌风潮流|50岁的咏梅颜值太高了,保养的太好了,扎的公主头宛如少女!
-
稀范数码|南卡NANK-POWER2评测:无线+有线的手机电源支架
-
#兄弟影室#库克发愁苹果手机充电接口必须改为“华为接口”,华为这次躺赢
-
『局地』山东半岛地区今日仍有雷雨 局地冰雹伴9级大风
-
手术|40多岁仍旧是"小姐姐",这说明:带走青春的,从来不是岁月
-
2021年新竹园中学摇号录取比例 新竹园中学怎么样
-
-
-
-
-
开弓没有回头箭是什么生肖最佳选择?开弓没有回头箭是什么生肖2021
-
-
羊城晚报|签约“免费整形模特” 千余人背上高额贷款
-
「华为」任正非突然宣布!美国始料未及,美科技界炸锅:华为的好日子来了
-
-
-
发电站|世界第一!我国新能源年发电量首次突破1万亿度 占比13.8%
-
我家乡的菜美食日记|三亚婚纱摄影哪家好?婚纱照工作室榜单前十名公开!新人必看