- 主页 > 生活百科
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value (one line of text) is emitted as the output key,
 * paired with the {@link NullWritable} singleton as the value. The input key is
 * ignored.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    // Reusable output key, one per mapper instance. It is filled with a copy of
    // each record instead of being re-pointed at the caller's object, so no
    // state is shared between mapper instances running in the same JVM.
    private final Text field = new Text();

    /**
     * Emits the record's text as the output key with a null value.
     *
     * @param key     input key; unused (the example data suggests it is the
     *                line's offset in the file -- confirm against the input format)
     * @param value   the line of text to forward as the output key
     * @param context job context the output pair is written to
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Example input pairs:  <0, "2018-3-3 c"> <11, "2018-3-4 d">
        field.set(value);
        context.write(field, NullWritable.get());
        // Example output pairs: <"2018-3-3 c", null> <"2018-3-4 d", null>
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
-
鹰之翼|下场吃了东西后立即返场演讲,令人尊敬!叙利亚总统突感昏厥
-
65岁元彪消瘦现身,坐露天咖啡店抽雪茄,面部崎岖皮肤松弛显老态!
-
周到|“早餐工程”流动餐车来了 各路餐车争“鲜”亮相
-
北晚新视觉网|洛杉矶骚乱533人被警方逮捕,美国示威活动蔓延多地
-
-
马斯克在德州招人开发人脑植入设备 2020电动汽车行业现状及发展前景趋势分析研究报告
-
-
-
-
大空头国际视野|10年期国债收益率的最新上涨再次引发了通货再膨胀预期
-
年华不复|对职场女性来说,苛求完美暴露了你什么?做一个取悦者是好的吗?
-
-
-
网易娱乐|郑恺发道歉声明承认火锅店抄袭:已将该部分撤下
-
-
[白血病]“空中对话”来袭,透过临床实践看再生障碍性贫血的诊疗
-
莫少聪|曝莫少聪看不上洪欣3个原因:曾比刘德华咖位高,前程好,婚恋魅力大
-
翔哥文史|曹丕称帝后,没有杀死汉献帝,曹丕不怕汉献帝造反吗?,原创
-