- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value (one line of text) is emitted unchanged as the
 * output key, paired with a {@link NullWritable} placeholder value.
 *
 * <p>Example: input pairs {@code <0, "2018-3-3 c"> <11, "2018-3-4 d">} produce
 * {@code <"2018-3-3 c", null> <"2018-3-4 d", null>}.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line as the output key with a null value.
     *
     * @param key     input key; not used here (the sample data above suggests it
     *                is the line's offset in the file)
     * @param value   the line of text, written out as the output key
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the incoming Text directly: no shared static holder is needed,
        // and avoiding one keeps the mapper free of cross-instance mutable state.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
人民日报客户端|我国载人航天工程第3批预备航天员选拔工作完成
-
-
社会谈资|冯巩曾被保姆勒索,不给50万分手费就公开关系,事情真相竟是这样
-
-
AirPods|苹果AirPods被指控侵犯Gwee磁吸式收纳充电专利
-
-
CPE中国幼教展探索教育新模式,浙江童园新品首秀引关注
-
体坛克卜勒|阿扎尔不香吗?对比C罗比利时球星一数据太神奇,皇马夺冠吉祥物
-
强劲|国家税务总局:我国居民消费需求集中释放 国内消费市场强劲
-
-
太平洋电脑网供电强劲 表现全面的AMD B550新主板来了
-
-
引领时尚新时代|张碧晨晒出一组时尚街拍,又美出不一样的时髦
-
-
#健康之初TB#酸奶能减肥吗?喝哪种酸奶好?关于酸奶的7个问题,一次解答清楚
-
-
廖凡▲45岁影帝廖凡隐藏9年的爱妻,比他大5岁不算,还是我们的老熟人!
-
-
-
『上游新闻』两江新区入选国家新型工业化产业示范基地