当前位置:网站首页>MR-WordCount
MR-WordCount
2022-06-28 05:38:00 【小山丘】
pom.xml
<dependencies>
    <!-- NOTE: hadoop-client already pulls in common/hdfs/mapreduce-client-core
         transitively; the explicit entries are kept for clarity. -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>3.2.2</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <!-- Main-class entry point written into the jar manifest -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.4</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <!-- FIX: must match the actual driver class
                             (package com.flink.mr.demo.wordcount, class NeoWordCount);
                             the original pointed at com.mr.demo.wordcount.WordCount,
                             which does not exist in this project. -->
                        <mainClass>com.flink.mr.demo.wordcount.NeoWordCount</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
        <!-- JDK / compiler settings -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.0</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
                <encoding>UTF-8</encoding>
            </configuration>
        </plugin>
    </plugins>
</build>
NeoWordCount.java
MapReduce编程案例
package com.flink.mr.demo.wordcount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
import java.net.URI;
/**
 * Classic MapReduce word count, submitted from a development machine
 * (Windows path in {@code setJar}) to a remote YARN cluster at 10.1.1.1.
 *
 * <p>Mapper emits (word, 1) per whitespace-separated token; the reducer
 * (also used as combiner) sums the counts per word.
 */
public class NeoWordCount {

    /** Splits each input line on single spaces and emits (word, 1). */
    public static class NeoWordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final LongWritable ONE = new LongWritable(1);
        private final Text outputK = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String s : value.toString().split(" ")) {
                // FIX: split(" ") yields empty strings for consecutive spaces;
                // skip them so "" is never counted as a word.
                if (!s.isEmpty()) {
                    outputK.set(s);
                    context.write(outputK, ONE);
                }
            }
        }
    }

    /** Sums the 1-counts for each word. Associative, so it doubles as combiner. */
    public static class NeoWordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        private final LongWritable outputV = new LongWritable();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                sum += value.get();
            }
            outputV.set(sum);
            context.write(key, outputV);
        }
    }

    /**
     * Configures and submits the job to the YARN cluster, replacing any
     * previous output directory. Exits with 0 on success, 1 on failure.
     */
    public static void main(String[] args) throws Exception {
        // Identity used for HDFS/YARN access from the submitting machine.
        System.setProperty("HADOOP_USER_NAME", "bigdata");

        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://10.1.1.1:9000");
        config.set("mapreduce.framework.name", "yarn");
        config.set("yarn.resourcemanager.hostname", "10.1.1.1");
        // Required when submitting from Windows to a Linux cluster.
        config.set("mapreduce.app-submission.cross-platform", "true");

        Job job = Job.getInstance(config);
        // Pre-built local jar shipped to the cluster; must contain this class.
        job.setJar("D:\\bigdata\\mapreduces\\flink-mr.jar");

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        // BUG FIX: the original called setOutputKeyClass twice; the second
        // call must declare the output VALUE class, otherwise the reducer's
        // LongWritable values are mis-declared.
        job.setOutputValueClass(LongWritable.class);

        job.setMapperClass(NeoWordCountMapper.class);
        job.setReducerClass(NeoWordCountReducer.class);
        job.setCombinerClass(NeoWordCountReducer.class);

        Path inputPath = new Path("/user/bigdata/demo/001/input");
        FileInputFormat.setInputPaths(job, inputPath);

        Path outputPath = new Path("/user/bigdata/demo/001/output");
        // Remove a stale output directory; MapReduce refuses to overwrite one.
        FileSystem fs = FileSystem.get(new URI("hdfs://10.1.1.1:9000"), config, "bigdata");
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
环境要求
1.本地打包形成
D:\bigdata\mapreduces\flink-mr.jar
2.Hadoop环境
10.1.1.1
3.文件准备
hdfs://10.1.1.1:9000/user/bigdata/demo/001/input
上传几个文件用于分析
4.运行本示例,提交MR任务到集群
边栏推荐
- Understanding numpy.reshape and numpy.transpose
- [CAD drawing Video] AutoCAD 2014 master's way
- Filecoin黑客松开发者大赛
- 解决ValueError: Iterable over raw text documents expected, string object received.
- [JVM] - memory partition in JVM
- [JVM] - Division de la mémoire en JVM
- 2022 special operation certificate examination question bank and simulation examination for safety management personnel of fireworks and firecrackers business units
- jsp连接oracle实现登录注册(简单)
- Oracle 常用基础函数
- Sharing | intelligent environmental protection - ecological civilization informatization solution (PDF attached)
猜你喜欢
Yunda's cloud-based business middle-platform construction | practical lecture
Docker安装Mysql5.7并开启binlog
OpenSSL client programming: SSL session failure caused by an obscure function
双向电平转换电路
Line animation
Docker installs mysql5.7 and starts binlog
Flutter NestedScrollView sliding collapsible header with pull-down refresh effect
如何做好水库大坝安全监测工作
How to do a good job of dam safety monitoring
Quartus replication IP core
随机推荐
中小型水库大坝安全自动监测系统解决方案
Quartus replication IP core
Share a powerful tool for factor Mining: genetic programming
8VC Venture Cup 2017 - Elimination Round D. PolandBall and Polygon
To batch add background pictures and color changing effects to videos
电商转化率这么抽象,到底是个啥?
Zzuli:1071 decomposing prime factor
联想混合云Lenovo xCloud,新企业IT服务门户
学术搜索相关论文
6. 毕业设计温湿度监控系统(ESP8266 + DHT11 +OLED 实时上传温湿度数据给公网服务器并在OLED显示屏上显示实时温湿度)
Docker installs mysql5.7 and starts binlog
【C语言练习——打印空心正方形及其变形】
Oracle 条件、循环语句
Camera Basics
Maskrcnn,fast rcnn, faster rcnn优秀视频
2022年全国职业院校技能大赛“网络安全”竞赛试题官方答案
TypeScript接口
Why don't big manufacturers use undefined
JSP connecting Oracle to realize login and registration
独立站卖家都在用的五大电子邮件营销技巧,你知道吗?