手机日志分析需求
- 本文主要实现以下需求
- 编写数据生成器生成1G~10G大小的数据,字段必须包括id,日期,手机号码、型号、操作系统字段。
- 需要将手机号码4~9为掩码处理。
- 分析2021年、2022年操作系统市场占比、手机型号市场占比情况
- 分析2022年手机运营商市场占比情况
- 分析数据存储到HDFS集群/ana/phone节点下面
- 将分析结果存储到Mysql,并进行数据可视化
数据生成工具类
- 手机号码随机生成
- 可以采用随机数生成的方式,结合三大运营商的号码前三位数为规则进行生成 代码如下
/**
* @Description 生成三大运营商的手机号
* @Author 湧哥
* @Version 1.0
*/
/**
* 中国移动手机号段:
* 134、135、136、137、138、139、147、150、151、152、157、158、159、172、178、182、183、184、187、188、198、1703、1705、1706
* 中国联通手机号段:
* 130、131、132、145、155、156、166、171、175、176、185、186、1704、1707、1708、1709
* 中国电信手机号段:
* 133、153、173、177、180、181、189、191、193、199、1700、1701、1702
* 腾讯云API https://market.cloud.tencent.com/products/31101
*/
public class PhoneNumberGenerator {
//生成一万个手机号码,只需将 generatePhoneNumbers 方法中的参数 count 修改为 10000 即可
//移动
private static final String[] CHINA_MOBILE_PREFIX = {"134", "139", "150", "151", "182"};
//联通
private static final String[] CHINA_UNICOM_PREFIX = {"130","155","186"};
//电信
private static final String[] CHINA_TELECOM_PREFIX = {"133","153","180","181","189"};
public static void main(String[] args) {
String phoneNumbers = generatePhoneNumbers(1);
System.out.println(phoneNumbers);
}
public static String generatePhoneNumbers(int count) {
String phoneNumber=null;
Random random = new Random();
for (int i = 0; i < count; i++) {
String prefix;
int operatorIndex = random.nextInt(3);
switch (operatorIndex) {
case 0:
prefix = CHINA_MOBILE_PREFIX[random.nextInt(CHINA_MOBILE_PREFIX.length)];
break;
case 1:
prefix = CHINA_UNICOM_PREFIX[random.nextInt(CHINA_UNICOM_PREFIX.length)];
break;
default:
prefix = CHINA_TELECOM_PREFIX[random.nextInt(CHINA_TELECOM_PREFIX.length)];
}
phoneNumber = prefix + generateRandomNumber(random, 11 - prefix.length());
}
return replaceCharacters(phoneNumber,3,8);
}
private static String replaceCharacters(String input, int startIndex, int endIndex) {
if (input == null || input.length() < endIndex) {
return input;
}
StringBuilder sb = new StringBuilder(input);
for (int i = startIndex; i <= endIndex; i++) {
sb.setCharAt(i, '*');
}
return sb.toString();
}
private static String generateRandomNumber(Random random, int length) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < length; i++) {
sb.append(random.nextInt(10));
}
return sb.toString();
}
}
- 运营商解析的其中一种方式 【采用接口分析】
- 这里可以使用鹅厂或者其他厂商开发的接口进行运营商识别 申请获取对应的秘钥即可 例子如下
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
public class PhoneOperator {
public static String calcAuthorization(String source, String secretId, String secretKey, String datetime)
throws NoSuchAlgorithmException, UnsupportedEncodingException, InvalidKeyException {
String signStr = "x-date: " + datetime + "\n" + "x-source: " + source;
Mac mac = Mac.getInstance("HmacSHA1");
Key sKey = new SecretKeySpec(secretKey.getBytes("UTF-8"), mac.getAlgorithm());
mac.init(sKey);
byte[] hash = mac.doFinal(signStr.getBytes("UTF-8"));
String sig = new BASE64Encoder().encode(hash);
String auth = "hmac id=\"" + secretId + "\", algorithm=\"hmac-sha1\", headers=\"x-date x-source\", signature=\"" + sig + "\"";
return auth;
}
public static String urlencode(Map<?, ?> map) throws UnsupportedEncodingException {
StringBuilder sb = new StringBuilder();
for (Map.Entry<?, ?> entry : map.entrySet()) {
if (sb.length() > 0) {
sb.append("&");
}
sb.append(String.format("%s=%s",
URLEncoder.encode(entry.getKey().toString(), "UTF-8"),
URLEncoder.encode(entry.getValue().toString(), "UTF-8")
));
}
return sb.toString();
}
public static void main(String[] args) throws NoSuchAlgorithmException, UnsupportedEncodingException, InvalidKeyException {
//云市场分配的密钥Id
String secretId = "xx";
//云市场分配的密钥Key
String secretKey = "xx;
String source = "market";
Calendar cd = Calendar.getInstance();
SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss 'GMT'", Locale.US);
sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
String datetime = sdf.format(cd.getTime());
// 签名
String auth = calcAuthorization(source, secretId, secretKey, datetime);
// 请求方法
String method = "POST";
// 请求头
Map<String, String> headers = new HashMap<String, String>();
headers.put("X-Source", source);
headers.put("X-Date", datetime);
headers.put("Authorization", auth);
// 查询参数
Map<String, String> queryParams = new HashMap<String, String>();
queryParams.put("mobile","XXX");
// body参数
Map<String, String> bodyParams = new HashMap<String, String>();
// url参数拼接
String url = "https://service-8c43o60c-1253285064.gz.apigw.tencentcs.com/release/sms";
if (!queryParams.isEmpty()) {
url += "?" + urlencode(queryParams);
}
BufferedReader in = null;
try {
URL realUrl = new URL(url);
HttpURLConnection conn = (HttpURLConnection) realUrl.openConnection();
conn.setConnectTimeout(5000);
conn.setReadTimeout(5000);
conn.setRequestMethod(method);
// request headers
for (Map.Entry<String, String> entry : headers.entrySet()) {
conn.setRequestProperty(entry.getKey(), entry.getValue());
}
// request body
Map<String, Boolean> methods = new HashMap<>();
methods.put("POST", true);
methods.put("PUT", true);
methods.put("PATCH", true);
Boolean hasBody = methods.get(method);
if (hasBody != null) {
conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
conn.setDoOutput(true);
DataOutputStream out = new DataOutputStream(conn.getOutputStream());
out.writeBytes(urlencode(bodyParams));
out.flush();
out.close();
}
// 定义 BufferedReader输入流来读取URL的响应
in = new BufferedReader(new InputStreamReader(conn.getInputStream()));
String line;
String result = "";
while ((line = in.readLine()) != null) {
result += line;
}
System.out.println(result);
} catch (Exception e) {
System.out.println(e);
e.printStackTrace();
} finally {
try {
if (in != null) {
in.close();
}
} catch (Exception e2) {
e2.printStackTrace();
}
}
}
}
结果如下 (另一种方式为:直接根据前三位手机号进行判断)
模拟数据生成类
- 数据生成器 id,日期,手机号码、型号、操作系统
/**
* @Description
* 数据生成器 id,日期,手机号码、型号、操作系统
* id:UUID 随机生成 日期:2021 2022 手机号码:三大运营商 型号:Apple HuaWei Oppo Vivo Meizu Nokia 操作系统:Apple ios Harmony Samsung
* 1.分析2021年、2022年操作系统市场占比、手机型号市场占比情况
* 2.分析2022年手机运营商市场占比情况
* 3.分析数据存储到HDFS集群/ana/phone节点下面
* 4.将分析结果存储到Mysql,并进行数据可视化
* @Author 湧哥
* @Version 1.0
*/
public class DataGenerator {
public static void main(String[] args) {
try {
BufferedWriter writer = new BufferedWriter(new FileWriter("data/phone.log"));
for (int i = 0; i < 1000; i++) {
//UUID随机生成 id,日期,手机号码、型号、操作系统
String id = UUID.randomUUID().toString();
String date = getRandomDate();
String phoneNumber = PhoneNumberGenerator.generatePhoneNumbers(1);
String model = getRandomModel();
String operatingSystem = getRandomOperatingSystem();
String line = id + "," + date + "," + phoneNumber + "," + model + "," + operatingSystem;
writer.write(line);
writer.newLine();
}
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
private static String getRandomDate() {
Random random = new Random();
int year = random.nextInt(2) == 0 ? 2021 : 2022;
int month = random.nextInt(12) + 1;
int dayOfMonth;
if (month == 2) {
dayOfMonth = random.nextInt(28) + 1;
} else if (month == 4 || month == 6 || month == 9 || month == 11) {
dayOfMonth= random.nextInt(30) + 1;
} else {
dayOfMonth= random.nextInt(31) + 1;
}
return year + "-" +
(month < 10 ? "0" : "") +
month+ "-" +
(dayOfMonth<10? "0":"")+
dayOfMonth ;
}
private static String getRandomPhoneNumber() {
Random random = new Random();
StringBuilder phoneNumber = new StringBuilder("1");
for (int i = 0; i < 10; i++) {
phoneNumber.append(random.nextInt(10));
}
return phoneNumber.toString();
}
private static String getRandomModel() {
String[] models = {"Apple", "HuaWei", "Oppo", "Vivo", "Meizu", "Nokia"};
return models[new Random().nextInt(models.length)];
}
private static String getRandomOperatingSystem() {
String[] operatingSystems = {"Apple", "HarmonyOS", "Samsung","iOS"};
return operatingSystems[new Random().nextInt(operatingSystems.length)];
}
}
结果如下
MapReduce程序需求编写
- 分析2021年、2022年操作系统市场占比、手机型号市场占比情况
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class PhoneOSAnalysis {
private static int totalCount;
private static int lineCount2021 = 0;
private static int lineCount2022 = 0;
public static class TokenizerMapper extends Mapper<Object, Text, Text, DoubleWritable> {
private final static DoubleWritable one = new DoubleWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
if (fields.length >= 5) {
// 操作系统市场占比
word.set(fields[1].substring(0, 4) + "-OS-" + fields[4]);
context.write(word, one);
// 手机型号市场占比
word.set(fields[1].substring(0, 4) + "-Model-" + fields[3]);
context.write(word, one);
}
}
}
public static class MarketShareReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
private DoubleWritable result = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
throws IOException, InterruptedException {
double sum = 0;
for (DoubleWritable val : values) {
//这里会根据分组的key来计算sum
sum += val.get();
}
int yearTotalCount = key.toString().contains("2021") ? lineCount2021 : lineCount2022;
double percentage = sum / yearTotalCount;
result.set(percentage);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path inputPath = new Path("data/phone.log");
FSDataInputStream inputStream = fs.open(inputPath);
try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.contains("2021")) {
lineCount2021++;
} else if (line.contains("2022")) {
lineCount2022++;
}
}
}
// totalCount = Math.max(lineCount2021, lineCount2022);
Job job = Job.getInstance(conf, "market share analysis");
job.setJarByClass(PhoneOSAnalysis.class);
job.setMapperClass(TokenizerMapper.class);
// 设置自定义分区器
job.setPartitionerClass(CustomPartitioner.class);
job.setNumReduceTasks(2);
job.setReducerClass(MarketShareReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
Path outputPath = new Path("data/result");
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
FileInputFormat.addInputPath(job, new Path("data/phone.log"));
FileOutputFormat.setOutputPath(job, new Path(String.valueOf(outputPath)));
// TextInputFormat.addInputPath(job, new Path("hdfs://192.168.192.100:8020/"));
// TextInputFormat.outInputPath(job, new Path("hdfs://192.168.192.100:8020/"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static class CustomPartitioner extends Partitioner<Text, DoubleWritable> {
@Override
public int getPartition(Text key, DoubleWritable value, int numPartitions) {
// 根据年份进行分区
if (key.toString().contains("2021")) {
return 0;
} else {
return 1;
}
}
}
}
- 分析2022年手机运营商市场占比情况
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class OperatorMR {
private static int lineCount2022 = 0;
public static class TokenizerMapper extends Mapper<Object, Text, Text, DoubleWritable> {
private final static DoubleWritable one = new DoubleWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
if (fields.length >= 3 && fields[1].contains("2022")) {
// 手机运营商市场占比
word.set(fields[1].substring(0, 4) + "-Operator-" + getCarrier(fields[2]));
context.write(word, one);
}
}
private String getCarrier(String phoneNumber) {
String prefix = phoneNumber.substring(0, 3);
switch (prefix) {
//"133","153","180","181","189"
case "133":
case "153":
case "180":
case "181":
case "189":
return "电信";
//"130","155","186"
case "130":
case "155":
case "186":
return "联通";
default:
return "移动";
}
}
}
public static class MarketShareReducer extends Reducer<Text, DoubleWritable, Text, DoubleWritable> {
private DoubleWritable result = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
throws IOException, InterruptedException {
double sum = 0;
for (DoubleWritable val : values) {
sum += val.get();
}
double percentage = sum / lineCount2022;
result.set(percentage);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
Path inputPath = new Path("data/phone.log");
try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.contains("2022")) {
lineCount2022++;
}
}
}
Job job = Job.getInstance(conf, "PhoneOperator");
job.setJarByClass(OperatorMR.class);
job.setMapperClass(TokenizerMapper.class);
job.setReducerClass(MarketShareReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
Path outputPath = new Path("data/result-phone");
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
// TextInputFormat.addInputPath(job, new Path("hdfs://192.168.192.100:8020/"));
// TextInputFormat.outInputPath(job, new Path("hdfs://192.168.192.100:8020/"));
FileInputFormat.addInputPath(job, new Path("data/phone.log"));
FileOutputFormat.setOutputPath(job, outputPath);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
结果如下
-将分析结果存储到Mysql,并进行数据可视化
package com.yopai.mrmysql;
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
public class OPMysqlMR {
private static int lineCount2022 = 0;
public static class TokenizerMapper extends Mapper<Object, Text, Text, DoubleWritable> {
private final static DoubleWritable one = new DoubleWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
String[] fields = value.toString().split(",");
if (fields.length >= 3 && fields[1].contains("2022")) {
// 手机运营商市场占比
word.set(fields[1].substring(0, 4) + "-Operator-" + getCarrier(fields[2]));
context.write(word, one);
}
}
private String getCarrier(String phoneNumber) {
String prefix = phoneNumber.substring(0, 3);
switch (prefix) {
case "133":
case "153":
case "180":
case "181":
case "189":
return "电信";
case "130":
case "155":
case "186":
return "联通";
default:
return "移动";
}
}
}
public static class MarketShareReducer extends Reducer<Text, DoubleWritable, DBOutputWritable, NullWritable> {
private DoubleWritable result = new DoubleWritable();
public void reduce(Text key, Iterable<DoubleWritable> values, Context context)
throws IOException, InterruptedException {
double sum = 0;
for (DoubleWritable val : values) {
sum += val.get();
}
double percentage = sum / lineCount2022;
result.set(percentage);
context.write(new DBOutputWritable(key.toString(), result.get()), NullWritable.get());
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
// 设置数据库连接信息
String dbUrl = "jdbc:mysql://localhost:3306/blog";
String dbUsername = "root";
String dbPassword = "Admin2022!";
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", dbUrl, dbUsername, dbPassword);
try (Connection connection = DriverManager.getConnection(dbUrl, dbUsername, dbPassword)) {
String createTableSql = "CREATE TABLE IF NOT EXISTS operator_market_share(operator VARCHAR(255), market_share DOUBLE)";
PreparedStatement preparedStatement = connection.prepareStatement(createTableSql);
preparedStatement.executeUpdate();
}
FileSystem fs = FileSystem.get(conf);
Path inputPath = new Path("data/phone.log");
try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.contains("2022")) {
lineCount2022++;
}
}
}
Job job = Job.getInstance(conf, "PhoneOperator");
job.setJarByClass(OPMysqlMR.class);
job.setMapperClass(TokenizerMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(DoubleWritable.class);
job.setReducerClass(MarketShareReducer.class);
job.setOutputKeyClass(DBOutputWritable.class);
job.setOutputValueClass(NullWritable.class);
// 设置数据库输出
DBOutputFormat.setOutput(job, "operator_market_share", "operator", "market_share");
FileInputFormat.addInputPath(job, new Path("data/phone.log"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
package com.yopai.mrmysql;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
public class DBOutputWritable implements Writable, DBWritable {
private String operator;
private double market_share;
public DBOutputWritable() {
}
public DBOutputWritable(String operator, double market_share) {
this.operator = operator;
this.market_share = market_share;
}
@Override
public void readFields(DataInput in) throws IOException {
operator = in.readUTF();
market_share = in.readDouble();
}
@Override
public void write(DataOutput out) throws IOException, IOException {
out.writeUTF(operator);
out.writeDouble(market_share);
}
@Override
public void readFields(ResultSet resultSet) throws SQLException {
// 不需要实现此方法,因为我们只会写入数据到数据库
}
@Override
public void write(PreparedStatement preparedStatement) throws SQLException {
preparedStatement.setString(1, operator);
preparedStatement.setDouble(2, market_share);
}
}
运行结果如下
- 可视化操作
package com.yopai.draw;
/**
* @Description
* @Author 湧哥
* @Version 1.0
*/
import java.awt.*;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import javax.swing.JFrame;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartPanel;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PiePlot;
import org.jfree.data.general.DefaultPieDataset;
public class PieChartExample extends JFrame {
public PieChartExample() {
// 从数据库获取数据
DefaultPieDataset dataset = new DefaultPieDataset();
try {
String dbUrl = "jdbc:mysql://localhost:3306/blog";
String dbUsername = "root";
String dbPassword = "Admin2022!";
Connection connection = DriverManager.getConnection(dbUrl, dbUsername, dbPassword);
Statement statement = connection.createStatement();
ResultSet resultSet = statement.executeQuery("SELECT operator, market_share FROM operator_market_share");
while (resultSet.next()) {
String operator = resultSet.getString("operator");
double marketShare = resultSet.getDouble("market_share");
dataset.setValue(operator, marketShare);
}
} catch (Exception e) {
e.printStackTrace();
}
// 创建饼图
JFreeChart pieChart = ChartFactory.createPieChart(
"运营商市场占比", // 图表标题
dataset, // 数据集
true, // 是否显示图例
true, // 是否生成工具提示
false // 是否生成URL链接
);
// 设置字体以显示中文
Font font = new Font("宋体", Font.PLAIN, 12);
pieChart.getTitle().setFont(font);
pieChart.getLegend().setItemFont(font);
PiePlot plot = (PiePlot) pieChart.getPlot();
plot.setLabelFont(font);
// 添加饼图到面板并显示
ChartPanel chartPanel = new ChartPanel(pieChart);
setContentPane(chartPanel);
}
public static void main(String[] args) {
PieChartExample pieChartExample = new PieChartExample();
pieChartExample.setSize(600, 600);
pieChartExample.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
pieChartExample.setVisible(true);
}
}
结果如下
本篇文章到这里结束 需要注意的是每个人的环境不用调用的API会有所差异。