This paper analyzes the characteristics of user behavior models on the basis of real-time system solutions, designs and develops a Hadoop-based real-time user behavior analysis system, and applies it to an actual e-commerce website. The work consists of four parts: analyzing and studying user behavior on e-commerce websites; analyzing the application scenarios of a user behavior analysis system and designing a Hadoop-based real-time user behavior analysis system; developing that system under the Java EE framework; and putting the system into use on an e-commerce website.
This graduation project is a Hadoop-based system for analyzing e-commerce user behavior, so its research direction is Hadoop-oriented big data. Within the project, my main research focus is the source of data collection: log information is collected either through Flume tracking points (buried points) or through an Nginx reverse proxy server, in order to gather the information we need about the web pages users browse. The second focus is the problem of big data storage.
Key words: e-commerce data; user behavior; analysis system; Hadoop; Java
摘 要 I
Abstract II
第1章 绪论 1
1.1 编写目的 1
1.2 背景及意义 1
1.3 开发及运行环境 1
第2章 系统相关技术介绍 3
2.1 hadoop系统架构 3
2.1.1 hadoop 生态架构和概况 3
2.2 HDFS(Hadoop分布式文件系统) 4
2.2.1 HDFS 设计说明 4
2.2.2 HDFS的存储机制和核心 4
2.3 Mapreduce(分布式计算框架) 7
2.3.1 mapreduce计算框架设计说明 7
2.3.2 Hadoop的Mapreduce计算开源框架的计算流程: 8
2.4 Yarn(资源管理框架) 9
2.4.1 yarn的架构 9
2.4.2 yarn的各个角色的职责 10
2.4.3 YARN 作业执行流程 11
2.5 系统中的数据库介绍和其他工具简介 12
2.5.1 hive是什么 12
2.5.2 hive的架构 13
2.5.3 hive的特点 13
2.5.4 hbase是什么 13
2.5.5 hbase的存储机制和表结构 14
2.5.7 数据导入导出工具sqoop和日志收集工具flume 15
第3章 需求分析 17
3.1 系统概述 17
3.2 系统功能需求 17
3.2.1 收集原始数据 17
3.2.2 计算物品相似度矩阵 18
3.2.3 计算用户购买向量 18
3.2.4 计算推荐向量并去重和排序 18
3.2.5 数据入库 18
3.2.6 作业控制 19
3.2.7商品推荐功能 19
3.3 系统非功能需求 19
第4章 概要设计 20
4.1系统架构设计 20
4.2系统层次架构设计 21
4.3系统功能模块设计 22
4.3.1 计算物品相似度矩阵 22
4.3.2推荐矩阵(相似度矩阵*向量) 23
4.3.3对推荐向量进行处理 23
4.3.4数据入库 23
4.4系统数据库设计 24
4.5推荐模块程序流程图 25
4.6系统架构图 25
4.7数据预处理层 26
4.8推荐结果生成层 26
4.9推荐系统流程图 27
第5章 系统实现 28
5.1计算用户购买商品的列表 28
5.2计算商品的共现关系 28
5.3计算用户的购买向量 28
5.4推荐结果 29
5.5数据去重 29
5.6推荐结果入库 30
5.7构建作业流对象 32
第6章 系统测试 33
6.1计算用户购买商品的列表 33
6.2计算商品的共现次数(共现矩阵) 33
6.3计算用户的购买向量 33
6.4推荐结果 33
6.5数据去重 34
6.6推荐结果入库 35
6.7 web系统推荐商品实现 35
总 结 36
推荐系统是基于用户、商品行为数据来进行推荐的,没有用户商品数据的推荐系统是无法进行推荐的。rawdata文件:该文件是收集用户对物品的偏好,形成“用户 物品 偏好”的数据集。数据格式:用户编号 物品编号 偏好值。
package com.zwj.controller;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpSession;
import org.apache.ibatis.session.SqlSession;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.SessionAttributes;
import com.zwj.service.ProductService;
import com.zwj.service.UserService;
import com.zwj.vo.Cart;
import com.zwj.vo.Product;
import com.zwj.vo.User;
/**
 * Web controller for user-facing actions: registration, login, logout,
 * shopping-cart operations and navigation to the product listing pages.
 * Every handler returns the path of the view to render.
 *
 * NOTE(review): the file imports Spring's Controller, Autowired and
 * RequestMapping annotations, but none are visible on this class or its
 * members — confirm how the class and its collaborators are wired.
 */
public class UserController{
// MyBatis session used directly for the "com.zwj.dao.*" mapped statements.
private SqlSession sqlSession;
// User service; used here for credential lookup during login.
private UserService us;
// Product service; used here to load a product and to list cart entries.
private ProductService ps;
// Request object that handlers write view attributes to.
private HttpServletRequest req;
// Latest result of the "com.zwj.dao.count" statement; overwritten by several handlers.
// NOTE(review): this is per-instance state, so it is shared between requests if the
// controller is a singleton — confirm.
private int count;
public String userReg(User user) throws IOException{
Map<String,Object> map = new HashMap<String, Object>();
map.put("phone_mobile", user.getPhone_mobile());
map.put("login_password", user.getLogin_password());
Pattern pattern = Pattern.compile("^((13[0-9])|(15[^4,\\D])|(17[6])|(18[01236789]))\\d{8}$");
Matcher matcher = pattern.matcher(user.getPhone_mobile());
if(user.getPhone_mobile()==null || user.getLogin_password()==null || !matcher.matches()){
return "pages/register-fail.html";
Date date = new Date();
DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
map.put("register_time", df.format(date));
String s = UUID.randomUUID().toString();
String user_code = s.substring(0,8)+s.substring(9,13)+s.substring(14,18)+s.substring(19,23)+s.substring(24);
map.put("user_code", user_code);
int a = sqlSession.insert("com.zwj.dao.addUser",map);
req.setAttribute("phone_mobile", user.getPhone_mobile());
req.setAttribute("login_password", user.getLogin_password());
return "pages/register-ok.html";
public User userJudge(String phone_mobile) {
User u = sqlSession.selectOne("com.zwj.dao.judgeUser",phone_mobile);
return u;
public String userLogin(String phone_mobile,String login_password) throws IOException{
Pattern pattern = Pattern.compile("^((13[0-9])|(15[^4,\\D])|(17[6])|(18[01236789]))\\d{8}$");
Matcher matcher = pattern.matcher(phone_mobile);
if(phone_mobile==null || login_password==null || !matcher.matches()){
return "pages/login-fail.html";
User u = us.userLogin(phone_mobile, login_password);
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.getSession().setAttribute("u", u);
req.setAttribute("count", count);
return "pages/main.jsp";
return "pages/login-fail.html";
//用户退出销毁session 跳转到登录页
public String userExit(HttpSession session){
return "index.html";
public String showCart(HttpSession session) throws IOException{
User u = (User) session.getAttribute("u");
List<Cart> c= ps.showCart(Long.parseLong(u.getPhone_mobile()));
req.setAttribute("c", c);
return "pages/cart.jsp";
public String addCart(Long pid,HttpSession session) throws IOException{
Map<String,Object> map = new HashMap<String, Object>();
Product p = ps.showProduct(pid);
User u = (User) session.getAttribute("u");
map.put("uid", u.getPhone_mobile());
map.put("pic", p.getPic());
map.put("pname", p.getName());
map.put("num", 1);
map.put("price", p.getPrice());
int a = sqlSession.insert("com.zwj.dao.addCart",map);
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/addCart-ok.jsp";
public String more_xm(HttpSession session){
User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/xm6_m.jsp";
public String more_pg(HttpSession session){
User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/pg_m.jsp";
public String more_gl(HttpSession session){
User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/gl_m.jsp";
public String more_jn(HttpSession session){
User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/jn_m.jsp";
public String main(HttpSession session){
User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);
return "pages/main.jsp";
/**
 * Navigates to the result page.
 *
 * Unlike the other navigation handlers, the count refresh is commented out
 * here, so the session argument is currently unused.
 *
 * @param session the current HTTP session (unused)
 * @return "pages/result.jsp"
 */
public String result(HttpSession session){
// Disabled count refresh, kept as found:
/*User u = (User) session.getAttribute("u");
count = sqlSession.selectOne("com.zwj.dao.count",u.getPhone_mobile());
req.setAttribute("count", count);*/
return "pages/result.jsp";