1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
   | 
  import random import time
  # Request paths mapped to an integer weight; sample_url() repeats each path
  # `weight` times before drawing, so a larger weight means a likelier pick.
  url_paths = {
      "class/112.html": 10,
      "class/128.html": 5,
      "class/145.html": 8,
      "class/146.html": 10,
      "class/131.html": 3,
      "class/130.html": 2,
      "learn/821": 1,
      "course/list": 1,
  }

  # Pool of octet values; sample_ip() joins four of them into a dotted address.
  ip_slices = [
      132, 156, 124, 10, 29, 167, 143, 187,
      30, 46, 55, 63, 72, 87, 98, 168,
  ]

  # Search-engine referer templates; "{query}" is filled in by sample_referer().
  http_referers = [
      "http://www.baidu.com/s?wd={query}",
      "https://www.sogou.com/web?query={query}",
      "http://cn.bing.com/search?q={query}",
      "https://search.yahoo.com/search?p={query}",
  ]

  # Keywords substituted into the "{query}" slot of a referer template.
  search_keyword = [
      "新近发布",
      "为你推荐",
      "今日歌单",
      "瞩目艺人",
      "今日专辑",
  ]

  # HTTP status codes, stored as strings because they are formatted straight
  # into the log line.
  status_codes = [
      "200",
      "404",
      "500",
  ]
  def sample_url():
      """Return one path from ``url_paths``, chosen in proportion to its weight.

      Every path is repeated ``weight`` times in a temporary pool and a single
      entry is then drawn uniformly from that pool.
      """
      weighted_pool = []
      for path, weight in url_paths.items():
          weighted_pool += [path] * weight

      (picked,) = random.sample(weighted_pool, 1)
      return picked
  def sample_ip():
      """Return a dotted four-part address string.

      The four parts are drawn from ``ip_slices`` without replacement, so no
      pool entry is used twice within one address.
      """
      octets = random.sample(ip_slices, 4)
      return ".".join(map(str, octets))
  def sample_referer():
      """Return a referer string for one log line.

      When the uniform draw on [0, 1] exceeds 0.2 the result is the placeholder
      "-"; otherwise a template from ``http_referers`` is filled with a keyword
      from ``search_keyword``.
      """
      if random.uniform(0, 1) <= 0.2:
          # Draw the template first, then the keyword.
          template = random.sample(http_referers, 1)[0]
          keyword = random.sample(search_keyword, 1)[0]
          return template.format(query=keyword)

      return "-"
  def sample_status_code():
      """Return one entry of ``status_codes``, picked uniformly at random."""
      (code,) = random.sample(status_codes, 1)
      return code
  # Traffic profile: (exclusive upper hour bound, min lines, max lines).
  # Rows are checked in order; the first row whose bound is above the current
  # hour supplies the randint() range for that run.
  _HOURLY_VOLUME = (
      (1, 80, 100),
      (2, 60, 85),
      (3, 50, 75),
      (5, 30, 60),
      (6, 50, 65),
      (8, 70, 85),
      (10, 70, 95),
      (11, 80, 100),
      (13, 95, 150),
      (15, 90, 130),
      (17, 80, 120),
      (18, 85, 130),
      (19, 100, 120),
  )

  # Range used for hour 19 and later, where no row above matches.
  _EVENING_VOLUME = (110, 160)


  def _pick_line_count(hour):
      """Return a random number of log lines for ``hour`` (integer, 0-23)."""
      for upper_hour, low, high in _HOURLY_VOLUME:
          if hour < upper_hour:
              return random.randint(low, high)
      return random.randint(*_EVENING_VOLUME)


  def generate_log(log_path="/home/hadoop/data/project/logs/access.log"):
      """Write one batch of synthetic access-log lines to ``log_path``.

      The file is overwritten on every call. The number of lines depends on
      the current local hour (see ``_HOURLY_VOLUME``), and every line in the
      batch carries the same timestamp. Fields are tab-separated:
      ip, local time, request line, status code, referer.

      Args:
          log_path: Destination file. Defaults to the path the script has
              always written to, so existing callers are unaffected.

      Raises:
          OSError: If ``log_path`` cannot be opened for writing.
      """
      # Read the clock once so the timestamp and the hour bucket always agree,
      # even when the call straddles an hour boundary.
      now = time.localtime()
      time_str = time.strftime("%Y-%m-%d %H:%M:%S", now)
      count = _pick_line_count(now.tm_hour)

      # Explicit UTF-8: referers may contain non-ASCII search keywords, and the
      # locale default encoding is not guaranteed to be able to encode them.
      # The context manager ensures the file is flushed and closed.
      with open(log_path, "w", encoding="utf-8") as f:
          for _ in range(count):
              query_log = "{ip}\t{local_time}\t\"GET /{url} HTTP/1.1\"\t{status_code}\t{referer}".format(
                  url=sample_url(),
                  ip=sample_ip(),
                  referer=sample_referer(),
                  status_code=sample_status_code(),
                  local_time=time_str,
              )
              f.write(query_log + "\n")
  if __name__ == '__main__':
      # Script entry point: produce a single batch of log lines.
      generate_log()
 
  |