Hadoop 》》 Django 简单操作 HDFS 的语句
》》
import os

from django.shortcuts import render
from django.views import View
from hdfs.client import Client

from hadoop_hdfs.settings import UPLOAD_ROOT

# Thin wrappers around the `hdfs` package's WebHDFS client, plus the Django
# view that uses them.
# Client API reference: https://hdfscli.readthedocs.io/en/latest/api.html
#
# Further one-line helpers from the original notes, kept disabled for
# reference:
#
#   def delete_hdfs_file(client, hdfs_path):           # delete an HDFS file
#       client.delete(hdfs_path)
#
#   def get_from_hdfs(client, hdfs_path, local_path):  # HDFS -> local disk
#       client.download(hdfs_path, local_path, overwrite=False)
#
#   def append_to_hdfs(client, hdfs_path, data):       # append to a file
#       client.write(hdfs_path, data, overwrite=False, append=True)
#
#   def list(client, hdfs_path):                       # names in a directory
#       return client.list(hdfs_path, status=False)
#
# Example calls from the same notes:
#
#   put_to_hdfs(client, 'a.csv', '/user/root/a.csv')
#   append_to_hdfs(client, '/b.txt', '111111111111111' + '\n')
#   write_to_hdfs(client, '/b.txt', '222222222222' + '\n')
#   move_or_rename(client, '/b.txt', '/user/b.txt')
#   mkdirs(client, '/input1/python1')
#   print(list(client, '/input'))
#   read_hdfs_file(client, '/')
#   client.list("/")


def read_hdfs_file(client, filename):
    """Return the lines of the HDFS file ``filename`` with whitespace stripped.

    The file is read as UTF-8 text and split on ``'\\n'``.
    """
    with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
        return [line.strip() for line in reader]


def put_to_hdfs(client, local_path, hdfs_path):
    """Upload ``local_path`` from the local disk to ``hdfs_path``.

    Note the argument order: this wrapper takes the local path first, while
    ``client.upload`` takes the HDFS path first.
    """
    client.upload(hdfs_path, local_path, cleanup=True)


def write_to_hdfs(client, hdfs_path, data):
    """Write ``data`` to ``hdfs_path``, replacing any existing file."""
    client.write(hdfs_path, data, overwrite=True, append=False)


def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
    """Move or rename ``hdfs_src_path`` to ``hdfs_dst_path``."""
    client.rename(hdfs_src_path, hdfs_dst_path)


def mkdirs(client, hdfs_path):
    """Create the directory ``hdfs_path`` on HDFS."""
    client.makedirs(hdfs_path)


def get_IS_File(client, hdfs_paht):
    """Return True when the status of ``hdfs_paht`` reports type ``'FILE'``.

    The misspelled parameter name is kept so keyword callers keep working.
    """
    return client.status(hdfs_paht)['type'] == 'FILE'


def uploadfile(img):
    """Save the uploaded file ``img`` under ``UPLOAD_ROOT`` using its own name.

    ``img`` must provide ``name`` and ``chunks()``. The ``with`` block closes
    the file even if writing a chunk fails.
    """
    with open(os.path.join(UPLOAD_ROOT, '', img.name), 'wb') as target:
        for chunk in img.chunks():
            target.write(chunk)


# Shared client for the WebHDFS endpoint, created once at import time.
# NOTE(review): the hdfs client forwards `timeout` to `requests`, which counts
# in seconds, so 5 * 1000 looks like a milliseconds mix-up. Confirm before
# changing.
client = Client("http://192.168.88.129:50070", root="/", timeout=5 * 1000, session=False)


class Index(View):
    """Upload page: stores the posted file locally and inspects the HDFS root."""

    def get(self, request):
        """Render the upload form."""
        return render(request, "index.html")

    def post(self, request):
        """Save the posted ``file`` locally, then render with HDFS root info.

        The template context is ``locals()``, so the local names ``file``,
        ``all_file``, ``i`` and ``file_true`` are visible to the template and
        are deliberately left unrenamed.
        """
        file = request.FILES.get("file")
        if file is None:
            # No "file" part in the form: show the form again rather than
            # failing with AttributeError on ``None.name``.
            return render(request, "index.html")
        uploadfile(file)

        all_file = client.list("/")
        for i in all_file:
            # (is_file, name) pair for the entry.
            file_true = get_IS_File(client, "/{}".format(i)), i
            print(file_true)
            # NOTE(review): this return sits inside the loop, so only the
            # first entry is ever classified, and the mkdirs() call below runs
            # only when the root listing is empty. It looks unintended (the
            # `if` that guarded it was commented out). Confirm the desired
            # flow before changing it.
            return render(request, "index.html", locals())

        # Reached only when the HDFS root listing is empty: creates an HDFS
        # directory named after the uploaded file.
        # Disabled experiments from the original at this point:
        #   move_or_rename(client, file.name, '/c.txt')
        #   put_to_hdfs(client, "C:/Users/Lenovo/Desktop/hadoop_hdfs/upload/" + file.name, '/')
        #   write_to_hdfs(client, "upload" + file.name, '222222222222' + '\n')
        mkdirs(client, file.name)
        return render(request, "index.html", locals())
Views.py
# Import the required modules.
import os

import pandas as pd
from django.shortcuts import render
from django.views import View
from matplotlib import pylab as plt
from sqlalchemy import create_engine

from hadoop_hdfs.settings import UPLOAD_ROOT
from web.models import *  # presumably provides the Image model used below


def uploadfile(img):
    """Save the uploaded file ``img`` under ``UPLOAD_ROOT`` using its own name.

    ``img`` must provide ``name`` and ``chunks()``. The ``with`` block closes
    the file even if writing a chunk fails.
    """
    with open(os.path.join(UPLOAD_ROOT, '', img.name), 'wb') as target:
        for chunk in img.chunks():
            target.write(chunk)


class Upload(View):
    """Image upload page backed by the ``Image`` model."""

    def get(self, request):
        """Render the upload page."""
        # show = Image.objects.all()
        return render(request, "haha.html", locals())

    def post(self, request):
        """Save the posted ``img`` locally and record its path in ``Image``."""
        imgs = request.FILES.get('img')
        if imgs is None:
            # No 'img' part in the form: show the page again rather than
            # failing with AttributeError on ``None.name``.
            return render(request, "haha.html")
        uploadfile(imgs)
        # Renamed from `all`, which shadowed the builtin.
        image = Image(img="/upload/" + imgs.name)
        image.save()
        return render(request, "haha.html")

    # Disabled experiment from the original: an alternative post() that read
    # a MySQL table into pandas and saved a pie chart of per-sentiment counts.
    #
    # def post(self, request):
    #     # Database connection through SQLAlchemy with the pymysql driver.
    #     engine = create_engine('mysql+pymysql://root:@127.0.0.1/dj')
    #     # Select every row of the table.
    #     sql = '''select * from haha;'''
    #     # read_sql_query takes the SQL statement and the connection.
    #     df = pd.read_sql_query(sql, engine)
    #     print(df)
    #     df.groupby(by='sentiment').count()['text'].plot.pie(autopct="%0.4f%%", subplots=True)
    #     plt.savefig("../upload/1.jpg")
    #     plt.show()
    #     # Store a DataFrame as a MySQL table without the index column:
    #     # df.to_sql(name='lallala', con=engine, index=False)
    #     return render(request, "haha.html", locals())


class Uploads(View):
    """Page listing every stored ``Image``."""

    def get(self, request):
        """Render the page; the template sees the images as ``show``."""
        # The context is locals(), so the name `show` is part of the
        # template contract.
        show = Image.objects.all()
        return render(request, "haha.html", locals())
haha.py