hadoop 》》 django 简单操作hdfs 语句

IT智囊

2019-12-13

》》

from django.shortcuts import render
# Create your views here.

from hdfs.client import Client
from django.views import View
from hdfs.client import Client
import os

#
# # 关于python操作hdfs的API可以查看官网:
# # https://hdfscli.readthedocs.io/en/latest/api.html
#
# # 读取hdfs文件内容,将每行存入数组返回
# def read_hdfs_file(client, filename):
#     lines = []
#     with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
#         for line in reader:
#             # pass
#             # print line.strip()
#             lines.append(line.strip())
#     return lines
#
#
# # 创建目录
# def mkdirs(client, hdfs_path):
#     client.makedirs(hdfs_path)
#
#
# # 删除hdfs文件
# def delete_hdfs_file(client, hdfs_path):
#     client.delete(hdfs_path)
#
#
# # 上传文件到hdfs
def put_to_hdfs(client, local_path: str, hdfs_path: str) -> None:
    """Upload a local file to HDFS through ``client``.

    Args:
        client: Object exposing ``upload(hdfs_path, local_path, cleanup=...)``.
        local_path: Path of the source file on the local machine.
        hdfs_path: Destination path on HDFS.
    """
    # Note the argument order: the client takes the HDFS target first.
    client.upload(
        hdfs_path,
        local_path,
        cleanup=True,
    )

#
# # 从hdfs获取文件到本地
# def get_from_hdfs(client, hdfs_path, local_path):
#     client.download(hdfs_path, local_path, overwrite=False)
#
#
# # 追加数据到hdfs文件
# def append_to_hdfs(client, hdfs_path, data):
#     client.write(hdfs_path, data, overwrite=False, append=True)
#
#
# # 覆盖数据写到hdfs文件
def write_to_hdfs(client, hdfs_path: str, data) -> None:
    """Write ``data`` to ``hdfs_path``, replacing any existing file.

    Args:
        client: Object exposing ``write(hdfs_path, data, overwrite=..., append=...)``.
        hdfs_path: Target file path on HDFS.
        data: Content handed unchanged to ``client.write``.
    """
    # overwrite=True / append=False: replace the file instead of extending it.
    client.write(
        hdfs_path,
        data,
        overwrite=True,
        append=False,
    )

#
# # 移动或者修改文件
def move_or_rename(client, hdfs_src_path: str, hdfs_dst_path: str) -> None:
    """Move or rename an HDFS path by delegating to ``client.rename``.

    Args:
        client: Object exposing ``rename(src, dst)``.
        hdfs_src_path: Existing path on HDFS.
        hdfs_dst_path: New path on HDFS.
    """
    client.rename(
        hdfs_src_path,
        hdfs_dst_path,
    )
#
#
# # 返回目录下的文件
# def list(client, hdfs_path):
#     return client.list(hdfs_path, status=False)
#
#
# # root: the root directory used for the connection
# client = Client("http://192.168.88.129:50070",
#                 root="/", timeout=5 * 1000, session=False)
# # put_to_hdfs(client,'a.csv','/user/root/a.csv')
# # append_to_hdfs(client,'/b.txt','111111111111111'+'\n')
# # write_to_hdfs(client,'/b.txt','222222222222'+'\n')
# # move_or_rename(client,'/b.txt', '/user/b.txt')
# mkdirs(client, '/input1/python1')
# print(list(client,'/input'))
# read_hdfs_file(client,'/')
# client.list("/")
def mkdirs(client, hdfs_path: str) -> None:
    """Create the directory ``hdfs_path`` on HDFS via ``client.makedirs``.

    Args:
        client: Object exposing ``makedirs(hdfs_path)``.
        hdfs_path: Directory path to create.
    """
    client.makedirs(hdfs_path)

def get_IS_File(client, hdfs_paht):
    """Return True when the HDFS path is a regular file.

    Args:
        client: Object exposing ``status(path)`` that returns a mapping with
            a ``'type'`` entry.
        hdfs_paht: HDFS path to inspect. The misspelled name is kept so that
            existing keyword callers keep working.

    Returns:
        bool: True if the status ``'type'`` equals ``'FILE'``, else False.
    """
    # The original used typographic quotes here, which is a syntax error.
    return client.status(hdfs_paht)['type'] == 'FILE'
from hadoop_hdfs.settings import UPLOAD_ROOT
import os

def put_to_hdfs(client, local_path: str, hdfs_path: str) -> None:
    """Copy ``local_path`` to ``hdfs_path`` using ``client.upload``.

    This definition rebinds the ``put_to_hdfs`` declared earlier in the
    module; both issue the same call.

    Args:
        client: Object exposing ``upload(hdfs_path, local_path, cleanup=...)``.
        local_path: Source path on the local machine.
        hdfs_path: Destination path on HDFS.
    """
    # The HDFS destination is the first positional argument of upload().
    client.upload(hdfs_path, local_path, cleanup=True)
# Module-level HDFS client shared by the views below.
# NOTE(review): HdfsCLI forwards ``timeout`` to its HTTP layer, which
# presumably interprets it in seconds; 5 * 1000 looks like it was meant as
# milliseconds - confirm before relying on it.
client = Client(
    "http://192.168.88.129:50070",
    root="/",
    timeout=5 * 1000,
    session=False,
)
class Index(View):
    """Upload page: saves the posted file locally, then inspects the HDFS root."""

    def get(self, request):
        """Render the upload form."""
        return render(request, "index.html")

    def post(self, request):
        """Store the uploaded file under UPLOAD_ROOT and render ``index.html``.

        The template context is ``locals()``, so the local names ``file``,
        ``all_file``, ``i`` and ``file_true`` are deliberately kept unchanged.

        Returns:
            The rendered ``index.html`` response; status 400 when the request
            carries no ``file`` field.
        """

        def uploadfile(img):
            """Write the uploaded file chunk by chunk into UPLOAD_ROOT."""
            # ``with`` closes the handle even when writing a chunk fails.
            with open(os.path.join(UPLOAD_ROOT, '', img.name), 'wb') as f:
                for chunk in img.chunks():
                    f.write(chunk)

        def read_hdfs_file(client, filename):
            """Return the stripped lines of an HDFS text file.

            Not called by this view at the moment; kept from the original.
            """
            with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
                return [line.strip() for line in reader]

        file = request.FILES.get("file")
        if file is None:
            # Without this guard a request lacking the "file" field crashed
            # with AttributeError inside uploadfile().
            return render(request, "index.html", status=400)

        uploadfile(file)
        all_file = client.list("/")
        for i in all_file:
            # (is_file, name) pair for the entry directly under "/".
            file_true = get_IS_File(client, "/{}".format(i)), i
            print(file_true)
            # NOTE(review): returning here means only the first entry of the
            # listing is ever inspected, and the code after the loop runs only
            # when the HDFS root is empty. Kept as-is because the template
            # consumes ``file_true`` - confirm the intended behaviour.
            return render(request, "index.html", locals())

        # Reached only for an empty root listing: creates an HDFS directory
        # named after the uploaded file. The original has the actual upload
        # (put_to_hdfs) commented out.
        mkdirs(client, file.name)
        return render(request, "index.html", locals())

Views.py

# 导入必要模块
import pandas as pd
from sqlalchemy import create_engine
from  matplotlib import pylab as plt
from django.views import View
from django.shortcuts import render
import os
from hadoop_hdfs.settings import UPLOAD_ROOT
from web.models import *
def uploadfile(img):
    """Save an uploaded file into UPLOAD_ROOT under its own name.

    Args:
        img: Uploaded file object providing ``name`` and ``chunks()``.
    """
    # The original used typographic quotes (a syntax error) and a manual
    # open/close pair that leaked the handle when a write failed.
    with open(os.path.join(UPLOAD_ROOT, '', img.name), 'wb') as f:
        for chunk in img.chunks():
            f.write(chunk)
class Upload(View):
    """Image upload view: saves the file locally and stores an ``Image`` row."""

    def get(self, request):
        """Render the upload page."""
        return render(request, "haha.html", locals())

    def post(self, request):
        """Save the posted ``img`` file and record its ``/upload/`` path.

        Returns:
            The rendered ``haha.html`` response; status 400 when the request
            carries no ``img`` field.
        """
        imgs = request.FILES.get('img')
        if imgs is None:
            # Without this guard a missing field crashed inside uploadfile().
            return render(request, "haha.html", status=400)

        uploadfile(imgs)
        # ``Image`` comes from ``web.models`` (star import); presumably a
        # Django model with an ``img`` field - confirm against the models.
        # Renamed from ``all`` so the builtin is no longer shadowed.
        image = Image(img="/upload/" + imgs.name)
        image.save()
        return render(request, "haha.html")
    # def post(self,request):
    #     # Initialise the database connection through the pymysql driver
    #     # MySQL user: root, empty password, host: 127.0.0.1, database: dj
    #     engine = create_engine('mysql+pymysql://root:@127.0.0.1/dj')
    #
    #     # Query: select every row of the haha table
    #     sql = '''select * from haha;'''
    #
    #     # read_sql_query takes two arguments: the SQL statement and the database connection
    #     df = pd.read_sql_query(sql, engine)
    #
    #     # Print the query result for the haha table
    #     print(df)
    #
    #     # Build a new pandas DataFrame (the example below uses id, name and image_url)
    #     # df = pd.DataFrame({'id':[1,2],'name': ['111','222'],'image_url':['http://111','http://222']})
    #     # df = pd.DataFrame({"id":df['sentiment'],"text":df['text'],})
    #     df.groupby(by='sentiment').count()['text'].plot.pie(autopct="%0.4f%%", subplots=True)
    #     plt.savefig("../upload/1.jpg")
    #     plt.show()
    #     # Store the new DataFrame as a MySQL table without the index column
    #     # df.to_sql(name='lallala', con=engine, index=False)
    #
    #     print('Read from and write to Mysql table successfully!')
    #     return render(request,"haha.html",locals())

class Uploads(View):
    """Listing view that hands every ``Image`` object to ``haha.html``."""

    def get(self, request):
        """Render ``haha.html`` with all ``Image`` objects exposed as ``show``."""
        # The context is locals(), so the local name ``show`` is part of the
        # template contract and must not be renamed.
        show = Image.objects.all()
        return render(request, "haha.html", locals())

haha.py

hdfs django hadoop