同時使用impala和pyhive鏈接時會報這個錯誤,原因是import順序不對導致沖突。
按照以下順序導入就不會報錯了:
from pyhive import hive
from impala.dbapi import connect as impalaConn
from impala.util import as_pandas
這里順便扔兩個簡單的查詢方法:
def selectFromImpala(sql):
    """Run ``sql`` through an Impala connection and return the result.

    Connection settings are read from the ``"hive"`` section of the
    module-level ``config`` mapping (keys ``ipaddress``, ``port`` and
    ``username``).

    Args:
        sql: Query text handed unchanged to ``cursor.execute``.

    Returns:
        Whatever ``as_pandas`` builds from the executed cursor.
    """
    hive_conf = config["hive"]
    ipaddr = hive_conf["ipaddress"]
    port = hive_conf["port"]
    username = hive_conf["username"]

    conn = impalaConn(host=ipaddr, port=int(port))
    try:
        cursor = conn.cursor(user=username)
        try:
            cursor.execute(sql)
            return as_pandas(cursor)
        finally:
            # Release the cursor even when the query or the conversion fails.
            cursor.close()
    finally:
        # Always give the connection back, including on errors.
        conn.close()
def selectFromHive(sql):
    """Run ``sql`` through a PyHive connection and return a DataFrame.

    Connection settings are read from the ``"hive"`` section of the
    module-level ``config`` mapping (keys ``hostname``, ``port`` and
    ``username``).

    Args:
        sql: Query text handed unchanged to ``pd.read_sql``.

    Returns:
        The DataFrame from ``pd.read_sql`` with every column label reduced
        to the part after its last ``.`` (``"t.col"`` becomes ``"col"``).
    """
    hive_conf = config["hive"]
    hostname = hive_conf["hostname"]
    port = hive_conf["port"]
    username = hive_conf["username"]

    conn = hive.connect(host=hostname,
                        port=port,
                        username=username)
    try:
        df = pd.read_sql(sql, conn)
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # Keep only the text after the last dot in each column label.
    short_names = {column: column.split('.')[-1] for column in df.columns}
    df.rename(columns=short_names, inplace=True)
    return df