python os.makedirs没有创建目录

时间:2018-12-27 09:41:14

标签: python

出于某种莫名其妙的原因,下面的代码能够创建 output_result_dir 中保存的、尚不存在的目录,却无法创建 output_log_dir 中保存的、尚不存在的目录。后者会导致 FileNotFound 错误,描述为“[WinError 3] 系统找不到指定的路径:'runpackage1\calculated\logs'”

# Build the three sub-directory paths underneath output_dir
# (output_dir itself is defined outside this snippet).
log_dir = os.path.join(output_dir, 'logs')
output_config_dir = os.path.join(output_dir, 'config')
output_result_dir = os.path.join(output_dir, 'results')
# Create each directory that does not exist yet; os.makedirs also creates
# any missing intermediate directories along the path.
for directory in [output_result_dir, log_dir, output_config_dir]:
    if not os.path.exists(directory):
        os.makedirs(directory)

我是不是漏掉了什么特别明显的东西?我还尝试用 os.path.abspath 包裹路径,把相对路径转换为完整路径,但这也无济于事。

编辑:正如一些人正确指出的那样,我已把代码中的 path 改成了 directory。实际上我是把两个独立函数中的代码复制到一个独立的代码块里,才引入了这个错误。这并不是导致我问题的原因。

2 个答案:

答案 0 :(得分:1)

无法确认您的错误(https://pyfiddle.io-3.6):

import os 

def _print_tree(top):
    """Print every directory below *top* together with its sub-directory names."""
    for current, subdirs, _ in os.walk(top):
        print(current, subdirs)


# Directory tree before anything has been created.
_print_tree("./")
print()

# Derive the three target paths under ./temp and create each of them;
# os.makedirs creates the missing parent "./temp" as part of the first call.
output_dir = "./temp"
output_result_dir = os.path.join(output_dir, 'results')
log_dir = os.path.join(output_dir, 'logs')
output_config_dir = os.path.join(output_dir, 'config')
for directory in (output_result_dir, log_dir, output_config_dir):
    os.makedirs(directory)

# Directory tree after creation.
_print_tree("./")

输出:

('./', [])        # before

('./', ['temp'])  # after
('./temp', ['results', 'logs', 'config'])
('./temp/results', [])
('./temp/logs', [])

创建mcve解决了您的问题。


# NOTE: this snippet illustrates the name mismatch discussed in the text:
# the loop binds `directory`, but the body checks and creates `path`,
# so the loop never acts on the directory it is iterating over.
log_dir = os.path.join(output_dir, 'logs')
output_config_dir = os.path.join(output_dir, 'config')
output_result_dir = os.path.join(output_dir, 'results')
for directory in [output_result_dir, log_dir, output_config_dir]:   # using directory
    if not os.path.exists(path):                                    # using path
        os.makedirs(path)                                           # using path

您也不需要用 if not os.path.exists(path): 来做保护性判断——只要传入 exist_ok=True,当目录已经存在时,os.makedirs(path, exist_ok=True) 既不会报错,也不会做任何改动。

修复:

# Build the three sub-directory paths underneath output_dir
# (output_dir itself is defined outside this snippet).
log_dir = os.path.join(output_dir, 'logs')
output_config_dir = os.path.join(output_dir, 'config')
output_result_dir = os.path.join(output_dir, 'results')
for directory in [output_result_dir, log_dir, output_config_dir]:   # directory
    # exist_ok=True makes os.makedirs succeed silently when the directory is
    # already there, so no os.path.exists() guard is needed. The keyword is
    # spelled exist_ok; "exists_ok" raises TypeError (unexpected keyword).
    os.makedirs(directory, exist_ok=True)                           # directory

答案 1 :(得分:0)

给定的代码没有错误,您已对其进行了修复。

from pyspark.sql.functions import explode, array, struct, lit, col
# First sample frame: four (primary_key, book, number) rows.
df1_rows = [(1, "book1", 1), (2, "book2", 2), (3, "book3", 3), (4, "book4", 4)]
df1_columns = ["primary_key", "book", "number"]
df1 = sc.parallelize(df1_rows).toDF(df1_columns)
df1.show()
+-----------+-----+------+
|primary_key| book|number|
+-----------+-----+------+
|          1|book1|     1|
|          2|book2|     2|
|          3|book3|     3|
|          4|book4|     4|
+-----------+-----+------+

# Second sample frame: same columns as df1, with some differing values and keys.
df2_rows = [(1, "book1", 1), (2, "book8", 8), (3, "book3", 7), (5, "book5", 5)]
df2_columns = ["primary_key", "book", "number"]
df2 = sc.parallelize(df2_rows).toDF(df2_columns)
df2.show()
+-----------+-----+------+
|primary_key| book|number|
+-----------+-----+------+
|          1|book1|     1|
|          2|book8|     8|
|          3|book3|     7|
|          5|book5|     5|
+-----------+-----+------+

def to_transpose(df, by):
    """Unpivot *df* into long format, one row per (identifier, column) pair.

    Args:
        df: DataFrame to reshape.
        by: list of column names to keep as identifier columns; it is
            concatenated with other lists, so it must be a list.

    Returns:
        A DataFrame with the ``by`` columns plus ``key`` (the original
        column name) and ``val`` (that column's value).

    Raises:
        AssertionError: if the non-identifier columns do not share one type.
    """
    # Every column that is not an identifier, paired with its type description.
    value_columns = [(name, dtype) for name, dtype in df.dtypes if name not in by]
    cols, dtypes = zip(*value_columns)

    # All values end up in a single "val" column, so their types must agree.
    assert len(set(dtypes)) == 1, "All columns have to be of the same type"

    # One (key, val) struct per value column, collected in an array and
    # exploded so that each struct becomes a row of its own.
    pairs = [
        struct(lit(name).alias("key"), col(name).alias("val"))
        for name in cols
    ]
    kvs = explode(array(pairs)).alias("kvs")

    exploded = df.select(by + [kvs])
    return exploded.select(by + ["kvs.key", "kvs.val"])

# Cast "number" to string so that all non-key columns share one type (which
# to_transpose asserts), then unpivot each frame into
# (primary_key, diff_column_name, value) rows.
df1_trans = to_transpose(df1.withColumn('number', col('number').cast('string')), ["primary_key"])\
            .withColumnRenamed("val", "dataframe1")\
            .withColumnRenamed("key", "diff_column_name")
df2_trans = to_transpose(df2.withColumn('number', col('number').cast('string')), ["primary_key"])\
            .withColumnRenamed("val", "dataframe2")\
            .withColumnRenamed("key", "diff_column_name")

# A full outer join keeps (key, column) pairs that exist in only one frame.
df = df1_trans.join(df2_trans, ['primary_key', 'diff_column_name'], how='full')
# Keep rows whose values differ, or where exactly one side has a value; the
# null checks are spelled out because != yields null when either side is null.
df = df.where((col('dataframe1') != col('dataframe2'))
              | (col('dataframe1').isNotNull() & col('dataframe2').isNull())
              | (col('dataframe2').isNotNull() & col('dataframe1').isNull())).sort('primary_key')
# show() only prints and returns None, so its result must not be assigned
# back to df (that would discard the DataFrame).
df.show()
+-----------+----------------+----------+----------+
|primary_key|diff_column_name|dataframe1|dataframe2|
+-----------+----------------+----------+----------+
|          2|            book|     book2|     book8|
|          2|          number|         2|         8|
|          3|          number|         3|         7|
|          4|            book|     book4|      null|
|          4|          number|         4|      null|
|          5|            book|      null|     book5|
|          5|          number|      null|         5|
+-----------+----------------+----------+----------+