处理文本文件并在python中制作一个新文件

时间:2018-11-15 14:00:10

标签: python

我将新模型和标签文件都复制到iOS应用程序中。应用程序参数如下:

3 
5 
65 66 67 68 128
3 2 7 8 -1 
4 5 5 7 101
3 5 6 9 102
2 5 6 9 -1
3 6 7 8 103

#include "iostream"
#include "fstream"
#include "string"


using namespace std;

// Reads whitespace-separated tokens from "23.txt", echoes each token to
// stdout, and - once `line` reaches 2 - copies character codes of the current
// token into a rows x cols integer table.
//
// NOTE(review): `myfile >> l` skips leading whitespace, so `l` can never equal
// "\n" or "\r". As written `line` stays 0 and the table-filling branch below
// never executes.
// NOTE(review): the memory allocated for `a` is never released with delete[].
int main(){
    int line=0;   // number of line breaks seen so far (see note above)
    string l;     // most recently extracted token
    int rows=5;
    int cols=6;
    int count=0;  // number of table cells written; never read afterwards

    int **a=new int*[rows];// Allocate the array of row pointers

    // Allocate one int[cols] buffer per row.
    for (int k=0;k<rows;k++){
        a[k]=new int[cols];
    }
    ifstream myfile ("23.txt"); // Open the input file for reading

    if(myfile.is_open()){
        // NOTE(review): eof() is only set after a read has already failed, so
        // the last token may be processed twice; testing the result of
        // `myfile >> l` directly would avoid that.
        while(!myfile.eof() ){
            myfile>>l;
            cout << l;

            if ((l =="\n")||(l=="\r")){ 
                line++;
            }
            if (line ==2){
                for (int i=0;i<rows;i++){    // Fill every row of the table from the current token
                    for(int j=0;j<cols;j++){
                        // Stores the character code of l[j], not a parsed number.
                        // NOTE(review): j runs up to cols-1, which can index
                        // past the end of a short token.
                        a[i][j]=(int)l[j];
                        cout << l[j] << '\t';
                        count++;
                    } 
                }   
            }
        }
    }
    myfile.close();    // Close the file
}

2 个答案:

答案 0 :(得分:1)

您应该注意一些事情,可以帮助您修复代码:

  1. 检查您的变量名:例如,在代码的开头,您用空列表覆盖了赋给 major 和 minor 的文件对象;
  2. 跟踪循环的运行情况:
    • 如果您希望 new_major 的行数“是 major 文件行数的 100 倍”,那么对于 major 文件中的每一行,外层循环内都应该有一个迭代 100 次的内层循环(而不是遍历 percent);
    • 您可能需要重新考虑生成 final 列表的循环。如果您的目标是给 new_major 的每一行增加“第 7 列”,那么让外层循环遍历 new_major 的各行会使代码更加清晰;然后,让内层循环负责统计 minor 文件中匹配的行数。
  3. 尝试逐步实现/测试您的代码:您清楚地想到了应该执行的步骤(因为您能够描述它们)。尝试一次实施和测试每个步骤。 “有点复杂” 问题总是可以分解成一些简单的小问题。
    • 利用Python是一种解释性语言,可以更轻松,更快速地测试少量逻辑/代码;
    • 例如,先确保能正确生成 new_major 文件,再去做其余的事情。这样做时您会注意到,例如 new_major.append 那一行并没有使用行中各列的值(您必须先拆分该行才能得到这些值)。

  1. [额外]关于解决方案复杂度的考虑:在这种特定情况下,复杂度对您来说可能不是问题,但养成思考这类问题的习惯总是有益的。想象一下,如果 minor 和 major 文件都很长,对它们进行多次迭代的代价可能很高。您可以根据第一列的值(例如使用 dict)把 minor 文件“拆分”成多个部分。这样,在统计与某一 major 行匹配的 minor 条目时,只需迭代第一列的值与该参考行相同的那些条目。

答案 1 :(得分:1)

您可以将 sum 与生成器表达式一起使用,对符合条件的 minor 行进行计数,然后使用 str.join 生成所需的输出:

major = '''chr1    +   1071396 1271396 LOC
chr12   +   1101483 1121483 MIR200B'''

minor = '''chr1    1071496 1071536 1
chr1    1071536 1071566 0
chr1    1073566 1073366 1
chr12   1101487 1101516 0
chr12   1101625 1101671 1'''

# Parse both whitespace-delimited tables into lists of string fields.
major, minor = ([line.split() for line in d.splitlines()] for d in (major, minor))


def _part_bounds(low, high, n_parts):
    """Return the n_parts + 1 integer boundaries that split [low, high].

    Boundaries are computed proportionally instead of with a fixed
    floor-divided step. That guarantees exactly n_parts parts, makes the last
    part end exactly at high, and avoids a zero step (which range() rejects
    with ValueError) when the span is shorter than n_parts.
    """
    if n_parts < 1:
        raise ValueError('n_parts must be at least 1')
    span = high - low
    return [low + span * i // n_parts for i in range(n_parts + 1)]


def iter_part_rows(major_rows, minor_rows, n_parts=100):
    """Yield one tab-separated output line per part of every major row.

    Args:
        major_rows: rows of (name, sign, low, high, note) string fields.
        minor_rows: rows of (name, start, end, flag) string fields.
        n_parts: number of parts each major interval is split into.

    Yields:
        The five major fields with low/high replaced by the part's bounds,
        followed by "<note>_part<k>" and the number of minor rows with the
        same name whose start and end both lie inside the part (inclusive).

    Raises:
        ValueError: if n_parts < 1 or a coordinate is not an integer.
    """
    # Convert minor coordinates once, grouped by name, rather than calling
    # int() on every minor row again for each of the n_parts parts.
    intervals_by_name = {}
    for name_minor, n1, n2, _ in minor_rows:
        intervals_by_name.setdefault(name_minor, []).append((int(n1), int(n2)))

    for name_major, sign, low, high, note in major_rows:
        bounds = _part_bounds(int(low), int(high), n_parts)
        intervals = intervals_by_name.get(name_major, ())
        for part, (start, end) in enumerate(zip(bounds, bounds[1:]), 1):
            count = sum(
                1 for n1, n2 in intervals
                if start <= n1 <= end and start <= n2 <= end
            )
            yield '\t'.join((
                name_major, sign, str(start), str(end), note,
                '%s_part%d' % (note, part), str(count),
            ))


if __name__ == '__main__':
    for row in iter_part_rows(major, minor):
        print(row)

这将输出:

chr1    +   1071396 1073396 LOC LOC_part1   2
chr1    +   1073396 1075396 LOC LOC_part2   0
chr1    +   1075396 1077396 LOC LOC_part3   0
chr1    +   1077396 1079396 LOC LOC_part4   0
chr1    +   1079396 1081396 LOC LOC_part5   0
chr1    +   1081396 1083396 LOC LOC_part6   0
chr1    +   1083396 1085396 LOC LOC_part7   0
chr1    +   1085396 1087396 LOC LOC_part8   0
chr1    +   1087396 1089396 LOC LOC_part9   0
chr1    +   1089396 1091396 LOC LOC_part10  0
chr1    +   1091396 1093396 LOC LOC_part11  0
chr1    +   1093396 1095396 LOC LOC_part12  0
chr1    +   1095396 1097396 LOC LOC_part13  0
chr1    +   1097396 1099396 LOC LOC_part14  0
chr1    +   1099396 1101396 LOC LOC_part15  0
chr1    +   1101396 1103396 LOC LOC_part16  0
chr1    +   1103396 1105396 LOC LOC_part17  0
chr1    +   1105396 1107396 LOC LOC_part18  0
chr1    +   1107396 1109396 LOC LOC_part19  0
chr1    +   1109396 1111396 LOC LOC_part20  0
chr1    +   1111396 1113396 LOC LOC_part21  0
chr1    +   1113396 1115396 LOC LOC_part22  0
chr1    +   1115396 1117396 LOC LOC_part23  0
chr1    +   1117396 1119396 LOC LOC_part24  0
chr1    +   1119396 1121396 LOC LOC_part25  0
chr1    +   1121396 1123396 LOC LOC_part26  0
chr1    +   1123396 1125396 LOC LOC_part27  0
chr1    +   1125396 1127396 LOC LOC_part28  0
chr1    +   1127396 1129396 LOC LOC_part29  0
chr1    +   1129396 1131396 LOC LOC_part30  0
chr1    +   1131396 1133396 LOC LOC_part31  0
chr1    +   1133396 1135396 LOC LOC_part32  0
chr1    +   1135396 1137396 LOC LOC_part33  0
chr1    +   1137396 1139396 LOC LOC_part34  0
chr1    +   1139396 1141396 LOC LOC_part35  0
chr1    +   1141396 1143396 LOC LOC_part36  0
chr1    +   1143396 1145396 LOC LOC_part37  0
chr1    +   1145396 1147396 LOC LOC_part38  0
chr1    +   1147396 1149396 LOC LOC_part39  0
chr1    +   1149396 1151396 LOC LOC_part40  0
chr1    +   1151396 1153396 LOC LOC_part41  0
chr1    +   1153396 1155396 LOC LOC_part42  0
chr1    +   1155396 1157396 LOC LOC_part43  0
chr1    +   1157396 1159396 LOC LOC_part44  0
chr1    +   1159396 1161396 LOC LOC_part45  0
chr1    +   1161396 1163396 LOC LOC_part46  0
chr1    +   1163396 1165396 LOC LOC_part47  0
chr1    +   1165396 1167396 LOC LOC_part48  0
chr1    +   1167396 1169396 LOC LOC_part49  0
chr1    +   1169396 1171396 LOC LOC_part50  0
chr1    +   1171396 1173396 LOC LOC_part51  0
chr1    +   1173396 1175396 LOC LOC_part52  0
chr1    +   1175396 1177396 LOC LOC_part53  0
chr1    +   1177396 1179396 LOC LOC_part54  0
chr1    +   1179396 1181396 LOC LOC_part55  0
chr1    +   1181396 1183396 LOC LOC_part56  0
chr1    +   1183396 1185396 LOC LOC_part57  0
chr1    +   1185396 1187396 LOC LOC_part58  0
chr1    +   1187396 1189396 LOC LOC_part59  0
chr1    +   1189396 1191396 LOC LOC_part60  0
chr1    +   1191396 1193396 LOC LOC_part61  0
chr1    +   1193396 1195396 LOC LOC_part62  0
chr1    +   1195396 1197396 LOC LOC_part63  0
chr1    +   1197396 1199396 LOC LOC_part64  0
chr1    +   1199396 1201396 LOC LOC_part65  0
chr1    +   1201396 1203396 LOC LOC_part66  0
chr1    +   1203396 1205396 LOC LOC_part67  0
chr1    +   1205396 1207396 LOC LOC_part68  0
chr1    +   1207396 1209396 LOC LOC_part69  0
chr1    +   1209396 1211396 LOC LOC_part70  0
chr1    +   1211396 1213396 LOC LOC_part71  0
chr1    +   1213396 1215396 LOC LOC_part72  0
chr1    +   1215396 1217396 LOC LOC_part73  0
chr1    +   1217396 1219396 LOC LOC_part74  0
chr1    +   1219396 1221396 LOC LOC_part75  0
chr1    +   1221396 1223396 LOC LOC_part76  0
chr1    +   1223396 1225396 LOC LOC_part77  0
chr1    +   1225396 1227396 LOC LOC_part78  0
chr1    +   1227396 1229396 LOC LOC_part79  0
chr1    +   1229396 1231396 LOC LOC_part80  0
chr1    +   1231396 1233396 LOC LOC_part81  0
chr1    +   1233396 1235396 LOC LOC_part82  0
chr1    +   1235396 1237396 LOC LOC_part83  0
chr1    +   1237396 1239396 LOC LOC_part84  0
chr1    +   1239396 1241396 LOC LOC_part85  0
chr1    +   1241396 1243396 LOC LOC_part86  0
chr1    +   1243396 1245396 LOC LOC_part87  0
chr1    +   1245396 1247396 LOC LOC_part88  0
chr1    +   1247396 1249396 LOC LOC_part89  0
chr1    +   1249396 1251396 LOC LOC_part90  0
chr1    +   1251396 1253396 LOC LOC_part91  0
chr1    +   1253396 1255396 LOC LOC_part92  0
chr1    +   1255396 1257396 LOC LOC_part93  0
chr1    +   1257396 1259396 LOC LOC_part94  0
chr1    +   1259396 1261396 LOC LOC_part95  0
chr1    +   1261396 1263396 LOC LOC_part96  0
chr1    +   1263396 1265396 LOC LOC_part97  0
chr1    +   1265396 1267396 LOC LOC_part98  0
chr1    +   1267396 1269396 LOC LOC_part99  0
chr1    +   1269396 1271396 LOC LOC_part100 0
chr12   +   1101483 1101683 MIR200B MIR200B_part1   2
chr12   +   1101683 1101883 MIR200B MIR200B_part2   0
chr12   +   1101883 1102083 MIR200B MIR200B_part3   0
chr12   +   1102083 1102283 MIR200B MIR200B_part4   0
chr12   +   1102283 1102483 MIR200B MIR200B_part5   0
chr12   +   1102483 1102683 MIR200B MIR200B_part6   0
chr12   +   1102683 1102883 MIR200B MIR200B_part7   0
chr12   +   1102883 1103083 MIR200B MIR200B_part8   0
chr12   +   1103083 1103283 MIR200B MIR200B_part9   0
chr12   +   1103283 1103483 MIR200B MIR200B_part10  0
chr12   +   1103483 1103683 MIR200B MIR200B_part11  0
chr12   +   1103683 1103883 MIR200B MIR200B_part12  0
chr12   +   1103883 1104083 MIR200B MIR200B_part13  0
chr12   +   1104083 1104283 MIR200B MIR200B_part14  0
chr12   +   1104283 1104483 MIR200B MIR200B_part15  0
chr12   +   1104483 1104683 MIR200B MIR200B_part16  0
chr12   +   1104683 1104883 MIR200B MIR200B_part17  0
chr12   +   1104883 1105083 MIR200B MIR200B_part18  0
chr12   +   1105083 1105283 MIR200B MIR200B_part19  0
chr12   +   1105283 1105483 MIR200B MIR200B_part20  0
chr12   +   1105483 1105683 MIR200B MIR200B_part21  0
chr12   +   1105683 1105883 MIR200B MIR200B_part22  0
chr12   +   1105883 1106083 MIR200B MIR200B_part23  0
chr12   +   1106083 1106283 MIR200B MIR200B_part24  0
chr12   +   1106283 1106483 MIR200B MIR200B_part25  0
chr12   +   1106483 1106683 MIR200B MIR200B_part26  0
chr12   +   1106683 1106883 MIR200B MIR200B_part27  0
chr12   +   1106883 1107083 MIR200B MIR200B_part28  0
chr12   +   1107083 1107283 MIR200B MIR200B_part29  0
chr12   +   1107283 1107483 MIR200B MIR200B_part30  0
chr12   +   1107483 1107683 MIR200B MIR200B_part31  0
chr12   +   1107683 1107883 MIR200B MIR200B_part32  0
chr12   +   1107883 1108083 MIR200B MIR200B_part33  0
chr12   +   1108083 1108283 MIR200B MIR200B_part34  0
chr12   +   1108283 1108483 MIR200B MIR200B_part35  0
chr12   +   1108483 1108683 MIR200B MIR200B_part36  0
chr12   +   1108683 1108883 MIR200B MIR200B_part37  0
chr12   +   1108883 1109083 MIR200B MIR200B_part38  0
chr12   +   1109083 1109283 MIR200B MIR200B_part39  0
chr12   +   1109283 1109483 MIR200B MIR200B_part40  0
chr12   +   1109483 1109683 MIR200B MIR200B_part41  0
chr12   +   1109683 1109883 MIR200B MIR200B_part42  0
chr12   +   1109883 1110083 MIR200B MIR200B_part43  0
chr12   +   1110083 1110283 MIR200B MIR200B_part44  0
chr12   +   1110283 1110483 MIR200B MIR200B_part45  0
chr12   +   1110483 1110683 MIR200B MIR200B_part46  0
chr12   +   1110683 1110883 MIR200B MIR200B_part47  0
chr12   +   1110883 1111083 MIR200B MIR200B_part48  0
chr12   +   1111083 1111283 MIR200B MIR200B_part49  0
chr12   +   1111283 1111483 MIR200B MIR200B_part50  0
chr12   +   1111483 1111683 MIR200B MIR200B_part51  0
chr12   +   1111683 1111883 MIR200B MIR200B_part52  0
chr12   +   1111883 1112083 MIR200B MIR200B_part53  0
chr12   +   1112083 1112283 MIR200B MIR200B_part54  0
chr12   +   1112283 1112483 MIR200B MIR200B_part55  0
chr12   +   1112483 1112683 MIR200B MIR200B_part56  0
chr12   +   1112683 1112883 MIR200B MIR200B_part57  0
chr12   +   1112883 1113083 MIR200B MIR200B_part58  0
chr12   +   1113083 1113283 MIR200B MIR200B_part59  0
chr12   +   1113283 1113483 MIR200B MIR200B_part60  0
chr12   +   1113483 1113683 MIR200B MIR200B_part61  0
chr12   +   1113683 1113883 MIR200B MIR200B_part62  0
chr12   +   1113883 1114083 MIR200B MIR200B_part63  0
chr12   +   1114083 1114283 MIR200B MIR200B_part64  0
chr12   +   1114283 1114483 MIR200B MIR200B_part65  0
chr12   +   1114483 1114683 MIR200B MIR200B_part66  0
chr12   +   1114683 1114883 MIR200B MIR200B_part67  0
chr12   +   1114883 1115083 MIR200B MIR200B_part68  0
chr12   +   1115083 1115283 MIR200B MIR200B_part69  0
chr12   +   1115283 1115483 MIR200B MIR200B_part70  0
chr12   +   1115483 1115683 MIR200B MIR200B_part71  0
chr12   +   1115683 1115883 MIR200B MIR200B_part72  0
chr12   +   1115883 1116083 MIR200B MIR200B_part73  0
chr12   +   1116083 1116283 MIR200B MIR200B_part74  0
chr12   +   1116283 1116483 MIR200B MIR200B_part75  0
chr12   +   1116483 1116683 MIR200B MIR200B_part76  0
chr12   +   1116683 1116883 MIR200B MIR200B_part77  0
chr12   +   1116883 1117083 MIR200B MIR200B_part78  0
chr12   +   1117083 1117283 MIR200B MIR200B_part79  0
chr12   +   1117283 1117483 MIR200B MIR200B_part80  0
chr12   +   1117483 1117683 MIR200B MIR200B_part81  0
chr12   +   1117683 1117883 MIR200B MIR200B_part82  0
chr12   +   1117883 1118083 MIR200B MIR200B_part83  0
chr12   +   1118083 1118283 MIR200B MIR200B_part84  0
chr12   +   1118283 1118483 MIR200B MIR200B_part85  0
chr12   +   1118483 1118683 MIR200B MIR200B_part86  0
chr12   +   1118683 1118883 MIR200B MIR200B_part87  0
chr12   +   1118883 1119083 MIR200B MIR200B_part88  0
chr12   +   1119083 1119283 MIR200B MIR200B_part89  0
chr12   +   1119283 1119483 MIR200B MIR200B_part90  0
chr12   +   1119483 1119683 MIR200B MIR200B_part91  0
chr12   +   1119683 1119883 MIR200B MIR200B_part92  0
chr12   +   1119883 1120083 MIR200B MIR200B_part93  0
chr12   +   1120083 1120283 MIR200B MIR200B_part94  0
chr12   +   1120283 1120483 MIR200B MIR200B_part95  0
chr12   +   1120483 1120683 MIR200B MIR200B_part96  0
chr12   +   1120683 1120883 MIR200B MIR200B_part97  0
chr12   +   1120883 1121083 MIR200B MIR200B_part98  0
chr12   +   1121083 1121283 MIR200B MIR200B_part99  0
chr12   +   1121283 1121483 MIR200B MIR200B_part100 0

请注意,您的预期输出不正确,因为 minor 文件中 chr1 1073566 1073366 1 这一行的两个数字并不都落在 major 的同一个部分范围内。