История изменений

Исправление olegd, 09.10.19 15:01 (текущая версия) :

попробуй numpy

numpy.loadtxt падает, если в строках получается разное число столбцов.

Попробовал для однократного чтения:

#!/usr/bin/python3
from sys import argv
import time
import numpy as np

for fil in argv[1:]:
    # Time several ways of turning one text file into a list of per-line
    # token lists. Every section re-opens and re-reads the file inside its
    # own timed region, so each printed figure includes the read itself.
    # Files are opened via 'with' so no handle is left for the garbage
    # collector to close.

    # Baseline: read the whole file and split it into lines (no tokenizing).
    t0 = time.perf_counter()
    with open( fil, 'rt' ) as f:
        ar = f.read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print( 'ar.splitlines():', t1-t0 )

    # Variant "replace": overwrite each line in place with its token list.
    # The index loop is the pattern being measured, so it is kept as is.
    t0 = time.perf_counter()
    with open( fil, 'rt' ) as f:
        ar = f.read()
    lines = ar.splitlines()
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    t1 = time.perf_counter()
    print( 'replace:', t1-t0 )

    # Variant "append": build a second list by appending each token list.
    t0 = time.perf_counter()
    with open( fil, 'rt' ) as f:
        ar = f.read()
    lines = ar.splitlines()
    lines2 = []
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    t1 = time.perf_counter()
    print( 'append:', t1-t0 )

    # Variant "[]": list comprehension over the already split lines.
    t0 = time.perf_counter()
    with open( fil, 'rt' ) as f:
        ar = f.read()
    lines = ar.splitlines()
    lines = [ l.split() for l in lines ]
    t1 = time.perf_counter()
    print( '[]:', t1-t0 )

    # Variant "lambda": iterate the file object line by line and split each
    # line through map(); the result is unpacked into a list.
    t0 = time.perf_counter()
    with open(fil) as f:
        lines = [*map(lambda x: x.split(), f), ]
    t1 = time.perf_counter()
    print( 'lambda:', t1-t0 )

    # Variant "numpy": load the file as an array of strings with at least
    # two dimensions (ndmin=2); delimiter=None splits on whitespace.
    # np.loadtxt requires every line to have the same number of columns.
    t0 = time.perf_counter()
    lines = np.loadtxt( fil, dtype=str, comments=None, delimiter=None, converters=None,
      skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes' )
    t1 = time.perf_counter()
    print( 'numpy:', t1-t0 )

На файл, который остальные варианты обрабатывали за 0,21–0,23 секунды, numpy.loadtxt затратила 2,5 с.

Исправление olegd, 09.10.19 15:00:

попробуй numpy

numpy.loadtxt падает, если в строках получается разное число столбцов.

Попробовал для однократного чтения:

#!/usr/bin/python3
from sys import argv
import time
import numpy as np

for fil in argv[1:]:
    # Benchmark: for each file named on the command line, time several ways
    # of reading it into a list of per-line token lists. Each section
    # re-reads the file inside its own timed region.
    # NOTE(review): the open(...).read() calls below never close the file
    # object explicitly.

    # Baseline: read the whole file and split it into lines only.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print( 'ar.splitlines():', t1-t0 )

    # Variant "replace": overwrite each line in place with its token list.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    #print( lines )
    t1 = time.perf_counter()
    print( 'replace:', t1-t0 )

    # Variant "append": build a second list by appending each token list.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    lines2 = []
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    #print( lines2 )
    t1 = time.perf_counter()
    print( 'append:', t1-t0 )

    # Variant "[]": list comprehension over the already split lines.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    lines = [ l.split() for l in lines ]
    #print( lines )
    t1 = time.perf_counter()
    print( '[]:', t1-t0 )

    # Variant "lambda": iterate the file object line by line and split each
    # line through map(); the result is unpacked into a list.
    t0 = time.perf_counter()
    with open(fil) as f:
      lines = [*map(lambda x: x.split(), f), ]
    #print(lines)
    t1 = time.perf_counter()
    print( 'lambda:', t1-t0 )

    # Variant "numpy": load the file as an array of strings with at least
    # two dimensions (ndmin=2); delimiter=None splits on whitespace.
    t0 = time.perf_counter()
    lines = np.loadtxt( fil, dtype=str, comments=None, delimiter=None, converters=None, 
      skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes' )
    t1 = time.perf_counter()
    #print( lines )
    print( 'numpy:', t1-t0 )

На файл, с которым другие работали 0,21-0,22 секунд, Numpy.loadtxt затратила 2,5 с.

Исходная версия olegd, 09.10.19 14:58:

попробуй numpy

numpy.loadtxt падает, если в строках получается разное число столбцов.

Попробовал для однократного чтения:

#!/usr/bin/python3
from sys import argv
import time
import numpy as np

for fil in argv[1:]:
    # Benchmark: for each file named on the command line, time several ways
    # of reading it into a list of per-line token lists. Each section
    # re-reads the file inside its own timed region.
    # NOTE(review): the open(...).read() calls below never close the file
    # object explicitly.

    # Baseline: read the whole file and split it into lines only.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    t1 = time.perf_counter()
    print( 'ar.splitlines():', t1-t0 )

    # Variant "replace": overwrite each line in place with its token list.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    for line in range(len(lines)):
        lines[line] = lines[line].split()
    #print( lines )
    t1 = time.perf_counter()
    print( 'replace:', t1-t0 )

    # Variant "append": build a second list by appending each token list.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    lines2 = []
    for line in range(len(lines)):
        lines2.append(lines[line].split())
    #print( lines2 )
    t1 = time.perf_counter()
    # NOTE(review): this section times the append variant, yet it is printed
    # under the same 'replace:' label as the previous section.
    print( 'replace:', t1-t0 )

    # Variant "[]": list comprehension over the already split lines.
    t0 = time.perf_counter()
    ar = open( fil, 'rt' ).read()
    lines = ar.splitlines()
    lines = [ l.split() for l in lines ]
    #print( lines )
    t1 = time.perf_counter()
    print( '[]:', t1-t0 )

    # Variant "lambda": iterate the file object line by line and split each
    # line through map(); the result is unpacked into a list.
    t0 = time.perf_counter()
    with open(fil) as f:
      lines = [*map(lambda x: x.split(), f), ]
    #print(lines)
    t1 = time.perf_counter()
    print( 'lambda:', t1-t0 )

    # Variant "numpy": load the file as an array of strings with at least
    # two dimensions (ndmin=2); delimiter=None splits on whitespace.
    t0 = time.perf_counter()
    lines = np.loadtxt( fil, dtype=str, comments=None, delimiter=None, converters=None, 
      skiprows=0, usecols=None, unpack=False, ndmin=2, encoding='bytes' )
    t1 = time.perf_counter()
    #print( lines )
    print( 'numpy:', t1-t0 )

На файл, с которым другие работали 0,21-0,22 секунд, Numpy.loadtxt затратила 2,5 с.