LINUX.ORG.RU

История изменений

Исправление Legioner, (текущая версия) :

Анонимус вдохновил. Первая версия, код тупо в лоб.

package test;

import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
 * Counts how often each word occurs in a text file and writes the words,
 * most frequent first, to a result file.
 *
 * <p>A word is a maximal run of the ASCII letters {@code A-Z} / {@code a-z};
 * upper-case letters are folded to lower case and every other byte acts as a
 * separator. Elapsed times of the three phases (count, sort, write) are
 * printed to standard output in nanoseconds.
 */
public class WordsCounter {
    /** Input file used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "C:\\Users\\Vladimir\\Downloads\\huge.txt";

    /** Result file used when no second command-line argument is given. */
    private static final String DEFAULT_RESULT_PATH = "tmp\\result.txt";

    /**
     * Largest region mapped in one go. A single mapping may not exceed
     * {@link Integer#MAX_VALUE} bytes, so bigger files are mapped piecewise.
     */
    private static final long MAX_MAPPING_SIZE = 1L << 30;

    /**
     * Runs the word count.
     *
     * @param args optional: {@code args[0]} is the input file and
     *             {@code args[1]} the result file; the built-in default paths
     *             are used for whichever is missing
     * @throws IOException if the input cannot be read or the result cannot be written
     */
    public static void main(String[] args) throws IOException {
        var startTime = System.nanoTime();

        var inputPath = Path.of(argumentOrDefault(args, 0, DEFAULT_INPUT_PATH));
        var resultPath = Path.of(argumentOrDefault(args, 1, DEFAULT_RESULT_PATH));

        var wordStats = countWords(inputPath);

        var part1Time = System.nanoTime();
        System.out.println("Part 1: " + (part1Time - startTime));

        var wordStatList = new ArrayList<>(wordStats.values());
        // Highest count first; equal counts are ordered alphabetically so the
        // output does not depend on hash map iteration order.
        Comparator<WordStat> byCount = Comparator.comparingInt(WordStat::count);
        wordStatList.sort(byCount.reversed().thenComparing(WordStat::word));

        var part2Time = System.nanoTime();
        System.out.println("Part 2: " + (part2Time - part1Time));

        // Create the target directory up front instead of failing on a missing one.
        Path resultDir = resultPath.getParent();
        if (resultDir != null) {
            Files.createDirectories(resultDir);
        }
        try (var writer = Files.newBufferedWriter(resultPath)) {
            for (WordStat wordStat : wordStatList) {
                writer.write(Integer.toString(wordStat.count()));
                writer.write(' ');
                writer.write(wordStat.word());
                writer.newLine();
            }
        }
        var part3Time = System.nanoTime();
        System.out.println("Part 3: " + (part3Time - part2Time));
        System.out.println("Total: " + (part3Time - startTime));
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Scans the file byte by byte and returns per-word statistics keyed by the lower-cased word. */
    private static Map<String, WordStat> countWords(Path inputPath) throws IOException {
        var wordStats = new HashMap<String, WordStat>();
        // Declared outside the mapping loop so a word that straddles two
        // mapped regions is still assembled as one word.
        var wordBuilder = new StringBuilder();

        try (var fileChannel = FileChannel.open(inputPath)) {
            long fileSize = fileChannel.size();
            for (long offset = 0; offset < fileSize; offset += MAX_MAPPING_SIZE) {
                long length = Math.min(MAX_MAPPING_SIZE, fileSize - offset);
                MappedByteBuffer mappedBuffer =
                        fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, length);
                while (mappedBuffer.hasRemaining()) {
                    byte b = mappedBuffer.get();
                    if (('A' <= b) && (b <= 'Z')) {
                        wordBuilder.append((char) (b + ('a' - 'A')));
                    } else if (('a' <= b) && (b <= 'z')) {
                        wordBuilder.append((char) b);
                    } else {
                        flushWord(wordBuilder, wordStats);
                    }
                }
            }
        }
        // The file may end in the middle of a word; without this flush the
        // last word would be silently dropped.
        flushWord(wordBuilder, wordStats);
        return wordStats;
    }

    /** Records the word collected in {@code wordBuilder}, if any, and empties the builder. */
    private static void flushWord(StringBuilder wordBuilder, Map<String, WordStat> wordStats) {
        if (wordBuilder.isEmpty()) {
            return;
        }
        var word = wordBuilder.toString();
        var wordStat = wordStats.get(word);
        if (wordStat == null) {
            wordStats.put(word, new WordStat(word));
        } else {
            wordStat.incCount();
        }
        wordBuilder.setLength(0);
    }

    /** A word together with the number of times it has been seen so far. */
    static class WordStat {
        private final String word;
        private int count;

        /** Creates the statistic for a word seen for the first time (count starts at 1). */
        WordStat(String word) {
            this.word = word;
            count = 1;
        }

        /** Returns the word this statistic belongs to. */
        String word() {
            return word;
        }

        /** Returns how many times the word has been seen. */
        int count() {
            return count;
        }

        /** Registers one more occurrence of the word. */
        void incCount() {
            count++;
        }
    }
}

Отрабатывает за

Part 1: 4861061700
Part 2: 62542400
Part 3: 59626800
Total: 4983230900

Ну т.е. за 5 секунд примерно.

Сейчас попробую все 4 ядра загрузить. Думаю, секунду-другую есть шансы выиграть.

Исходная версия Legioner, :

Анонимус вдохновил. Первая версия, код тупо в лоб.

[code=java]
package test;

import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

/**
 * Counts how often each word occurs in a text file and writes the words,
 * most frequent first, to a result file.
 *
 * <p>A word is a maximal run of the ASCII letters {@code A-Z} / {@code a-z};
 * upper-case letters are folded to lower case and every other byte acts as a
 * separator. Elapsed times of the three phases (count, sort, write) are
 * printed to standard output in nanoseconds.
 */
public class WordsCounter {
    /** Input file used when no first command-line argument is given. */
    private static final String DEFAULT_INPUT_PATH = "C:\\Users\\Vladimir\\Downloads\\huge.txt";

    /** Result file used when no second command-line argument is given. */
    private static final String DEFAULT_RESULT_PATH = "tmp\\result.txt";

    /**
     * Largest region mapped in one go. A single mapping may not exceed
     * {@link Integer#MAX_VALUE} bytes, so bigger files are mapped piecewise.
     */
    private static final long MAX_MAPPING_SIZE = 1L << 30;

    /**
     * Runs the word count.
     *
     * @param args optional: {@code args[0]} is the input file and
     *             {@code args[1]} the result file; the built-in default paths
     *             are used for whichever is missing
     * @throws IOException if the input cannot be read or the result cannot be written
     */
    public static void main(String[] args) throws IOException {
        var startTime = System.nanoTime();

        var inputPath = Path.of(argumentOrDefault(args, 0, DEFAULT_INPUT_PATH));
        var resultPath = Path.of(argumentOrDefault(args, 1, DEFAULT_RESULT_PATH));

        var wordStats = countWords(inputPath);

        var part1Time = System.nanoTime();
        System.out.println("Part 1: " + (part1Time - startTime));

        var wordStatList = new ArrayList<>(wordStats.values());
        // Highest count first; equal counts are ordered alphabetically so the
        // output does not depend on hash map iteration order.
        Comparator<WordStat> byCount = Comparator.comparingInt(WordStat::count);
        wordStatList.sort(byCount.reversed().thenComparing(WordStat::word));

        var part2Time = System.nanoTime();
        System.out.println("Part 2: " + (part2Time - part1Time));

        // Create the target directory up front instead of failing on a missing one.
        Path resultDir = resultPath.getParent();
        if (resultDir != null) {
            Files.createDirectories(resultDir);
        }
        try (var writer = Files.newBufferedWriter(resultPath)) {
            for (WordStat wordStat : wordStatList) {
                writer.write(Integer.toString(wordStat.count()));
                writer.write(' ');
                writer.write(wordStat.word());
                writer.newLine();
            }
        }
        var part3Time = System.nanoTime();
        System.out.println("Part 3: " + (part3Time - part2Time));
        System.out.println("Total: " + (part3Time - startTime));
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /** Scans the file byte by byte and returns per-word statistics keyed by the lower-cased word. */
    private static Map<String, WordStat> countWords(Path inputPath) throws IOException {
        var wordStats = new HashMap<String, WordStat>();
        // Declared outside the mapping loop so a word that straddles two
        // mapped regions is still assembled as one word.
        var wordBuilder = new StringBuilder();

        try (var fileChannel = FileChannel.open(inputPath)) {
            long fileSize = fileChannel.size();
            for (long offset = 0; offset < fileSize; offset += MAX_MAPPING_SIZE) {
                long length = Math.min(MAX_MAPPING_SIZE, fileSize - offset);
                MappedByteBuffer mappedBuffer =
                        fileChannel.map(FileChannel.MapMode.READ_ONLY, offset, length);
                while (mappedBuffer.hasRemaining()) {
                    byte b = mappedBuffer.get();
                    if (('A' <= b) && (b <= 'Z')) {
                        wordBuilder.append((char) (b + ('a' - 'A')));
                    } else if (('a' <= b) && (b <= 'z')) {
                        wordBuilder.append((char) b);
                    } else {
                        flushWord(wordBuilder, wordStats);
                    }
                }
            }
        }
        // The file may end in the middle of a word; without this flush the
        // last word would be silently dropped.
        flushWord(wordBuilder, wordStats);
        return wordStats;
    }

    /** Records the word collected in {@code wordBuilder}, if any, and empties the builder. */
    private static void flushWord(StringBuilder wordBuilder, Map<String, WordStat> wordStats) {
        if (wordBuilder.isEmpty()) {
            return;
        }
        var word = wordBuilder.toString();
        var wordStat = wordStats.get(word);
        if (wordStat == null) {
            wordStats.put(word, new WordStat(word));
        } else {
            wordStat.incCount();
        }
        wordBuilder.setLength(0);
    }

    /** A word together with the number of times it has been seen so far. */
    static class WordStat {
        private final String word;
        private int count;

        /** Creates the statistic for a word seen for the first time (count starts at 1). */
        WordStat(String word) {
            this.word = word;
            count = 1;
        }

        /** Returns the word this statistic belongs to. */
        String word() {
            return word;
        }

        /** Returns how many times the word has been seen. */
        int count() {
            return count;
        }

        /** Registers one more occurrence of the word. */
        void incCount() {
            count++;
        }
    }
} // [/code]

Отрабатывает за

[code]
Part 1: 4861061700
Part 2: 62542400
Part 3: 59626800
Total: 4983230900
[/code]

Ну т.е. за 5 секунд примерно.

Сейчас попробую все 4 ядра загрузить. Думаю, секунду-другую есть шансы выиграть.