/*- * Copyright (c) 2018 Robert Clausecker. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* puzzledist.c -- compute the number of puzzles with the given distance */ #ifdef __SSE__ # include <immintrin.h> #endif #include <assert.h> #include <stdalign.h> #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <string.h> /* * One configuration of a 24 puzzle. A 24 puzzle configuration * comprises 24 tiles labelled 1 to 24 arranged in a 5x5 grid with one * spot remaining empty. The goal of the puzzle is to arrange the * tiles like on the left: * * [] 1 2 3 4 1 2 3 4 5 * 5 6 7 8 9 6 7 8 9 10 * 10 11 12 13 14 11 12 13 14 15 * 15 16 17 18 19 16 17 18 19 20 * 20 21 22 23 24 21 22 23 24 [] * * Note that this arrangement is different from the traditional * arrangement on the right. It is however isomorphic to the * traditional tile arrangement by changing coordinates and tile * numbers. * * To simplify the algorithms we want to run on them, puzzle * configurations are stored in two ways: First, the position of * each tile is stored in tiles[], then, the tile on each grid * position is stored in grid[] with 0 indicating the empty spot. * If viewed as permutations of { 0, ..., 24 }, tiles[] and grid[] * are inverse to each other at any given time. */ enum { TILE_COUNT = 25, ZERO_TILE = 0 }; struct puzzle { alignas(8) unsigned char tiles[TILE_COUNT], grid[TILE_COUNT]; }; /* * A solved puzzle configuration. */ static struct puzzle solved_puzzle = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 }, }; /* * List of all possible moves for a given position of the empty square. * There are up to four moves from every square, if there are less, the * remainder is filled up with -1. */ static const signed char movetab[TILE_COUNT][4] = { 1, 5, -1, -1, 0, 2, 6, -1, 1, 3, 7, -1, 2, 4, 8, -1, 3, 9, -1, -1, 0, 6, 10, -1, 1, 5, 7, 11, 2, 6, 8, 12, 3, 7, 9, 13, 4, 8, 14, -1, 5, 11, 15, -1, 6, 10, 12, 16, 7, 11, 13, 17, 8, 12, 14, 18, 9, 13, 19, -1, 10, 16, 20, -1, 11, 15, 17, 21, 12, 16, 18, 22, 13, 17, 19, 23, 14, 18, 24, -1, 15, 21, -1, -1, 16, 20, 22, -1, 17, 21, 23, -1, 18, 22, 24, -1, 19, 23, -1, -1, }; /* * Return the location of the zero tile in p. */ static inline size_t zero_location(const struct puzzle *p) { return (p->tiles[ZERO_TILE]); } /* * Move the empty square to dloc, modifying p. It is not tested whether * dest is adjacent to the empty square's current location. Furtermore, * this function assumes 0 <= dloc < 25. */ static inline void move(struct puzzle *p, size_t dloc) { size_t zloc, dtile; dtile = p->grid[dloc]; zloc = zero_location(p); p->grid[dloc] = ZERO_TILE; p->grid[zloc] = dtile; p->tiles[dtile] = zloc; p->tiles[ZERO_TILE] = dloc; } /* * Return the number of moves when the empty square is at z. It is * assumed that 0 <= z < 25. */ static inline size_t move_count(size_t z) { /* * 0xefffee is 01110 11111 11111 11111 01110, * 0x07e9c0 is 00000 01110 01110 01110 00000, * i.e. everything but the corners and everything but the border. */ return (2 + ((0xefffee & 1 << z) != 0) + ((0x0739c0 & 1 << z) != 0)); } /* * Return the possible moves from square z. Up to four moves are * possible, the exact number can be found using move_count(z). If * less than four moves are possible, the last one or two entries are * marked with -1. It is assumed that 0 <= z < 25. */ static inline const signed char * get_moves(size_t z) { return (movetab[z]); } /* * To save storage, we store puzzles using a compact representation with * five bits per entry, not storing the position of the zero tile. * Additionally, four bits are used to store all moves that lead back to * the previous generation. This leads to 24 * 5 + 4 = 124 bits of * storage being required in total, split into two 64 bit quantities. * lo and hi store 12 tiles @ 5 bits each, lo additionally stores 4 move * mask bits in its least significant bits. */ struct compact_puzzle { unsigned long long lo, hi; }; #define MOVE_MASK 0xfull /* * An array of struct compact_puzzle with the given length and capacity. */ struct cp_slice { struct compact_puzzle *data; size_t len, cap; }; /* * Translate a struct puzzle into a struct compact_puzzle. */ static void pack_puzzle(struct compact_puzzle *restrict cp, const struct puzzle *restrict p) { #ifdef __BMI2__ /* pext is available iff pdep is available */ unsigned long long scratch, data; data = *(const unsigned long long*)&p->tiles[1]; scratch = _pext_u64(data, 0x1f1f1f1f1f1f1f1full) << 4; data = *(unsigned*)&p->tiles[9]; scratch |= (unsigned long long)_pext_u32(data, 0x1f1f1f1fu) << 4 + 8 * 5; cp->lo = scratch; data = *(const unsigned long long*)&p->tiles[13]; scratch = _pext_u64(data, 0x1f1f1f1f1f1f1f1full); data = *(unsigned*)&p->tiles[21]; scratch |= (unsigned long long)_pext_u32(data, 0x1f1f1f1fu) << 8 * 5; cp->hi = scratch; #else /* !defined(__BMI2__) */ size_t i; cp->lo = 0; cp->hi = 0; for (i = 1; i <= 12; i++) cp->lo |= (unsigned long long)p->tiles[i] << 5 * (i - 1) + 4; for (; i < TILE_COUNT; i++) cp->hi |= (unsigned long long)p->tiles[i] << 5 * (i - 13); #endif /* defined(__BMI2__) */ } /* * Pack p into cp, masking out the move that leads to dest. */ static void pack_puzzle_masked(struct compact_puzzle *restrict cp, const struct puzzle *restrict p, int dest) { size_t i, n_moves; int zloc; const signed char *moves; pack_puzzle(cp, p); zloc = zero_location(p); n_moves = move_count(zloc); moves = get_moves(zloc); for (i = 0; i < n_moves; i++) if (moves[i] == dest) cp->lo |= 1 << i; } /* * Translate a struct compact_puzzle back into a struct puzzle. */ static void unpack_puzzle(struct puzzle *restrict p, const struct compact_puzzle *restrict cp) { #ifdef __BMI2__ size_t i; unsigned long long data; memset(p, 0, sizeof *p); data = _pdep_u64(cp->lo >> 4, 0x1f1f1f1f1f1f1f1full); *(unsigned long long *)&p->tiles[1] = data; data = _pdep_u32(cp->lo >> 4 + 8 * 5, 0x1f1f1f1fu); *(unsigned *)&p->tiles[9] = data; data = _pdep_u64(cp->hi, 0x1f1f1f1f1f1f1f1full); *(unsigned long long *)&p->tiles[13] = data; data = _pdep_u32(cp->hi >> 8 * 5, 0x1f1f1f1fu); *(unsigned *)&p->tiles[21] = data; for (i = 1; i < TILE_COUNT; i++) p->grid[p->tiles[i]] = i; #else /* !defined(__BMI2__) */ size_t i; unsigned long long accum; memset(p, 0, sizeof *p); accum = cp->lo >> 4; for (i = 1; i <= 12; i++) { p->tiles[i] = accum & 31; p->grid[accum & 31] = i; accum >>= 5; } accum = cp->hi; for (; i < TILE_COUNT; i++) { p->tiles[i] = accum & 31; p->grid[accum & 31] = i; accum >>= 5; } #endif /* defined(__BMI2__) */ /* find the location of the zero tile in grid and set p->tiles[0] */ #ifdef __AVX2__ __m256i grid, gridmask; grid = _mm256_loadu_si256((const __m256i*)&p->grid); gridmask = _mm256_cmpeq_epi8(grid, _mm256_setzero_si256()); p->tiles[0] = __builtin_ctz(_mm256_movemask_epi8(gridmask)); #elif defined(__SSE2__) __m128i gridlo, gridhi, gridmasklo, gridmaskhi, zero; gridlo = _mm_loadu_si128((const __m128i*)&p->grid + 0); gridhi = _mm_loadu_si128((const __m128i*)&p->grid + 1); zero = _mm_setzero_si128(); gridmasklo = _mm_cmpeq_epi8(gridlo, zero); gridmaskhi = _mm_cmpeq_epi8(gridhi, zero); p->tiles[0] = __builtin_ctz(_mm_movemask_epi8(gridmaskhi) << 16 | _mm_movemask_epi8(gridmasklo)); #else /* !defined(__AVX2__) && !defined(__SSE2__) */ p->tiles[0] = strlen(p->grid); #endif } /* * Append cp to slice cps and resize if required. */ static void cps_append(struct cp_slice *cps, const struct compact_puzzle *cp) { struct compact_puzzle *newdata; size_t newcap; if (cps->len >= cps->cap) { newcap = cps->cap < 64 ? 64 : cps->cap * 13 / 8; newdata = realloc(cps->data, newcap * sizeof *cps->data); if (newdata == NULL) { perror("realloc"); exit(EXIT_FAILURE); } cps->data = newdata; cps->cap = newcap; } cps->data[cps->len++] = *cp; } /* * Initialize the content of cps to an empty slice. */ static void cps_init(struct cp_slice *cps) { cps->data = NULL; cps->len = 0; cps->cap = 0; } /* * Release all storage associated with cps. The content of cps is * undefined afterwards. */ static void cps_free(struct cp_slice *cps) { free(cps->data); } /* * Perform all unmasked moves from cp and add them to cps. */ static void expand(struct cp_slice *cps, const struct compact_puzzle *cp) { struct puzzle p; struct compact_puzzle ncp; size_t i, n_move; int movemask = cp->lo & MOVE_MASK, zloc; const signed char *moves; unpack_puzzle(&p, cp); zloc = zero_location(&p); n_move = move_count(zloc); moves = get_moves(zloc); for (i = 0; i < n_move; i++) { if (movemask & 1 << i) continue; move(&p, moves[i]); pack_puzzle_masked(&ncp, &p, zloc); move(&p, zloc); cps_append(cps, &ncp); } } /* * Given a sorted struct cp_slice cps, coalesce identical puzzles, * oring their move masks. Since the move mask bits are the least * significatn bits in lo, puzzles differing only by their move * masks end up next to each other. */ static void coalesce(struct cp_slice *cps) { struct compact_puzzle *a, *b, *newdata; size_t i, j; if (cps->len == 0) return; /* invariant: i < j */ for (i = 0, j = 1; j < cps->len; j++) { a = &cps->data[i]; b = &cps->data[j]; /* are a and b equal, ignoring the move mask? */ if (a->hi == b->hi && ((a->lo ^ b->lo) & ~MOVE_MASK) == 0) a->lo |= b->lo; else cps->data[++i] = *b; } cps->len = i + 1; /* conserve storage */ newdata = realloc(cps->data, cps->len * sizeof *cps->data); if (newdata != NULL) { cps->data = newdata; cps->cap = cps->len; } } /* * Compare two struct compact_puzzle in a manner suitable for qsort. */ static int compare_cp(const void *a_arg, const void *b_arg) { const struct compact_puzzle *a = a_arg, *b = b_arg; if (a->hi != b->hi) return ((a->hi > b->hi) - (a->hi < b->hi)); else return ((a->lo > b->lo) - (a->lo < b->lo)); } /* * Expand vertices in cps and store them in new_cps. Then sort and * coalesce new_cps. */ static void do_round(struct cp_slice *restrict new_cps, const struct cp_slice *restrict cps) { size_t i; for (i = 0; i < cps->len; i++) expand(new_cps, &cps->data[i]); qsort(new_cps->data, new_cps->len, sizeof *new_cps->data, compare_cp); coalesce(new_cps); } extern int main() { struct cp_slice old_cps, new_cps; struct compact_puzzle cp; cps_init(&new_cps); pack_puzzle(&cp, &solved_puzzle); cps_append(&new_cps, &cp); for (;;) { printf("%zu\n", new_cps.len); fflush(stdout); old_cps = new_cps; cps_init(&new_cps); do_round(&new_cps, &old_cps); cps_free(&old_cps); } }