#!/bin/sh

# Find repeats, that is, statements that are repeatedly proved but
# NOT top-level theorems/axioms.  This is an imperfect heuristic approach,
# but it can sometimes find some interesting things.
# The result is stored in ,embedded-counts

# Analyze the mmfile given in $1, by default set.mm.
mmfile="${1:-set.mm}"

# What are the top-level proven theorems/axioms?
# Pipeline stages:
#   1. squeeze every run of spaces down to one space, so fields are
#      separated by exactly one space;
#   2. keep lines that start with a nonzero digit and contain $a or $p;
#   3. drop the first three space-separated fields (presumably statement
#      number, label and the $a/$p keyword), keeping the statement text;
#   4. strip the trailing "$= ..." proof part and also a trailing "$."
#      terminator.  Lines without a "$= ..." part (presumably the $a
#      lines) would otherwise keep their "$." and could never equal a
#      line of ,proven, where that terminator is stripped as well.
metamath "read \"${mmfile}\"" 'set width 9999' 'show statement *' quit | \
  sed -e 's/  */ /g' | grep '^[1-9].*\$[ap]' | cut -d ' ' -f 4- | \
  sed -e 's/ *\$= .*$//' -e 's/ *\$\. *$//' > ,statements


# Which statements are established at some step of some proof?
# Every proof is dumped in Lemmon format.  After squeezing runs of spaces,
# a line is kept only if it looks like
#   <number> <digits-and-commas> <label> $a|$p <statement...>
# i.e. its second field starts with a nonzero digit (presumably the list of
# earlier steps the step depends on).  Only the statement text (field 5
# onward) is written out, with any trailing "$." removed.
metamath "read \"${mmfile}\"" 'set width 9999' 'show proof * /lemmon' quit \
  | sed 's/  */ /g' \
  | grep -E '^[0-9]+ [1-9][0-9,]* [^ ]* \$[ap] ' \
  | cut -d' ' -f5- \
  | sed 's/ *\$\. *$//' \
  > ,proven

# Show proven by any step NOT including theorems/axioms.
# "comm -23 ,proven ,statements" won't work because of duplicate lines.
# Each line of ,statements is used as a literal pattern (-F) that must match
# an ENTIRE line of ,proven (-x).  Without -x the patterns match as
# substrings, so a top-level statement such as "|- A e. C" would also throw
# away the unrelated step "|- A e. CC", and an empty line in ,statements
# would throw away every step.

grep -v -x -F -f ,statements < ,proven > ,embedded

# Count how often each remaining statement occurs, most frequent first.
sort ,embedded | uniq -c | sort -n -r > ,embedded-counts
