From 15c26a6b26b5153023e3e4077f98b80cbb95b634 Mon Sep 17 00:00:00 2001 From: eudoxia Date: Tue, 1 Dec 2020 16:09:46 -0500 Subject: URL encoding now uses its own C program; removed Python dependency --- md.sh | 11 ++++++----- sitemap.sh | 20 ++------------------ urlencode | Bin 0 -> 16776 bytes urlencode.c | 24 ++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 23 deletions(-) create mode 100755 urlencode create mode 100644 urlencode.c diff --git a/md.sh b/md.sh index 3441a65..b8d48c1 100755 --- a/md.sh +++ b/md.sh @@ -5,8 +5,9 @@ set -e # ./md.sh # ./md.sh # -# target dir: input `src.md`, output `index.html` -# dependencies: lowdown, fzf, python (for now) +# input: /src.md ; ./md-header ; ./md-footer +# output: /index.html +# dependencies: lowdown, fzf, ./urlencode # Copyright 2019-2020 DistressNetwork° # @@ -57,8 +58,8 @@ unesc() { sed -e 's;\\\&;\&;g' -e 's/\\\;/;/g' } -encode() ( # TODO: python't - python -c "import urllib, sys; print urllib.quote(sys.argv[1])" "$1" +encode() ( + ./urlencode ) decode() { @@ -118,7 +119,7 @@ tmp=$(mktemp -p /tmp) # using a tempfile, more convenient for storage and operat TOCLIST=$(grep -e '^#' $IN | sed -e 's; ;- ;' -e 's;^#;;' -e 's;#; ;g') # headers from source as md list echo "$TOCLIST" | while IFS='' read -r data ; do # encode headers as urls - HEADER=$(echo "$data" | sed 's/^[[:space:]-]*//' | encode "$(cat /dev/stdin)") ; + HEADER=$(echo "$data" | sed 's/^[[:space:]-]*//' | encode) ; echo "$data" | sed "s/^\([[:space:]-]*\)\(.*\)$/\1${HEADER}/" >> $tmp ; done diff --git a/sitemap.sh b/sitemap.sh index c6cfc38..d7c7b37 100755 --- a/sitemap.sh +++ b/sitemap.sh @@ -19,23 +19,7 @@ set -e # along with this program. If not, see . 
urlencode() (
-    # urlencode
-
-    old_lc_collate=$LC_COLLATE
-    LC_COLLATE=C
-
-    length="${#1}"
-    i=1
-    while [ $i -le $length ] ; do
-        c=$(expr substr "$1" $i 1) ;
-        case $c in
-            ([a-zA-Z0-9./~_-]) printf "$c" ;;
-            (*) printf '%%%02X' "'$c" ;;
-        esac ;
-        true $((i=i+1)) ;
-    done
-
-    LC_COLLATE=$old_lc_collate
+    ./urlencode | sed 's;%2F;/;g'
 )
 
 echo '
@@ -46,7 +30,7 @@ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' > $2
 echo "$(find -L $1 -type d | sort | sed -e 's;^\./;;' -e '\;/\(.git\|css\|error\|favicon\|fonts\|js\|media\);d' -e 's;$;/;')" | \
     while IFS='' read -r data ; do
         DATE=$(grep "last-modified" ${data}/index.html | cut -f 4 -d \" | sed 's/ /T/' ) ;
-        urlencode "$data" | sed -e 's;^;https://;' -e "s;$;${DATE};" ;
+        echo "$data" | urlencode | sed -e 's;^;https://;' -e "s;$;${DATE};" ;
         printf "\n" ;
     done >> $2
 
diff --git a/urlencode b/urlencode
new file mode 100755
index 0000000..bd0df16
Binary files /dev/null and b/urlencode differ
diff --git a/urlencode.c b/urlencode.c
new file mode 100644
index 0000000..5a418f2
--- /dev/null
+++ b/urlencode.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Percent-encode the first line of stdin to stdout: RFC 3986 unreserved
+ * characters pass through, any other byte becomes %XX; output ends in '\n'.
+ */
+int main(void) {
+    static const char safe[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";
+    int c;
+
+    /* Stream byte by byte: no fixed-size buffer to overflow or read unset. */
+    while ((c = getchar()) != EOF && c != '\n') {
+        /* strchr() also matches safe's terminating NUL, so rule out 0. */
+        if (c != '\0' && strchr(safe, c) != NULL) {
+            putchar(c);
+        } else {
+            printf("%%%02X", (unsigned)c);
+        }
+    }
+    putchar('\n');
+    return 0;
+}
-- 
cgit v1.2.3