#!/bin/bash
# Generates an extractor that saves C-array encoded data to a file to assist with extracting loadable, binary firmware from within drivers.
#
# Usage: ./generate_extractor.sh <input file>
#
# The input file is scanned for C array definitions. Every array found is copied into a generated C program, which is
# compiled with gcc and run; it writes each array's raw bytes to a file (named after the array) in the current directory.
# Requires GNU grep (-z) and GNU sed (\x0 and \n escapes), plus gcc.
#
# Copyright (C) 2022 DiffieHellman
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version 3 for more details.

#return invalid usage unless there is exactly 1 arg
[ "$#" -eq 1 ] || { echo "Usage: ./generate_extractor.sh <input file>" >&2; exit 2; }

#make the script automatically exit if any subcommand fails
#set -euo pipefail
set -euxo pipefail

input="$1"

#keep every intermediate file in a private directory so that existing files called output.c, tmp.c or extractor are never
#clobbered. it is created inside the current directory (not /tmp) so that the compiled extractor can always be executed.
workdir=$(mktemp -d ./generate_extractor.XXXXXX)
trap 'rm -rf -- "$workdir"' EXIT

output="$workdir/output.c"
squeezed_input="$workdir/tmp.c"
extractor="$workdir/extractor"

#create head of C extractor with {u,s}{8,16,32,64} and {u,s}{1,4}Byte mappings
#note: this head must not contain an opening square bracket, as the array names are later found by searching for one
cat > "$output" <<'EOF'
/* Copyright (C) 2022 DiffieHellman
   C-array to file extractor. No copyright claims are made on C-array data that is to be extracted.

   This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License version 3 for more details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>

typedef uint8_t u1Byte;
typedef uint32_t u4Byte;
typedef int8_t s1Byte;
typedef int32_t s4Byte;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;

EOF

#convert all tabs, newlines and carriage returns in the file into spaces and squeeze any repeated spaces to simplify regex
tr -s '\t\n\r' '   ' < "$input" > "$squeezed_input"

#regex describing one whole array definition: type, name, [size], optional attributes, "=", "{ numeric data }" and ";".
#only digits, hex letters, commas, "x", "." and whitespace are accepted between the braces.
array_regex='((|const )(u|s)(8|16|32|64|1Byte|4Byte)|(|unsigned )((|u)int(|_fast)(|8_t|16_t|32_t)|char|short|long|long long)) '
array_regex+='[[:alnum:]_]+ ?\[[A-Za-z ]*\]'
array_regex+='[A-Za-z_ ]*= ?'
array_regex+='{[[:space:][:digit:]A-Fa-f,x.]+} ?;'

#extract C arrays while also preventing arbitrary code execution. grep in -z mode outputs \0 at the end of each match, which is changed to 2 newlines. sadly no UTF-8 support
#grep exits non-zero when nothing matches; report that instead of silently dying via pipefail
if ! grep -ozE "$array_regex" "$squeezed_input" | sed 's/\x0/\n\n/g' >> "$output"; then
  echo "Failed to extract any C arrays from $input" >&2
  exit 1
fi

#get names of all arrays previously extracted (one per line): drop everything from the first "[" onwards, then keep the last word
names=$(grep '\[' "$output" | sed -e 's/ *\[.*//' -e 's/.* //')

#todo: handle automatically outputting arrays like: u8 long_winded_name[LongWindedName] = {

#append the array to file function, then a main() that calls it once per extracted array
{
  cat <<'EOF'
/* Writes size bytes starting at array to a newly created (or truncated) file called filename.
   Exits with status 1 if the file cannot be opened or fully written.
   array is a const void pointer so that arrays of any element type, const or not, can be passed. */
static void openWriteClose(const char *filename, const void *array, size_t size)
{
    FILE *fp = fopen(filename, "wb");
    if (!fp) { perror("Could not open file"); exit(1); }

    /* fwrite returns the number of items written; anything less than size is a failure */
    if (fwrite(array, 1, size, fp) != size) { perror("Failed to write file"); exit(1); }

    /* buffered data is flushed here, so a failure to close is a failure to write */
    if (fclose(fp) != 0) { perror("Failed to write file"); exit(1); }
}

int main(void)
{
EOF

  #loop over the newline separated names without word splitting or globbing
  while IFS= read -r name; do
    printf '    openWriteClose("%s", %s, sizeof(%s));\n' "$name" "$name" "$name"
  done <<< "$names"

  printf '    return 0;\n}\n'
} >> "$output"

#compile and run the extractor. these are separate commands rather than an && chain, because set -e does not abort
#the script when a non-final command of an && list fails. the work directory is removed by the EXIT trap.
gcc "$output" -o "$extractor"
"$extractor"

#print every line of the input that contains an opening square bracket
grep -- '\[' "$input"