Small scripts in C
Daniel Lemire wrote an article about generating scripts in C++ using an LLM: "Producing useful commands on the go using C++ and AI".
The generated code looks like this:
// assume clang++ 18 or g++ 13
// compile with: c++ -std=c++23 -o csv csv.cpp
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <print>
#include <ranges>
#include <sstream>
#include <string>
#include <unordered_set>
/// Reports how many distinct values each column of a CSV file contains.
///
/// Expects exactly one argument: the path of the file to read. The first line
/// supplies the column labels; every later line is split on ',' (no quoting or
/// escaping is handled) and each cell is added to the set for its column.
/// Columns are keyed by label, so columns sharing a label are counted together
/// and the report is printed in label order.
///
/// @return EXIT_SUCCESS after printing the report; EXIT_FAILURE when the
///         argument count is wrong, the file cannot be opened, or no first
///         line can be read.
int main(int argc, char *argv[]) {
  if (argc != 2) {
    // Diagnostics go to stderr so they never mix with the report on stdout.
    std::println(stderr, "Usage: {} <csv_file>", argv[0]);
    return EXIT_FAILURE;
  }

  std::ifstream file(argv[1]);
  if (!file.is_open()) {
    // std::println already terminates the line; no explicit '\n' is needed.
    std::println(stderr, "Error: Cannot open file {}", argv[1]);
    return EXIT_FAILURE;
  }

  // Read first line for column labels
  std::string line;
  if (!std::getline(file, line)) {
    std::println(stderr, "Error: Empty file {}", argv[1]);
    return EXIT_FAILURE;
  }

  // Parse first line into labels, keyed by zero-based column position
  std::map<size_t, std::string> labels;
  for (auto cell : line | std::ranges::views::split(',')) {
    labels.emplace(labels.size(), std::string(cell.begin(), cell.end()));
  }

  std::map<std::string, std::unordered_set<std::string>> columns;
  while (std::getline(file, line)) {
    auto cells = line | std::ranges::views::split(',');
    for (auto [idx, cell] : std::ranges::views::enumerate(cells)) {
      // A row may hold more cells than the header. Looking the position up
      // with find() instead of operator[] avoids inventing an empty label
      // (and a phantom "Column :" entry) for the surplus cells.
      const auto label = labels.find(static_cast<size_t>(idx));
      if (label == labels.end()) {
        break;  // positions only grow, so the rest of the row is surplus too
      }
      columns[label->second].insert(std::string(cell.begin(), cell.end()));
    }
  }

  // Print results using labels
  for (const auto &[label, values] : columns) {
    std::println("Column {}: {} distinct values", label, values.size());
  }
  return EXIT_SUCCESS;
}
I usually use C to write small scripts, so I decided to rewrite the code above in C using sheepy:
#! /usr/bin/env sheepy
#include "libsheepyObject.h"
// Shorthands for getG with a fixed second argument: getJ passes rtSmallJsont
// and getC passes rtChar (presumably selecting the type of the value that is
// returned - confirm against the libsheepy documentation).
#define getJ(obj, key) getG(obj, rtSmallJsont, key)
#define getC(obj, key) getG(obj, rtChar, key)
// Count the distinct values in each column of the CSV file named by argv[1]
// and log one "Column <label>: <n> distinct values" line per label.
// Returns EXIT_FAILURE when the argument count is wrong, the file cannot be
// read or its first line is missing; EXIT_SUCCESS otherwise.
int main(int argc, char** argv) {
if (argc != 2) {
logE("Usage: %s <csv file>", argv[0]);
return EXIT_FAILURE;
}
// Load the file into `file`. The code below indexes and iterates it one line
// at a time, so readTextG presumably stores one element per line - confirm.
cleanAllocateSmallJson(file);
if (not readTextG(file, argv[1])) {
logE("Cannot open file %s", argv[1]);
return EXIT_FAILURE;
}
// Read first line for column labels
cleanFinishSmallJsonP(line) = getJ(file, 0);
if (not line) {
logE("Empty file %s",argv[1]);
return EXIT_FAILURE;
}
// Parse first line into labels
cleanSmallJsonP(labels) = splitG(line, ',');
// Drop the header so the row loop below only sees data lines
// (presumably delG removes the index range [0, 1) - confirm).
delG(file, 0, 1);
// columns holds one array per label; each array collects the distinct
// values seen so far in that column.
cleanAllocateSmallJson(columns);
range(i, lenG(labels)) {
createSmallArray(a);
pushG(columns, &a);
}
// Walk the remaining lines, splitting each one into cells on ','.
iter(file, L) {
castS(l, L);
cleanSmallArrayP(cells) = splitG(l, ',');
// NOTE(review): the loop bound is the label count, not the cell count; what
// getC yields for a row with fewer cells than labels depends on libsheepy -
// confirm.
range(i, lenG(labels)) {
cleanFinishSmallJsonP(col) = getJ(columns, i);
var c = getC(cells, i);
// Append the cell only when the column does not contain it yet, then store
// the column back at index i (presumably required because getJ hands out a
// separate wrapper object - confirm).
if (not hasG(col, c)) {
pushG(col, c);
setPG(columns, i, col);
}
}
}
// Print results using labels
iter(labels, lb) {
// iterIndexG(labels) is used as the column index matching the current label.
cleanFinishSmallJsonP(col) = getJ(columns, iterIndexG(labels));
// NOTE(review): %d is paired with lenG(col); confirm lenG's return type
// matches an int-sized argument.
logI("Column %s: %d distinct values", ssGet(lb), lenG(col));
}
return EXIT_SUCCESS;
}
// vim: set expandtab ts=2 sw=2:
I wonder if an LLM would be able to write this code, since OpenAI and Google have crawled all the code in my libsheepy.
- The `clean*` macros use the GNU C `__cleanup__` attribute to call free when the variable goes out of scope.
- `smallJsont` is a type which can be a bool, integer, string, array or dictionary.
- `getG` is a macro which uses C11 `_Generic` to call a function depending on the parameter types.
- `iter` is a macro which calls the iterator interface of the object in the first parameter.
Hashtag: #programming