From fcd667a0ebe73a7868e93232faea36d56535be18 Mon Sep 17 00:00:00 2001 From: eudoxia Date: Mon, 2 May 2022 12:37:11 -0400 Subject: reorganised docs locations (doc/* -> doc/online/*, doc/freestanding/* -> doc/*) --- doc/data.html | 126 +++++----- doc/freestanding/data.html | 564 ------------------------------------------- doc/freestanding/log.html | 109 --------- doc/freestanding/tangle.html | 148 ------------ doc/freestanding/weave.html | 132 ---------- doc/header | 8 - doc/log.html | 64 ++--- doc/online/data.html | 560 ++++++++++++++++++++++++++++++++++++++++++ doc/online/header | 8 + doc/online/log.html | 105 ++++++++ doc/online/tangle.html | 144 +++++++++++ doc/online/weave.html | 128 ++++++++++ doc/tangle.html | 68 +++--- doc/usage.md | 4 +- doc/weave.html | 66 ++--- gen-docs.sh | 25 +- gen-freestanding.sh | 11 - gen-online.sh | 26 ++ 18 files changed, 1148 insertions(+), 1148 deletions(-) mode change 100755 => 100644 doc/data.html delete mode 100644 doc/freestanding/data.html delete mode 100644 doc/freestanding/log.html delete mode 100644 doc/freestanding/tangle.html delete mode 100644 doc/freestanding/weave.html delete mode 100644 doc/header mode change 100755 => 100644 doc/log.html create mode 100755 doc/online/data.html create mode 100644 doc/online/header create mode 100755 doc/online/log.html create mode 100755 doc/online/tangle.html create mode 100755 doc/online/weave.html mode change 100755 => 100644 doc/tangle.html mode change 100755 => 100644 doc/weave.html delete mode 100755 gen-freestanding.sh create mode 100755 gen-online.sh diff --git a/doc/data.html b/doc/data.html old mode 100755 new mode 100644 index 9fddef4..05bc7cf --- a/doc/data.html +++ b/doc/data.html @@ -3,57 +3,69 @@ - - - - - - -data.lp — DistressNetwork° - + + -
-
-
data.lp
-
+

data.lp

This file contains the various data processing-related constants and functions referenced by the tangling and weaving processes.

*:

-
(License)
+
(License)
 
-(Imports)
+(Imports)
 
-(Processing limits)
+(Processing limits)
 
-(Formatting keywords)
+(Formatting keywords)
 
-(Configuration keywords)
+(Configuration keywords)
 
-(Data structure types)
+(Data structure types)
 
-(Error set)
+(Error set)
 
-(Line splitting function)
+(Line splitting function)
 
-(Configuration searching function)
+(Configuration searching function)
 
-(Section searching function)
+(Section searching function)
 
-(Command type detection function)
+(Command type detection function)
 
-(Parsing functions)
+(Parsing functions)
 
-(Code generation functions)
+(Code generation functions)
 
-(Text generation function)
+(Text generation function)
 

License:

-
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 

Constants

@@ -173,7 +185,7 @@ pub const LineRange = struct { var buffer = std.ArrayList([]const u8).init(alloc); defer buffer.deinit(); - (Split file at each newline) + (Split file at each newline) return buffer.toOwnedSlice(); } @@ -208,11 +220,11 @@ while (iterator.next()) |line| { return try fmt_conf(line, key, alloc); } } - log(.err, "config declaration '{s}' not found", .{std.mem.trimRight(u8, key, " \t")}); + log(.err, "config declaration '{s}' not found", .{std.mem.trimRight(u8, key, " \t")}); return error.NotFound; } -(Auxiliary formatting function) +(Auxiliary formatting function)

If the declaration is found, its contained format string is split along instances of the section name escape sequence, and each substring has its instances of the newline escape sequence replaced with a literal newline.

@@ -240,7 +252,7 @@ while (iterator.next()) |line| { for (list) |section, index| { if (std.mem.eql(u8, section.name, name)) return index; } - log(.err, "section '{s}' not found", .{name}); + log(.err, "section '{s}' not found", .{name}); return error.NotFound; }
@@ -276,7 +288,7 @@ fn command_type(line: []const u8) CommandType { var sections = std.ArrayList(Section).init(alloc); defer sections.deinit(); - (Main parsing routine) + (Main parsing routine) return sections.toOwnedSlice(); } @@ -285,7 +297,7 @@ fn parse_code(lines: [][]const u8, index: u32, alloc: Allocator) !CodeReturn { var content = std.ArrayList(Content).init(alloc); defer content.deinit(); - (Code parsing subroutine) + (Code parsing subroutine) return CodeReturn{ .content = content.toOwnedSlice(), .index = i + 1 }; } @@ -310,10 +322,10 @@ while (i < lines.len) { const line = lines[i]; switch (command_type(line)) { .start => { - (Add new section) + (Add new section) }, .add => { - (Append to section) + (Append to section) }, .end => { log(.err, "line {d}: unexpected section end", .{i + 1}); @@ -331,12 +343,12 @@ while (i < lines.len) {

Add new section:

const name = line[(k_start.len)..];
-log(.debug, "({d}) starting section '{s}'", .{ i + 1, name });
+log(.debug, "({d}) starting section '{s}'", .{ i + 1, name });
 
 const section = try parse_code(lines, i + 1, alloc);
 try sections.append(.{ .name = name, .content = section.content });
 
-log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
+log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
 i = section.index;
 
@@ -345,7 +357,7 @@ i = section.index;

Append to section:

const name = line[(k_add.len)..];
-log(.debug, "({d}) appending to section '{s}'", .{ i + 1, name });
+log(.debug, "({d}) appending to section '{s}'", .{ i + 1, name });
 
 const section = try parse_code(lines, i + 1, alloc);
 const index = try search(sections.items, name);
@@ -353,7 +365,7 @@ const old = &sections.items[index];
 const new = try std.mem.concat(alloc, Content, &[_][]const Content{ old.*.content, section.content });
 old.*.content = new;
 
-log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
+log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
 i = section.index;
 
@@ -370,13 +382,13 @@ while (i < lines.len) { return error.UnexpectedStart; }, .ref => { - (Add reference) + (Add reference) }, .end => { break; }, else => { - (Add literal range) + (Add literal range) }, } } @@ -388,7 +400,7 @@ while (i < lines.len) {
const ref_name = std.mem.trimLeft(u8, line, " \t")[(k_ref.len)..];
 try content.append(.{ .reference = ref_name });
-log(.debug, "({d}) \tappended reference '{s}'", .{ i + 1, ref_name });
+log(.debug, "({d}) \tappended reference '{s}'", .{ i + 1, ref_name });
 i += 1;
 
@@ -429,17 +441,17 @@ fn codegen_main(lines: [][]const u8, list: []Section, index: usize, depth: u8, a defer buffer.deinit(); const section = list[index]; - log(.debug, "generating section '{s}'", .{section.name}); + log(.debug, "generating section '{s}'", .{section.name}); for (section.content) |content| switch (content) { .literal => |range| { - (Append literal range) + (Append literal range) }, .reference => |name| { - (Resolve reference) + (Resolve reference) }, }; - log(.debug, "ending section '{s}'", .{section.name}); + log(.debug, "ending section '{s}'", .{section.name}); return buffer.toOwnedSlice(); } @@ -475,7 +487,7 @@ try buffer.appendSlice(code); var buffer = std.ArrayList([]const u8).init(alloc); defer buffer.deinit(); - (Process configuration declarations) + (Process configuration declarations) var current_name: []const u8 = undefined; for (lines) |line| { @@ -486,16 +498,16 @@ try buffer.appendSlice(code); continue; } else switch (command_type(line)) { .start => { - (Format starting command) + (Format starting command) }, .add => { - (Format appending command) + (Format appending command) }, .ref => { - (Format reference command) + (Format reference command) }, .end => { - (Format ending command) + (Format ending command) }, else => { try buffer.append(line); @@ -548,13 +560,5 @@ try buffer.append(try std.mem.concat(alloc, u8, &[_][]const u8{ line[0..star
try buffer.append(try std.mem.join(alloc, current_name, conf_end));
 
-
- -
- - + \ No newline at end of file diff --git a/doc/freestanding/data.html b/doc/freestanding/data.html deleted file mode 100644 index 05bc7cf..0000000 --- a/doc/freestanding/data.html +++ /dev/null @@ -1,564 +0,0 @@ - - - - - - - - - -

data.lp

-

This file contains the various data processing-related constants and functions referenced by the tangling and weaving processes.

- -

*:

- -
(License)
-
-(Imports)
-
-(Processing limits)
-
-(Formatting keywords)
-
-(Configuration keywords)
-
-(Data structure types)
-
-(Error set)
-
-(Line splitting function)
-
-(Configuration searching function)
-
-(Section searching function)
-
-(Command type detection function)
-
-(Parsing functions)
-
-(Code generation functions)
-
-(Text generation function)
-
- -

License:

- -
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
- -

Constants

- -

We first import the standard library and the logging function from log.zig.

- -

Imports:

- -
const std = @import("std");
-const log = @import("log.zig").log;
-
-const Allocator = std.mem.Allocator;
-
- -

We define the maximum input file size of 4GiB, and the code generation function’s maximum recursion depth of 250 nested calls.

- -

Processing limits:

- -
pub const input_max = 0x1_0000_0000;
-pub const dereference_max = 250;
-
- -

We then define the recognized formatting keywords. These consist of the following:

- - - -

Formatting keywords:

- -
pub const k_start   = "@: ";
-pub const k_add     = "@+ ";
-pub const k_end     = "@.";
-pub const k_ref     = "@= ";
-pub const k_root    = "*";
-
- -

We similarly define the recognized configuration keywords, consisting of:

- - - -

Configuration keywords:

- -
pub const kc_start  = "@start ";
-pub const kc_add    = "@add ";
-pub const kc_end    = "@end ";
-pub const kc_ref    = "@ref ";
-pub const kc_esc    = "@@";
-pub const kc_nl     = "\\n";
-
- -

We then define the data structure used for parsing the input into code sections, described as follows:

- - - -

Data structure types:

- -
pub const Section = struct {
-    name: []const u8,
-    content: []const Content,
-};
-
-pub const CodeType = enum { literal, reference };
-pub const Content = union(CodeType) {
-    literal: LineRange,
-    reference: []const u8,
-};
-
-pub const LineRange = struct {
-    start: u32,
-    end: u32,
-};
-
- -

We also define the set of errors which may be encountered by the various processing functions, consisting of:

- - - -

Error set:

- -
pub const Errors = error {
-    UnexpectedStart,
-    UnexpectedEnd,
-    DereferenceLimit,
-    NotFound,
-};
-
- -

Preprocessing & Searching

- -

The line splitting function is defined, which operates on a buffer as follows.

- -

Line splitting function:

- -
pub fn split_lines(file: []const u8, alloc: Allocator) ![][]const u8 {
-    var buffer = std.ArrayList([]const u8).init(alloc);
-    defer buffer.deinit();
-
-    (Split file at each newline)
-
-    return buffer.toOwnedSlice();
-}
-
- -

The function simply iteratively splits the file at each newline, and appends each resulting line to the buffer.

- -

Split file at each newline:

- -
var iterator = std.mem.split(u8, file, "\n");
-while (iterator.next()) |line| {
-    try buffer.append(line);
-}
-
- -

In addition, the final empty line created by the trailing newline at the end of the file (inserted automatically by some text editors) is removed, if it exists. This may only be performed if the file is non-empty, to avoid out-of-bounds indexing.

- -

+ Split file at each newline:

- -
if ((buffer.items.len > 0) and std.mem.eql(u8, buffer.items[buffer.items.len - 1], "")) {
-    _ = buffer.pop();
-}
-
- -

We define the configuration command searching function, which returns a list containing the segments of the split format string. The function will return from within the for loop if the declaration is found; otherwise, an error is reported.

- -

Configuration searching function:

- -
pub fn get_conf(lines: [][]const u8, key: []const u8, alloc: Allocator) ![][]const u8 {
-    for (lines) |line| {
-        if (std.mem.startsWith(u8, line, key)) {
-            return try fmt_conf(line, key, alloc);
-        }
-    }
-    log(.err, "config declaration '{s}' not found", .{std.mem.trimRight(u8, key, " \t")});
-    return error.NotFound;
-}
-
-(Auxiliary formatting function)
-
- -

If the declaration is found, its contained format string is split along instances of the section name escape sequence, and each substring has its instances of the newline escape sequence replaced with a literal newline.

- -

Auxiliary formatting function:

- -
fn fmt_conf(line: []const u8, key: []const u8, alloc: Allocator) ![][]const u8 {
-    var buffer = std.ArrayList([]const u8).init(alloc);
-    defer buffer.deinit();
-
-    var iterator = std.mem.split(u8, line[(key.len)..], kc_esc);
-    while (iterator.next()) |str| {
-        try buffer.append(try std.mem.replaceOwned(u8, alloc, str, kc_nl, "\n"));
-    }
-
-    return buffer.toOwnedSlice();
-}
-
- -

We define the code section searching function, which returns the index (into the section list) of the first section with a matching name, or returns an error if none exist.

- -

Section searching function:

- -
fn search(list: []Section, name: []const u8) !usize {
-    for (list) |section, index| {
-        if (std.mem.eql(u8, section.name, name)) return index;
-    }
-    log(.err, "section '{s}' not found", .{name});
-    return error.NotFound;
-}
-
- -

Parsing

- -

We first define a function which, for a given line, determines whether it consists of a formatting command, and which type of command it contains. This is done in order to enable the use of switch statements in later functions using this routine.

- -

Command type detection function:

- -
const CommandType = enum { start, add, end, ref, none };
-
-fn command_type(line: []const u8) CommandType {
-    if (std.mem.startsWith(u8, line, k_start)) {
-        return .start;
-    } else if (std.mem.startsWith(u8, line, k_add)) {
-        return .add;
-    } else if (std.mem.eql(u8, line, k_end)) {
-        return .end;
-    } else if (std.mem.startsWith(u8, std.mem.trimLeft(u8, line, " \t"), k_ref)) {
-        return .ref;
-    } else {
-        return .none;
-    }
-}
-
- -

We then define the parsing functions, consisting of the main parse function which builds the list of Sections, and its auxiliary parse_code subroutine which builds the contents of each Section.

- -

Parsing functions:

- -
pub fn parse(lines: [][]const u8, alloc: Allocator) ![]Section {
-    var sections = std.ArrayList(Section).init(alloc);
-    defer sections.deinit();
-
-    (Main parsing routine)
-
-    return sections.toOwnedSlice();
-}
-
-fn parse_code(lines: [][]const u8, index: u32, alloc: Allocator) !CodeReturn {
-    var content = std.ArrayList(Content).init(alloc);
-    defer content.deinit();
-
-    (Code parsing subroutine)
-
-    return CodeReturn{ .content = content.toOwnedSlice(), .index = i + 1 };
-}
-
- -

The latter function takes as arguments the list of lines and the allocator similarly to the main function, but it is also passed the index of the current line being processed, and returns the line at which the main function should resume parsing after the code section is parsed. It thus returns a struct consisting of the contents of the code section and the next line number index, as follows.

- -

+ Parsing functions:

- -
const CodeReturn = struct {
-    content: []const Content,
-    index: u32,
-};
-
- -

The main parsing routine iterates over the list of lines, adding code sections where they occur, and otherwise ignoring text sections. If a section end command is encountered in the absence of a preceding starting command, an error is returned.

- -

Main parsing routine:

- -
var i: u32 = 0;
-while (i < lines.len) {
-    const line = lines[i];
-    switch (command_type(line)) {
-        .start  => {
-            (Add new section)
-        },
-        .add    => {
-            (Append to section)
-        },
-        .end    => {
-            log(.err, "line {d}: unexpected section end", .{i + 1});
-            return error.UnexpectedEnd;
-        },
-        else    => {
-            i += 1;
-        },
-    }
-}
-
- -

To add a new section, the name (consisting of everything after the starting token) is first retrieved from the starting command. Then the code parsing subroutine is called, beginning at the line after the starting command, and it returns the resulting code section (section.content) and the next line at which to resume parsing (section.index). The code section is appended to the section list, and the parsing routine continues at the provided index.

- -

Add new section:

- -
const name = line[(k_start.len)..];
-log(.debug, "({d}) starting section '{s}'", .{ i + 1, name });
-
-const section = try parse_code(lines, i + 1, alloc);
-try sections.append(.{ .name = name, .content = section.content });
-
-log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
-i = section.index;
-
- -

To append to an existing section, the section name and the code section contents to be appended are retrieved as above. The index of the section is located, along with its address within the section list. Next, the new contents of the section are created by concatenating the old contents with the newly parsed code section contents. The section list is then updated to point to the new section contents, and the parsing routine continues.

- -

Append to section:

- -
const name = line[(k_add.len)..];
-log(.debug, "({d}) appending to section '{s}'", .{ i + 1, name });
-
-const section = try parse_code(lines, i + 1, alloc);
-const index = try search(sections.items, name);
-const old = &sections.items[index];
-const new = try std.mem.concat(alloc, Content, &[_][]const Content{ old.*.content, section.content });
-old.*.content = new;
-
-log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
-i = section.index;
-
- -

The code parsing subroutine iterates over the list of lines similarly to the main routine. If a starting or appending command is encountered (lacking a matching ending command), an error is raised. Reference commands may be preceded with any amount of whitespace. The loop exits upon encountering an ending command. Otherwise, the line is appended as a literal element.

- -

Code parsing subroutine:

- -
var i = index;
-while (i < lines.len) {
-    const line = lines[i];
-    switch (command_type(line)) {
-        .start, .add    => {
-            log(.err, "line {d}: unexpected section start", .{i + 1});
-            return error.UnexpectedStart;
-        },
-        .ref    => {
-            (Add reference)
-        },
-        .end    => {
-            break;
-        },
-        else    => {
-            (Add literal range)
-        },
-    }
-}
-
- -

To add a reference, the name of the referenced section is retrieved, consisting of the characters following the leading whitespace and the command token. The resulting string is appended to the section contents list, and the parser continues at the next line.

- -

Add reference:

- -
const ref_name = std.mem.trimLeft(u8, line, " \t")[(k_ref.len)..];
-try content.append(.{ .reference = ref_name });
-log(.debug, "({d}) \tappended reference '{s}'", .{ i + 1, ref_name });
-i += 1;
-
- -

To add a literal range, the parser either updates the end index of the previous literal element, or creates a new literal element if the last element added is a reference. This action of switching on the previous section element must only occur if the section contents list is non-empty, in order to prevent out-of-bounds indexing. Otherwise, the parser unconditionally appends a new literal element to the list. After either case, parsing continues at the next line.

- -

Add literal range:

- -
if (content.items.len > 0) {
-    switch (content.items[content.items.len - 1]) {
-        .literal => |*range| {
-            range.*.end = i;
-        },
-        .reference => {
-            try content.append(.{ .literal = .{ .start = i, .end = i } });
-            log(.debug, "({d}) \tappending literal", .{i + 1});
-        },
-    }
-} else {
-    try content.append(.{ .literal = .{ .start = i, .end = i } });
-    log(.debug, "({d}) \tappending literal", .{i + 1});
-}
-i += 1;
-
- -

Code Generation

- -

We define the source code generation procedure which is split into two functions, consisting of a wrapper function which begins code generation at (the index of) the top-level section, and the main procedure which iterates over the current section contents, recursively resolving section references and appending literal elements to the list of source code lines.

- -

Code generation functions:

- -
pub fn codegen(lines: [][]const u8, list: []Section, alloc: Allocator) ![][]const u8 {
-    const root = try search(list, k_root);
-    return try codegen_main(lines, list, root, 0, alloc);
-}
-
-fn codegen_main(lines: [][]const u8, list: []Section, index: usize, depth: u8, alloc: Allocator) anyerror![][]const u8 {
-    var buffer = std.ArrayList([]const u8).init(alloc);
-    defer buffer.deinit();
-
-    const section = list[index];
-    log(.debug, "generating section '{s}'", .{section.name});
-    for (section.content) |content| switch (content) {
-        .literal => |range| {
-            (Append literal range)
-        },
-        .reference => |name| {
-            (Resolve reference)
-        },
-    };
-
-    log(.debug, "ending section '{s}'", .{section.name});
-    return buffer.toOwnedSlice();
-}
-
- -

To append a literal range, the range of lines is simply appended to the buffer.

- -

Append literal range:

- -
log(.debug, "adding literal range {d}-{d}", .{ range.start + 1, range.end + 1 });
-try buffer.appendSlice(lines[(range.start)..(range.end + 1)]);
-
- -

To resolve a section reference, the function must first check whether the current recursion depth has exceeded the configured limit, and return an error if this occurs. Otherwise, the index of the referenced section is retrieved, its contents are recursively generated (with an incremented recursion depth), and the resulting source code lines are appended to the buffer.

- -

Resolve reference:

- -
if (depth > dereference_max) {
-    log(.err, "section dereferencing recursion depth exceeded (max {d})", .{dereference_max});
-    return error.DereferenceLimit;
-}
-const ref = try search(list, name);
-const code = try codegen_main(lines, list, ref, depth + 1, alloc);
-try buffer.appendSlice(code);
-
- -

Text Generation

- -

Finally, we define the text generation function which iterates over the list of lines and produces the literate program text to be passed to an external document processor. In order to keep track of the name of the code section currently being formatted at any given point, the variable current_name is continually updated to contain the current name string. Configuration declarations are skipped, and lines which do not contain any formatting commands are appended as they are.

- -

Text generation function:

- -
pub fn textgen(lines: [][]const u8, alloc: Allocator) ![][]const u8 {
-    var buffer = std.ArrayList([]const u8).init(alloc);
-    defer buffer.deinit();
-
-    (Process configuration declarations)
-
-    var current_name: []const u8 = undefined;
-    for (lines) |line| {
-        if (    std.mem.startsWith(u8, line, kc_start)
-        or  std.mem.startsWith(u8, line, kc_add)
-        or  std.mem.startsWith(u8, line, kc_end)
-        or  std.mem.startsWith(u8, line, kc_ref)) {
-            continue;
-        } else switch (command_type(line)) {
-            .start  => {
-                (Format starting command)
-            },
-            .add    => {
-                (Format appending command)
-            },
-            .ref    => {
-                (Format reference command)
-            },
-            .end    => {
-                (Format ending command)
-            },
-            else    => {
-                try buffer.append(line);
-            },
-        }
-    }
-    
-    return buffer.toOwnedSlice();
-}
-
- -

The formatting strings given by each configuration declaration are first retrieved. If the declaration of the format string for the section appending command is omitted, the format string for the section starting command is used in its place.

- -

Process configuration declarations:

- -
const conf_start = try get_conf(lines, kc_start, alloc);
-const conf_add = get_conf(lines, kc_add, alloc) catch conf_start;
-const conf_end = try get_conf(lines, kc_end, alloc);
-const conf_ref = try get_conf(lines, kc_ref, alloc);
-
- -

To process a section starting command, the current section name is updated, and the contents of the corresponding formatting command (that is, the segments of the split formatting string) are interspersed with copies of the current section name. The resulting string is then appended to the buffer.

- -

Format starting command:

- -
current_name = line[(k_start.len)..];
-try buffer.append(try std.mem.join(alloc, current_name, conf_start));
-
- -

Processing a section appending command is performed similarly.

- -

Format appending command:

- -
current_name = line[(k_add.len)..];
-try buffer.append(try std.mem.join(alloc, current_name, conf_add));
-
- -

To process a reference command, the index of the reference command keyword is first extracted. Then the formatted reference string is created, to which the reference command line’s leading whitespace is prepended (to preserve indentation).

- -

Format reference command:

- -
const start = std.mem.indexOf(u8, line, k_ref).?;
-const ref = try std.mem.join(alloc, line[(start + k_ref.len)..], conf_ref);
-try buffer.append(try std.mem.concat(alloc, u8, &[_][]const u8{ line[0..start], ref }));
-
- -

Processing a section ending command is performed similarly to the starting and appending commands; however, it does not require updating the current section name.

- -

Format ending command:

- -
try buffer.append(try std.mem.join(alloc, current_name, conf_end));
-
- - \ No newline at end of file diff --git a/doc/freestanding/log.html b/doc/freestanding/log.html deleted file mode 100644 index 3e32826..0000000 --- a/doc/freestanding/log.html +++ /dev/null @@ -1,109 +0,0 @@ - - - - - - - - - -

log.lp

-

This file contains a simple logging function. It is a modified version of the example logging function implementation provided in std.log.

- -

*:

- -
(License)
-
-(Imports)
-
-(Level setting)
-
-(Logging function)
-
- -

License:

- -
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
- -

We first import the standard library, and the Level type which is an enum representing the possible log levels.

- -

Imports:

- -
const std = @import("std");
-const Level = std.log.Level;
-
- -

The logging function is structured such that only log messages equal to or above a certain severity threshold will be printed to the console. This threshold can then be globally modified during development. The threshold constant is defined below.

- -

Level setting:

- -
pub const log_level: Level = .warn;
-
- -

We then define the logging function itself, which accepts a Level value and the format string and argument struct to be passed to the inner print function.

- -

Logging function:

- -
pub fn log(
-    comptime level: Level,
-    comptime format: []const u8,
-    args: anytype,
-) void {
-    (Compare with level threshold)
-
-    (Define message string)
-
-    (Print to console)
-}
-
- -

First the comparison against the severity threshold is made. (A lower integer value signifies a higher severity.) If the severity is lower than the threshold, the function immediately exits.

- -

Compare with level threshold:

- -
if (@enumToInt(level) > @enumToInt(log_level)) return;
-
- -

Next the message string is created. The unformatted content of this string is evaluated at compile time, before being formatted by the print function at runtime. The ‘info’ and ‘error’ log levels use custom names, whereas all other levels use their default display names.

- -

Define message string:

- -
const msg = "[" ++ switch (level) {
-        .info => "ok",
-        .err => "err",
-        else => level.asText(),
-} ++ "]\t" ++ format ++ "\n";
-
- -

Finally, the message is printed to the console. If an error is returned by the print() call, the logging function silently exits.

- - - -
const stderr = std.io.getStdErr().writer();
-nosuspend stderr.print(msg, args) catch return;
-
- - \ No newline at end of file diff --git a/doc/freestanding/tangle.html b/doc/freestanding/tangle.html deleted file mode 100644 index 85ede91..0000000 --- a/doc/freestanding/tangle.html +++ /dev/null @@ -1,148 +0,0 @@ - - - - - - - - - -

tangle.lp

-

The structure of this file is quite similar to that of weave.zig, only differing in terms of which functions are used to transform the input data.

- -

*:

- -
(License)
-
-(Imports)
-
-pub fn main() !u8 {
-    (IO initialization)
-
-    (Allocator initialization)
-
-    (Read file from stdin)
-
-    (Split into lines)
-
-    (Parse lines into sections)
-
-    (Generate code)
-
-    (Write to stdout)
-
-    return 0;
-}
-
- -

License:

- -
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
- -

First we import the other files containing the core functions.

- -

Imports:

- -
const std = @import("std");
-const data = @import("data.zig");
-const log = @import("log.zig").log;
-
-const Allocator = std.mem.Allocator;
-
- -

Within the main procedure, we first initialize the stdin and stdout interfaces.

- -

IO initialization:

- -
const stdin = std.io.getStdIn();
-const stdout = std.io.getStdOut();
-
- -

We then initialize the allocator, deferring its deinitialization to the end of the process. Since the overall memory usage pattern is one in which all resources may be freed at once, the arena allocator is the most appropriate choice for this program.

- -

Allocator initialization:

- -
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-var alloc = arena.allocator();
-defer arena.deinit();
-
- -

The input file is then read from stdin. In the case of input exceeding the maximum permitted file size, the program may report the error and exit normally. All other errors which may be returned are memory allocation failures and should thus yield control to the panic handler.

- -

Read file from stdin:

- -
const input = stdin.reader().readAllAlloc(alloc, data.input_max) catch |err| switch (err) {
-    error.StreamTooLong => {
-        log(.err, "input too large (maximum {})", .{std.fmt.fmtIntSizeBin(data.input_max)});
-        return 1;
-    },
-    else => |e| return e,
-};
-
- -

We then pass the input into the line splitting function, creating an array of strings.

- -

Split into lines:

- -
const lines = try data.split_lines(input, alloc);
-
- -

The lines are then passed into the parsing function, which may return a parsing error. Logging such errors is handled by the function itself, and thus the errors are handled here solely by exiting.

- -

Parse lines into sections:

- -
const sections = data.parse(lines, alloc) catch |err| switch (err) {
-    error.UnexpectedStart,
-    error.UnexpectedEnd => {
-        return 1;
-    },
-    else => |e| return e,
-};
-
- -

The code file is then generated. This entails resolving references to section names, which may return an error, handled by exiting as above.

- -

Generate code:

- -
const code = data.codegen(lines, sections, alloc) catch |err| switch (err) {
-    error.DereferenceLimit,
-    error.NotFound => {
-        return 1;
-    },
-    else => |e| return e,
-};
-
- -

Finally, the lines of the code file are written to stdout, separated by newlines.

- -

Write to stdout:

- -
for (code) |line| {
-    try stdout.writer().print("{s}\n", .{line});
-}
-
- - \ No newline at end of file diff --git a/doc/freestanding/weave.html b/doc/freestanding/weave.html deleted file mode 100644 index 9e851ba..0000000 --- a/doc/freestanding/weave.html +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - -

weave.lp

-

The structure of this file is quite similar to that of tangle.zig, only differing in terms of which functions are used to transform the input data.

- -

*:

- -
(License)
-
-(Imports)
-
-pub fn main() !u8 {
-    (IO initialization)
-
-    (Allocator initialization)
-
-    (Read file from stdin)
-
-    (Split into lines)
-
-    (Generate text)
-
-    (Write to stdout)
-
-    return 0;
-}
-
- -

License:

- -
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
-
- -

First we import the other files containing the core functions.

- -

Imports:

- -
const std = @import("std");
-const data = @import("data.zig");
-const log = @import("log.zig").log;
-
-const Allocator = std.mem.Allocator;
-
- -

Within the main procedure, we first initialize the stdin and stdout interfaces.

- -

IO initialization:

- -
const stdin = std.io.getStdIn();
-const stdout = std.io.getStdOut();
-
- -

We then initialize the allocator, deferring its deinitialization to the end of the process. Since the overall memory usage pattern is one in which all resources may be freed at once, the arena allocator is the most appropriate choice for this program.

- -

Allocator initialization:

- -
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-var alloc = arena.allocator();
-defer arena.deinit();
-
- -

The input file is then read from stdin. In the case of input exceeding the maximum permitted file size, the program may report the error and exit normally. All other errors which may be returned are memory allocation failures and should thus yield control to the panic handler.

- -

Read file from stdin:

- -
const input = stdin.reader().readAllAlloc(alloc, data.input_max) catch |err| switch (err) {
-    error.StreamTooLong => {
-        log(.err, "input too large (maximum {})", .{std.fmt.fmtIntSizeBin(data.input_max)});
-        return 1;
-    },
-    else => |e| return e,
-};
-
- -

We then pass the input into the line splitting function, creating an array of strings.

- -

Split into lines:

- -
const lines = try data.split_lines(input, alloc);
-
- -

The text file is then generated. This entails searching for the configuration declarations, which may fail and thus return an error. Logging such errors is handled by the function itself, and thus the errors are handled here solely by exiting.

- -

Generate text:

- -
const text = data.textgen(lines, alloc) catch |err| switch (err) {
-    error.NotFound => {
-        return 1;
-    },
-    else => |e| return e,
-};
-
- -

Finally, the lines of the text file are written to stdout, separated by newlines.

- -

Write to stdout:

- -
for (text) |line| {
-    try stdout.writer().print("{s}\n", .{line});
-}
-
- - \ No newline at end of file diff --git a/doc/header b/doc/header deleted file mode 100644 index fcecc47..0000000 --- a/doc/header +++ /dev/null @@ -1,8 +0,0 @@ -title: -leading: -opts: h - -@start ### @@:\n``` -@add ### + @@:\n``` -@end ``` -@ref @= @@ =@ diff --git a/doc/log.html b/doc/log.html old mode 100755 new mode 100644 index 59e9627..3e32826 --- a/doc/log.html +++ b/doc/log.html @@ -3,37 +3,49 @@ - - - - - - -log.lp — DistressNetwork° - + + -
-
-
log.lp
-
+

log.lp

This file contains a simple logging function. It is a modified version of the example logging function implementation provided in std.log.

*:

-
(License)
+
(License)
 
-(Imports)
+(Imports)
 
-(Level setting)
+(Level setting)
 
-(Logging function)
+(Logging function)
 

License:

-
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 

We first import the standard library, and the Level type which is an enum representing the possible log levels.

@@ -60,11 +72,11 @@ const Level = std.log.Level; comptime format: []const u8, args: anytype, ) void { - (Compare with level threshold) + (Compare with level threshold) - (Define message string) + (Define message string) - (Print to console) + (Print to console) }
@@ -93,13 +105,5 @@ const Level = std.log.Level;
const stderr = std.io.getStdErr().writer();
 nosuspend stderr.print(msg, args) catch return;
 
-
- -
- - + \ No newline at end of file diff --git a/doc/online/data.html b/doc/online/data.html new file mode 100755 index 0000000..9fddef4 --- /dev/null +++ b/doc/online/data.html @@ -0,0 +1,560 @@ + + + + + + + + + + + +data.lp — DistressNetwork° + + + +
+
+
data.lp
+
+

This file contains the various data processing-related constants and functions referenced by the tangling and weaving processes.

+ +

*:

+ +
(License)
+
+(Imports)
+
+(Processing limits)
+
+(Formatting keywords)
+
+(Configuration keywords)
+
+(Data structure types)
+
+(Error set)
+
+(Line splitting function)
+
+(Configuration searching function)
+
+(Section searching function)
+
+(Command type detection function)
+
+(Parsing functions)
+
+(Code generation functions)
+
+(Text generation function)
+
+ +

License:

+ +
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+ +

Constants

+ +

We first import the standard library and the logging function from log.zig.

+ +

Imports:

+ +
const std = @import("std");
+const log = @import("log.zig").log;
+
+const Allocator = std.mem.Allocator;
+
+ +

We define the maximum input file size of 4GiB, and the code generation function’s maximum recursion depth of 250 nested calls.

+ +

Processing limits:

+ +
pub const input_max = 0x1_0000_0000;
+pub const dereference_max = 250;
+
+ +

We then define the recognized formatting keywords. These consist of the following:

+ +
    +
  • @:, which begins a new code section;
  • +
  • @+, which appends content to a previous code section;
  • +
  • @., which terminates the definition of a code section;
  • +
  • @=, which creates a reference to another code section;
  • +
  • *, which is a reserved section name representing the root level of the source code.
  • +
+ +

Formatting keywords:

+ +
pub const k_start   = "@: ";
+pub const k_add     = "@+ ";
+pub const k_end     = "@.";
+pub const k_ref     = "@= ";
+pub const k_root    = "*";
+
+ +

We similarly define the recognized configuration keywords, consisting of:

+ +
    +
  • @start, which defines the leading formatted code delimiter when beginning new sections;
  • +
  • @add, which defines the leading formatted code delimiter when appending to existing sections;
  • +
  • @end, which defines the trailing formatted code delimiter;
  • +
  • @ref, which defines the format for section references;
  • +
  • @@, which is the escape sequence representing the current section name;
  • +
  • \n, which is the escape sequence representing a newline.
  • +
+ +

Configuration keywords:

+ +
pub const kc_start  = "@start ";
+pub const kc_add    = "@add ";
+pub const kc_end    = "@end ";
+pub const kc_ref    = "@ref ";
+pub const kc_esc    = "@@";
+pub const kc_nl     = "\\n";
+
+ +

We then define the data structure used for parsing the input into code sections, described as follows:

+ +
    +
  • The overall structure of the file is an array of Sections.
  • +
  • A Section consists of the section name and an array of Content elements.
  • +
  • A Content element may be either a range of literal lines of code or a reference to another section.
  • +
  • A LineRange is a pair of integers indicating the starting and ending line numbers of the section.
  • +
+ +

Data structure types:

+ +
pub const Section = struct {
+    name: []const u8,
+    content: []const Content,
+};
+
+pub const CodeType = enum { literal, reference };
+pub const Content = union(CodeType) {
+    literal: LineRange,
+    reference: []const u8,
+};
+
+pub const LineRange = struct {
+    start: u32,
+    end: u32,
+};
+
+ +

We also define the set of errors which may be encountered by the various processing functions, consisting of:

+ +
    +
  • Unexpected section start commands,
  • +
  • Unexpected section end commands,
  • +
  • Recursive dereferencing exceeding the specified depth limit,
  • +
  • References to nonexistent section names or configuration commands.
  • +
+ +

Error set:

+ +
pub const Errors = error {
+    UnexpectedStart,
+    UnexpectedEnd,
+    DereferenceLimit,
+    NotFound,
+};
+
+ +

Preprocessing & Searching

+ +

The line splitting function is defined, which operates on a buffer as follows.

+ +

Line splitting function:

+ +
pub fn split_lines(file: []const u8, alloc: Allocator) ![][]const u8 {
+    var buffer = std.ArrayList([]const u8).init(alloc);
+    defer buffer.deinit();
+
+    (Split file at each newline)
+
+    return buffer.toOwnedSlice();
+}
+
+ +

The function simply iteratively splits the file at each newline, and appends each resulting line to the buffer.

+ +

Split file at each newline:

+ +
var iterator = std.mem.split(u8, file, "\n");
+while (iterator.next()) |line| {
+    try buffer.append(line);
+}
+
+ +

In addition, the final empty line created by the trailing newline at the end of the file (inserted automatically by some text editors) is removed, if it exists. This may only be performed if the file is non-empty, to avoid out-of-bounds indexing.

+ +

+ Split file at each newline:

+ +
if ((buffer.items.len > 0) and std.mem.eql(u8, buffer.items[buffer.items.len - 1], "")) {
+    _ = buffer.pop();
+}
+
+ +

We define the configuration command searching function, which returns a list containing the segments of the split format string. The function returns from within the for loop if the declaration is found; otherwise, an error is logged and returned.

+ +

Configuration searching function:

+ +
pub fn get_conf(lines: [][]const u8, key: []const u8, alloc: Allocator) ![][]const u8 {
+    for (lines) |line| {
+        if (std.mem.startsWith(u8, line, key)) {
+            return try fmt_conf(line, key, alloc);
+        }
+    }
+    log(.err, "config declaration '{s}' not found", .{std.mem.trimRight(u8, key, " \t")});
+    return error.NotFound;
+}
+
+(Auxiliary formatting function)
+
+ +

If the declaration is found, its contained format string is split along instances of the section name escape sequence, and each substring has its instances of the newline escape sequence replaced with a literal newline.

+ +

Auxiliary formatting function:

+ +
fn fmt_conf(line: []const u8, key: []const u8, alloc: Allocator) ![][]const u8 {
+    var buffer = std.ArrayList([]const u8).init(alloc);
+    defer buffer.deinit();
+
+    var iterator = std.mem.split(u8, line[(key.len)..], kc_esc);
+    while (iterator.next()) |str| {
+        try buffer.append(try std.mem.replaceOwned(u8, alloc, str, kc_nl, "\n"));
+    }
+
+    return buffer.toOwnedSlice();
+}
+
+ +

We define the code section searching function, which returns the index (into the section list) of the first section with a matching name, or returns an error if none exist.

+ +

Section searching function:

+ +
fn search(list: []Section, name: []const u8) !usize {
+    for (list) |section, index| {
+        if (std.mem.eql(u8, section.name, name)) return index;
+    }
+    log(.err, "section '{s}' not found", .{name});
+    return error.NotFound;
+}
+
+ +

Parsing

+ +

We first define a function which, for a given line, determines whether it consists of a formatting command, and which type of command it contains. This is done in order to enable the use of switch statements in later functions using this routine.

+ +

Command type detection function:

+ +
const CommandType = enum { start, add, end, ref, none };
+
+fn command_type(line: []const u8) CommandType {
+    if (std.mem.startsWith(u8, line, k_start)) {
+        return .start;
+    } else if (std.mem.startsWith(u8, line, k_add)) {
+        return .add;
+    } else if (std.mem.eql(u8, line, k_end)) {
+        return .end;
+    } else if (std.mem.startsWith(u8, std.mem.trimLeft(u8, line, " \t"), k_ref)) {
+        return .ref;
+    } else {
+        return .none;
+    }
+}
+
+ +

We then define the parsing functions, consisting of the main parse function which builds the list of Sections, and its auxiliary parse_code subroutine which builds the contents of each Section.

+ +

Parsing functions:

+ +
pub fn parse(lines: [][]const u8, alloc: Allocator) ![]Section {
+    var sections = std.ArrayList(Section).init(alloc);
+    defer sections.deinit();
+
+    (Main parsing routine)
+
+    return sections.toOwnedSlice();
+}
+
+fn parse_code(lines: [][]const u8, index: u32, alloc: Allocator) !CodeReturn {
+    var content = std.ArrayList(Content).init(alloc);
+    defer content.deinit();
+
+    (Code parsing subroutine)
+
+    return CodeReturn{ .content = content.toOwnedSlice(), .index = i + 1 };
+}
+
+ +

The latter function takes as arguments the list of lines and the allocator similarly to the main function, but it is also passed the index of the current line being processed, and returns the line at which the main function should resume parsing after the code section is parsed. It thus returns a struct consisting of the contents of the code section and the next line number index, as follows.

+ +

+ Parsing functions:

+ +
const CodeReturn = struct {
+    content: []const Content,
+    index: u32,
+};
+
+ +

The main parsing routine iterates over the list of lines, adding code sections where they occur, and otherwise ignoring text sections. If a section end command is encountered in the absence of a preceding starting command, an error is returned.

+ +

Main parsing routine:

+ +
var i: u32 = 0;
+while (i < lines.len) {
+    const line = lines[i];
+    switch (command_type(line)) {
+        .start  => {
+            (Add new section)
+        },
+        .add    => {
+            (Append to section)
+        },
+        .end    => {
+            log(.err, "line {d}: unexpected section end", .{i + 1});
+            return error.UnexpectedEnd;
+        },
+        else    => {
+            i += 1;
+        },
+    }
+}
+
+ +

To add a new section, the name (consisting of everything after the starting token) is first retrieved from the starting command. Then the code parsing subroutine is called, beginning at the line after the starting command, and it returns the resulting code section (section.content) and the next line at which to resume parsing (section.index). The code section is appended to the section list, and the parsing routine continues at the provided index.

+ +

Add new section:

+ +
const name = line[(k_start.len)..];
+log(.debug, "({d}) starting section '{s}'", .{ i + 1, name });
+
+const section = try parse_code(lines, i + 1, alloc);
+try sections.append(.{ .name = name, .content = section.content });
+
+log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
+i = section.index;
+
+ +

To append to an existing section, the section name and the code section contents to be appended are retrieved as above. The index of the section is located, along with its address within the section list. Next, the new contents of the section are created by concatenating the old contents with the newly parsed code section contents. The section list is then updated to point to the new section contents, and the parsing routine continues.

+ +

Append to section:

+ +
const name = line[(k_add.len)..];
+log(.debug, "({d}) appending to section '{s}'", .{ i + 1, name });
+
+const section = try parse_code(lines, i + 1, alloc);
+const index = try search(sections.items, name);
+const old = &sections.items[index];
+const new = try std.mem.concat(alloc, Content, &[_][]const Content{ old.*.content, section.content });
+old.*.content = new;
+
+log(.debug, "({d}) ending section '{s}'", .{ section.index, name });
+i = section.index;
+
+ +

The code parsing subroutine iterates over the list of lines similarly to the main routine. If a starting or appending command is encountered (lacking a matching ending command), an error is raised. Reference commands may be preceded with any amount of whitespace. The loop exits upon encountering an ending command. Otherwise, the line is appended as a literal element.

+ +

Code parsing subroutine:

+ +
var i = index;
+while (i < lines.len) {
+    const line = lines[i];
+    switch (command_type(line)) {
+        .start, .add    => {
+            log(.err, "line {d}: unexpected section start", .{i + 1});
+            return error.UnexpectedStart;
+        },
+        .ref    => {
+            (Add reference)
+        },
+        .end    => {
+            break;
+        },
+        else    => {
+            (Add literal range)
+        },
+    }
+}
+
+ +

To add a reference, the name of the referenced section is retrieved, consisting of the characters following the leading whitespace and the command token. The resulting string is appended to the section contents list, and the parser continues at the next line.

+ +

Add reference:

+ +
const ref_name = std.mem.trimLeft(u8, line, " \t")[(k_ref.len)..];
+try content.append(.{ .reference = ref_name });
+log(.debug, "({d}) \tappended reference '{s}'", .{ i + 1, ref_name });
+i += 1;
+
+ +

To add a literal range, the parser either updates the end index of the previous literal element, or creates a new literal element if the last element added is a reference. This action of switching on the previous section element must only occur if the section contents list is non-empty, in order to prevent out-of-bounds indexing. Otherwise, the parser unconditionally appends a new literal element to the list. After either case, parsing continues at the next line.

+ +

Add literal range:

+ +
if (content.items.len > 0) {
+    switch (content.items[content.items.len - 1]) {
+        .literal => |*range| {
+            range.*.end = i;
+        },
+        .reference => {
+            try content.append(.{ .literal = .{ .start = i, .end = i } });
+            log(.debug, "({d}) \tappending literal", .{i + 1});
+        },
+    }
+} else {
+    try content.append(.{ .literal = .{ .start = i, .end = i } });
+    log(.debug, "({d}) \tappending literal", .{i + 1});
+}
+i += 1;
+
+ +

Code Generation

+ +

We define the source code generation procedure which is split into two functions, consisting of a wrapper function which begins code generation at (the index of) the top-level section, and the main procedure which iterates over the current section contents, recursively resolving section references and appending literal elements to the list of source code lines.

+ +

Code generation functions:

+ +
pub fn codegen(lines: [][]const u8, list: []Section, alloc: Allocator) ![][]const u8 {
+    const root = try search(list, k_root);
+    return try codegen_main(lines, list, root, 0, alloc);
+}
+
+fn codegen_main(lines: [][]const u8, list: []Section, index: usize, depth: u8, alloc: Allocator) anyerror![][]const u8 {
+    var buffer = std.ArrayList([]const u8).init(alloc);
+    defer buffer.deinit();
+
+    const section = list[index];
+    log(.debug, "generating section '{s}'", .{section.name});
+    for (section.content) |content| switch (content) {
+        .literal => |range| {
+            (Append literal range)
+        },
+        .reference => |name| {
+            (Resolve reference)
+        },
+    };
+
+    log(.debug, "ending section '{s}'", .{section.name});
+    return buffer.toOwnedSlice();
+}
+
+ +

To append a literal range, the range of lines is simply appended to the buffer.

+ +

Append literal range:

+ +
log(.debug, "adding literal range {d}-{d}", .{ range.start + 1, range.end + 1 });
+try buffer.appendSlice(lines[(range.start)..(range.end + 1)]);
+
+ +

To resolve a section reference, the function must first check whether the current recursion depth has exceeded the configured limit, and return an error if this occurs. Otherwise, the index of the referenced section is retrieved, its code is recursively generated (with an incremented recursion depth), and the resulting source code lines are appended to the buffer.

+ +

Resolve reference:

+ +
if (depth > dereference_max) {
+    log(.err, "section dereferencing recursion depth exceeded (max {d})", .{dereference_max});
+    return error.DereferenceLimit;
+}
+const ref = try search(list, name);
+const code = try codegen_main(lines, list, ref, depth + 1, alloc);
+try buffer.appendSlice(code);
+
+ +

Text Generation

+ +

Finally, we define the text generation function which iterates over the list of lines and produces the literate program text to be passed to an external document processor. In order to keep track of the name of the code section currently being formatted at any given point, the variable current_name is continually updated to contain the current name string. Configuration declarations are skipped, and lines which do not contain any formatting commands are appended as they are.

+ +

Text generation function:

+ +
pub fn textgen(lines: [][]const u8, alloc: Allocator) ![][]const u8 {
+    var buffer = std.ArrayList([]const u8).init(alloc);
+    defer buffer.deinit();
+
+    (Process configuration declarations)
+
+    var current_name: []const u8 = undefined;
+    for (lines) |line| {
+        if (    std.mem.startsWith(u8, line, kc_start)
+        or  std.mem.startsWith(u8, line, kc_add)
+        or  std.mem.startsWith(u8, line, kc_end)
+        or  std.mem.startsWith(u8, line, kc_ref)) {
+            continue;
+        } else switch (command_type(line)) {
+            .start  => {
+                (Format starting command)
+            },
+            .add    => {
+                (Format appending command)
+            },
+            .ref    => {
+                (Format reference command)
+            },
+            .end    => {
+                (Format ending command)
+            },
+            else    => {
+                try buffer.append(line);
+            },
+        }
+    }
+    
+    return buffer.toOwnedSlice();
+}
+
+ +

The formatting strings given by each configuration declaration are first retrieved. If the declaration of the format string for the section appending command is omitted, the format string for the section starting command is used in its place.

+ +

Process configuration declarations:

+ +
const conf_start = try get_conf(lines, kc_start, alloc);
+const conf_add = get_conf(lines, kc_add, alloc) catch conf_start;
+const conf_end = try get_conf(lines, kc_end, alloc);
+const conf_ref = try get_conf(lines, kc_ref, alloc);
+
+ +

To process a section starting command, the current section name is updated, and the contents of the corresponding formatting command (that is, the segments of the split formatting string) are interspersed with copies of the current section name. The resulting string is then appended to the buffer.

+ +

Format starting command:

+ +
current_name = line[(k_start.len)..];
+try buffer.append(try std.mem.join(alloc, current_name, conf_start));
+
+ +

Processing a section appending command is performed similarly.

+ +

Format appending command:

+ +
current_name = line[(k_add.len)..];
+try buffer.append(try std.mem.join(alloc, current_name, conf_add));
+
+ +

To process a reference command, the index of the reference command keyword is first extracted. Then the formatted reference string is created, to which the reference command line’s leading whitespace is prepended (to preserve indentation).

+ +

Format reference command:

+ +
const start = std.mem.indexOf(u8, line, k_ref).?;
+const ref = try std.mem.join(alloc, line[(start + k_ref.len)..], conf_ref);
+try buffer.append(try std.mem.concat(alloc, u8, &[_][]const u8{ line[0..start], ref }));
+
+ +

Processing a section ending command is performed similarly to the starting and appending commands; however, it does not require updating the current section name.

+ +

Format ending command:

+ +
try buffer.append(try std.mem.join(alloc, current_name, conf_end));
+
+
+ +
+ + + diff --git a/doc/online/header b/doc/online/header new file mode 100644 index 0000000..fcecc47 --- /dev/null +++ b/doc/online/header @@ -0,0 +1,8 @@ +title: +leading: +opts: h + +@start ### @@:\n``` +@add ### + @@:\n``` +@end ``` +@ref @= @@ =@ diff --git a/doc/online/log.html b/doc/online/log.html new file mode 100755 index 0000000..59e9627 --- /dev/null +++ b/doc/online/log.html @@ -0,0 +1,105 @@ + + + + + + + + + + + +log.lp — DistressNetwork° + + + +
+
+
log.lp
+
+

This file contains a simple logging function. It is a modified version of the example logging function implementation provided in std.log.

+ +

*:

+ +
(License)
+
+(Imports)
+
+(Level setting)
+
+(Logging function)
+
+ +

License:

+ +
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+ +

We first import the standard library, and the Level type which is an enum representing the possible log levels.

+ +

Imports:

+ +
const std = @import("std");
+const Level = std.log.Level;
+
+ +

The logging function is structured such that only log messages equal to or above a certain severity threshold will be printed to the console. This threshold can then be globally modified during development. The threshold constant is defined below.

+ +

Level setting:

+ +
pub const log_level: Level = .warn;
+
+ +

We then define the logging function itself, which accepts a Level value and the format string and argument struct to be passed to the inner print function.

+ +

Logging function:

+ +
pub fn log(
+    comptime level: Level,
+    comptime format: []const u8,
+    args: anytype,
+) void {
+    (Compare with level threshold)
+
+    (Define message string)
+
+    (Print to console)
+}
+
+ +

First the comparison against the severity threshold is made. (A lower integer value signifies a higher severity.) If the severity is lower than the threshold, the function immediately exits.

+ +

Compare with level threshold:

+ +
if (@enumToInt(level) > @enumToInt(log_level)) return;
+
+ +

Next the message string is created. The unformatted content of this string is evaluated at compile time, before being formatted by the print function at runtime. The ‘info’ and ‘error’ log levels use custom names, whereas all other levels use their default display names.

+ +

Define message string:

+ +
const msg = "[" ++ switch (level) {
+        .info => "ok",
+        .err => "err",
+        else => level.asText(),
+} ++ "]\t" ++ format ++ "\n";
+
+ +

Finally, the message is printed to the console. If an error is returned by the print() call, the logging function silently exits.

+ + + +
const stderr = std.io.getStdErr().writer();
+nosuspend stderr.print(msg, args) catch return;
+
+
+ +
+ + + diff --git a/doc/online/tangle.html b/doc/online/tangle.html new file mode 100755 index 0000000..02e5905 --- /dev/null +++ b/doc/online/tangle.html @@ -0,0 +1,144 @@ + + + + + + + + + + + +tangle.lp — DistressNetwork° + + + +
+
+
tangle.lp
+
+

The structure of this file is quite similar to that of weave.zig, only differing in terms of which functions are used to transform the input data.

+ +

*:

+ +
(License)
+
+(Imports)
+
+pub fn main() !u8 {
+    (IO initialization)
+
+    (Allocator initialization)
+
+    (Read file from stdin)
+
+    (Split into lines)
+
+    (Parse lines into sections)
+
+    (Generate code)
+
+    (Write to stdout)
+
+    return 0;
+}
+
+ +

License:

+ +
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+ +

First we import the other files containing the core functions.

+ +

Imports:

+ +
const std = @import("std");
+const data = @import("data.zig");
+const log = @import("log.zig").log;
+
+const Allocator = std.mem.Allocator;
+
+ +

Within the main procedure, we first initialize the stdin and stdout interfaces.

+ +

IO initialization:

+ +
const stdin = std.io.getStdIn();
+const stdout = std.io.getStdOut();
+
+ +

We then initialize the allocator, deferring its deinitialization to the end of the process. Since the overall memory usage pattern is one in which all resources may be freed at once, the arena allocator is the most appropriate choice for this program.

+ +

Allocator initialization:

+ +
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+var alloc = arena.allocator();
+defer arena.deinit();
+
+ +

The input file is then read from stdin. In the case of input exceeding the maximum permitted file size, the program may report the error and exit normally. All other errors which may be returned are memory allocation failures and should thus yield control to the panic handler.

+ +

Read file from stdin:

+ +
const input = stdin.reader().readAllAlloc(alloc, data.input_max) catch |err| switch (err) {
+    error.StreamTooLong => {
+        log(.err, "input too large (maximum {})", .{std.fmt.fmtIntSizeBin(data.input_max)});
+        return 1;
+    },
+    else => |e| return e,
+};
+
+ +

We then pass the input into the line splitting function, creating an array of strings.

+ +

Split into lines:

+ +
const lines = try data.split_lines(input, alloc);
+
+ +

The lines are then passed into the parsing function, which may return a parsing error. Logging such errors is handled by the function itself, and thus the errors are handled here solely by exiting.

+ +

Parse lines into sections:

+ +
const sections = data.parse(lines, alloc) catch |err| switch (err) {
+    error.UnexpectedStart,
+    error.UnexpectedEnd => {
+        return 1;
+    },
+    else => |e| return e,
+};
+
+ +

The code file is then generated. This entails resolving references to section names, which may return an error, handled by exiting as above.

+ +

Generate code:

+ +
const code = data.codegen(lines, sections, alloc) catch |err| switch (err) {
+    error.DereferenceLimit,
+    error.NotFound => {
+        return 1;
+    },
+    else => |e| return e,
+};
+
+ +

Finally, the lines of the code file are written to stdout, separated by newlines.

+ +

Write to stdout:

+ +
for (code) |line| {
+    try stdout.writer().print("{s}\n", .{line});
+}
+
+
+ +
+ + + diff --git a/doc/online/weave.html b/doc/online/weave.html new file mode 100755 index 0000000..59fc750 --- /dev/null +++ b/doc/online/weave.html @@ -0,0 +1,128 @@ + + + + + + + + + + + +weave.lp — DistressNetwork° + + + +
+
+
weave.lp
+
+

The structure of this file is quite similar to that of tangle.zig, only differing in terms of which functions are used to transform the input data.

+ +

*:

+ +
(License)
+
+(Imports)
+
+pub fn main() !u8 {
+    (IO initialization)
+
+    (Allocator initialization)
+
+    (Read file from stdin)
+
+    (Split into lines)
+
+    (Generate text)
+
+    (Write to stdout)
+
+    return 0;
+}
+
+ +

License:

+ +
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+ +

First we import the other files containing the core functions.

+ +

Imports:

+ +
const std = @import("std");
+const data = @import("data.zig");
+const log = @import("log.zig").log;
+
+const Allocator = std.mem.Allocator;
+
+ +

Within the main procedure, we first initialize the stdin and stdout interfaces.

+ +

IO initialization:

+ +
const stdin = std.io.getStdIn();
+const stdout = std.io.getStdOut();
+
+ +

We then initialize the allocator, deferring its deinitialization to the end of the process. Since the overall memory usage pattern is one in which all resources may be freed at once, the arena allocator is the most appropriate choice for this program.

+ +

Allocator initialization:

+ +
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
+var alloc = arena.allocator();
+defer arena.deinit();
+
+ +

The input file is then read from stdin. If the input exceeds the maximum permitted file size, the program reports the error and exits with a status code of 1. All other errors which may be returned, such as memory allocation failures, are not handled here and are instead returned from the main procedure.

+ +

Read file from stdin:

+ +
const input = stdin.reader().readAllAlloc(alloc, data.input_max) catch |err| switch (err) {
+    error.StreamTooLong => {
+        log(.err, "input too large (maximum {})", .{std.fmt.fmtIntSizeBin(data.input_max)});
+        return 1;
+    },
+    else => |e| return e,
+};
+
+ +

We then pass the input into the line splitting function, creating an array of strings.

+ +

Split into lines:

+ +
const lines = try data.split_lines(input, alloc);
+
+ +

The text file is then generated. This entails searching for the configuration declarations, which may fail and thus return an error. Logging such errors is handled by the function itself, and thus the errors are handled here solely by exiting.

+ +

Generate text:

+ +
const text = data.textgen(lines, alloc) catch |err| switch (err) {
+    error.NotFound => {
+        return 1;
+    },
+    else => |e| return e,
+};
+
+ +

Finally, the lines of the text file are written to stdout, separated by newlines.

+ +

Write to stdout:

+ +
for (text) |line| {
+    try stdout.writer().print("{s}\n", .{line});
+}
+
+
+ +
+ + + diff --git a/doc/tangle.html b/doc/tangle.html old mode 100755 new mode 100644 index 02e5905..85ede91 --- a/doc/tangle.html +++ b/doc/tangle.html @@ -3,42 +3,54 @@ - - - - - - -tangle.lp — DistressNetwork° - + + -
-
-
tangle.lp
-
+

tangle.lp

The structure of this file is quite similar to that of weave.zig, only differing in terms of which functions are used to transform the input data.

*:

-
(License)
+
(License)
 
-(Imports)
+(Imports)
 
 pub fn main() !u8 {
-    (IO initialization)
+    (IO initialization)
 
-    (Allocator initialization)
+    (Allocator initialization)
 
-    (Read file from stdin)
+    (Read file from stdin)
 
-    (Split into lines)
+    (Split into lines)
 
-    (Parse lines into sections)
+    (Parse lines into sections)
 
-    (Generate code)
+    (Generate code)
 
-    (Write to stdout)
+    (Write to stdout)
 
     return 0;
 }
@@ -46,8 +58,8 @@ pub fn main() !u8 {
 
 

License:

-
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 

First we import the other files containing the core functions.

@@ -132,13 +144,5 @@ defer arena.deinit(); try stdout.writer().print("{s}\n", .{line}); }
-
- -
- - + \ No newline at end of file diff --git a/doc/usage.md b/doc/usage.md index 0cc9632..a008409 100755 --- a/doc/usage.md +++ b/doc/usage.md @@ -8,7 +8,7 @@ This tool consists of two programs, `tangle` and `weave`, both of which accept a ## Usage -These programs are intended to be intermediary text processing functions, used as components within build automation routines or shell scripts. The build processes for this tool itself, `build.sh` and `gen-freestanding.sh`, serve as example use cases. +These programs are intended to be intermediary text processing functions, used as components within build automation routines or shell scripts. The build processes for this tool itself, `build.sh` and `gen-docs.sh`, serve as example use cases. # Dependencies @@ -78,7 +78,7 @@ The format strings may contain the following control sequences: # Literate Source Code -The source code of this tool is rendered as a set of literate program documents, listed below. Additionally, freestanding versions of these documents exist in the `doc/freestanding` source directory for offline use. +The source code of this tool is rendered as a set of literate program documents, listed below. - [data.lp](/software/literary/data.html) - [log.lp](/software/literary/log.html) diff --git a/doc/weave.html b/doc/weave.html old mode 100755 new mode 100644 index 59fc750..9e851ba --- a/doc/weave.html +++ b/doc/weave.html @@ -3,40 +3,52 @@ - - - - - - -weave.lp — DistressNetwork° - + + -
-
-
weave.lp
-
+

weave.lp

The structure of this file is quite similar to that of tangle.zig, only differing in terms of which functions are used to transform the input data.

*:

-
(License)
+
(License)
 
-(Imports)
+(Imports)
 
 pub fn main() !u8 {
-    (IO initialization)
+    (IO initialization)
 
-    (Allocator initialization)
+    (Allocator initialization)
 
-    (Read file from stdin)
+    (Read file from stdin)
 
-    (Split into lines)
+    (Split into lines)
 
-    (Generate text)
+    (Generate text)
 
-    (Write to stdout)
+    (Write to stdout)
 
     return 0;
 }
@@ -44,8 +56,8 @@ pub fn main() !u8 {
 
 

License:

-
// Copyright 2022 DistressNetwork° <uplink@distress.network>
-// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
// Copyright 2022 DistressNetwork° <uplink@distress.network>
+// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 

First we import the other files containing the core functions.

@@ -116,13 +128,5 @@ defer arena.deinit(); try stdout.writer().print("{s}\n", .{line}); }
-
- -
- - + \ No newline at end of file diff --git a/gen-docs.sh b/gen-docs.sh index 6dcd149..5dfd166 100755 --- a/gen-docs.sh +++ b/gen-docs.sh @@ -1,26 +1,11 @@ #!/bin/sh -e -# This script solely generates the formatted code as hosted on the distress.network website. It is thus not intended for actual use by anyone other than the site owner. - -tmp=$(mktemp -p /tmp) -css="h3,.lp-ref {font-family: neue-haas-grotesk-text, var(--fs-sans); font-size: 1rem; font-weight: normal; font-style: italic;} h3 {margin: 1rem;}" - for file in "data" "log" "tangle" "weave" ; do in="${file}.lp" ; out="doc/${file}.html" ; - cat "doc/header" "$in" | ./weave > "$tmp" ; - sed "s;title: ;&${in};1" -i'' "$tmp" ; - sed "s;leading: ;&${in};1" -i'' "$tmp" ; - - pushd "../web" > "/dev/null" ; - sh "./md.sh" "$tmp" "../literary/$out" > "/dev/null" ; - popd > "/dev/null" ; - sed "/~\n&~1" -i'' "$out" ; - sed 's;\(.*\)@= \(.*\) =@;\1(\2);1' -i'' "$out" ; - + cp "doc/header-html" "$out" ; + printf "

${in}

\n" >> "$out" ; + cat "doc/header-lp" "$in" | ./weave | lowdown -Thtml >> "$out" ; + printf "\n" >> "$out" ; + sed 's/\(.*\)@= \(.*\) =@/\1(\2)<\/span>/1' -i'' "$out" ; echo "[ok] generated text '${file}'" ; done - -sh "./../sync.sh" - -rm "$tmp" diff --git a/gen-freestanding.sh b/gen-freestanding.sh deleted file mode 100755 index 9477463..0000000 --- a/gen-freestanding.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -e - -for file in "data" "log" "tangle" "weave" ; do - in="${file}.lp" ; out="doc/freestanding/${file}.html" ; - cp "doc/header-html" "$out" ; - printf "

${in}

\n" >> "$out" ; - cat "doc/header-lp" "$in" | ./weave | lowdown -Thtml >> "$out" ; - printf "\n" >> "$out" ; - sed 's/\(.*\)@= \(.*\) =@/\1(\2)<\/span>/1' -i'' "$out" ; - echo "[ok] generated text '${file}'" ; -done diff --git a/gen-online.sh b/gen-online.sh new file mode 100755 index 0000000..4ba5a5b --- /dev/null +++ b/gen-online.sh @@ -0,0 +1,26 @@ +#!/bin/sh -e + +# This script solely generates the formatted code as hosted on the distress.network website. It is thus not intended for actual use by anyone other than the site owner. + +tmp=$(mktemp -p /tmp) +css="h3,.lp-ref {font-family: neue-haas-grotesk-text, var(--fs-sans); font-size: 1rem; font-weight: normal; font-style: italic;} h3 {margin: 1rem;}" + +for file in "data" "log" "tangle" "weave" ; do + in="${file}.lp" ; out="doc/online/${file}.html" ; + cat "doc/online/header" "$in" | ./weave > "$tmp" ; + sed "s;title: ;&${in};1" -i'' "$tmp" ; + sed "s;leading: ;&${in};1" -i'' "$tmp" ; + + pushd "../web" > "/dev/null" ; + sh "./md.sh" "$tmp" "../literary/$out" > "/dev/null" ; + popd > "/dev/null" ; + sed "/~\n&~1" -i'' "$out" ; + sed 's;\(.*\)@= \(.*\) =@;\1(\2);1' -i'' "$out" ; + + echo "[ok] generated text '${file}'" ; +done + +sh "./../sync.sh" + +rm "$tmp" -- cgit v1.2.3