Create an array of Atoms from a byte array containing a sequence of PDF objects

 

   

Syntax
 

[C#]
static ArrayAtom FromContentStream(string text)
static ArrayAtom FromContentStream(byte[] data)
static ArrayAtom FromContentStream(byte[] data, out IList<int> offsets)

[Visual Basic]
Shared Function FromContentStream(text As String) As ArrayAtom
Shared Function FromContentStream(data() As Byte) As ArrayAtom
Shared Function FromContentStream(data() As Byte, <Out> ByRef offsets As IList(Of Integer)) As ArrayAtom

 

   

Params
 
Name Description
text A string holding a sequence of atoms.
data A chunk of data holding the sequence of atoms.
offsets The byte offsets in the data for the start of each atom in the returned array.
return An ArrayAtom holding the atoms in the content stream.

 

   

Notes
 

Create an array of Atoms from a byte array containing a sequence of PDF objects.

This method is useful for deconstructing PDF content streams for analysis and modification. To convert back into a content stream you can use the Atom.GetData function.

The returned offsets are the byte offsets for the start of each atom found in the stream. However, the returned array may contain atoms that themselves contain other atoms, so the mapping from offsets to array entries is not as simple as it might seem.

For example, take a returned array containing one DictAtom mapping V to the number one. The offsets list will contain three offsets - one for the DictAtom, one for the NameAtom "V" and one for the NumAtom 1.

 

   

Example
 

This example shows how to use the FromContentStream function to parse and display a PDF content stream.

 

// Parse the content stream of the current page and build a listing of it,
// one operator per line, indented by nesting level.
StringBuilder sb = new StringBuilder();
using (Doc doc = new Doc()) {
  doc.Read(Server.MapPath("spaceshuttle.pdf"));
  Page page = doc.ObjectSoup[doc.Page] as Page;
  ArrayAtom array = ArrayAtom.FromContentStream(page.GetContentData());
  int indent = 0;
  // operators that increase / decrease the indent of the listing
  HashSet<string> indentPlus = new HashSet<string>(new string[] { "q", "BT" });
  HashSet<string> indentMinus = new HashSet<string>(new string[] { "Q", "ET" });
  // Item2 of each pair is used below as the index of an operator in the array
  IList<Tuple<string, int>> items = OpAtom.Find(array);
  int index = 0;
  foreach (var pair in items) {
    string op = ((OpAtom)array[pair.Item2]).Text;
    // add indent to code; never go below zero so that an unmatched
    // closing operator does not skew the indent of later lines
    if (indentMinus.Contains(op) && indent > 0)
      indent--;
    for (int i = 0; i < indent; i++)
      sb.Append(" ");
    // write out the operands followed by the operator itself
    for (int i = index; i <= pair.Item2; i++) {
      if (i != index)
        sb.Append(" ");
      Atom item = array[i];
      // we write arrays out individually so that
      // we can override default cr lf behavior
      ArrayAtom itemArray = item as ArrayAtom;
      if (itemArray != null) {
        int n = itemArray.Count;
        sb.Append("[");
        for (int j = 0; j < n; j++) {
          sb.Append(itemArray[j].ToString());
          if (j != n - 1)
            sb.Append(" ");
        }
        sb.Append("]");
      }
      else {
        sb.Append(item.ToString());
      }
    }
    sb.AppendLine();
    if (indentPlus.Contains(op))
      indent++;
    // the next line starts with the atom after this operator
    index = pair.Item2 + 1;
  }
  // write out any atoms that are left over
  for (int i = index; i < array.Count; i++) {
    sb.Append(" ");
    sb.Append(array[i].ToString());
  }
}
// Write the listing into a new PDF using a monospaced font.
using (Doc doc = new Doc()) {
  doc.Font = doc.AddFont("Courier");
  doc.Rect.Inset(20, 20);
  doc.AddText(sb.ToString());
  doc.Save(Server.MapPath("PageContents.pdf"));
}


PageContents.pdf